include/util/compress/bzip2.hpp

Go to the documentation of this file.
00001 #ifndef UTIL_COMPRESS__BZIP2__HPP
00002 #define UTIL_COMPRESS__BZIP2__HPP
00003 
00004 /*  $Id: bzip2.hpp 169067 2009-08-25 13:41:52Z ivanov $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Author:  Vladimir Ivanov
00030  *
00031  * File Description:  BZip2 Compression API
00032  *
00033  * NOTE: The bzip2 documentation can be found here: 
00034  *       http://sources.redhat.com/bzip2/
00035  */
00036 
00037 #include <util/compress/stream.hpp>
00038 #include <stdio.h>
00039 
00040 /** @addtogroup Compression
00041  *
00042  * @{
00043  */
00044 
00045 BEGIN_NCBI_SCOPE
00046 
00047 
00048 //////////////////////////////////////////////////////////////////////////////
00049 //
00050 // Special compression parameters (description from bzip2 docs)
00051 //        
00052 // <verbosity>
00053 //    This parameter should be set to a number between 0 and 4 inclusive.
00054 //    0 is silent, and greater numbers give increasingly verbose
00055 //    monitoring/debugging output. If the library has been compiled with
00056 //    -DBZ_NO_STDIO, no such output will appear for any verbosity setting. 
00057 //
00058 // <work_factor> 
00059 //    Parameter work_factor controls how the compression phase behaves when
00060 //    presented with worst case, highly repetitive, input data.
00061 //    If compression runs into difficulties caused by repetitive data, the
00062 //    library switches from the standard sorting algorithm to a fallback
00063 //    algorithm. The fallback is slower than the standard algorithm by
00064 //    perhaps a factor of three, but always behaves reasonably, no matter
00065 //    how bad the input. Lower values of work_factor reduce the amount of
00066 //    effort the standard algorithm will expend before resorting to the
00067 //    fallback. You should set this parameter carefully; too low, and many
00068 //    inputs will be handled by the fallback algorithm and so compress
00069 //    rather slowly, too high, and your average-to-worst case compression
00070 //    times can become very large. The default value of 30 gives reasonable
00071 //    behaviour over a wide range of circumstances. Allowable values range
00072 //    from 0 to 250 inclusive. 0 is a special case, equivalent to using
00073 //    the default value of 30.
00074 //
00075 // <small_decompress> 
00076 //    If it is nonzero, the library will use an alternative decompression
00077 //    algorithm which uses less memory but at the cost of decompressing more
00078 //    slowly (roughly speaking, half the speed, but the maximum memory
00079 //    requirement drops to around 2300k).
00080 //
00081 
00082 
00083 //////////////////////////////////////////////////////////////////////////////
00084 ///
00085 /// CBZip2Compression --
00086 ///
00087 /// Defines base methods for compression/decompression of memory buffers
00088 /// and files.
00089 
00090 class  CBZip2Compression : public CCompression 
00091 {
00092 public:
00093     /// Compression/decompression flags.
00094     enum EFlags {
00095         /// Allow transparent reading of data from a buffer/file/stream
00096         /// regardless of whether it is compressed or not. But be aware:
00097         /// if the data source contains broken data and the API cannot detect
00098         /// that it is compressed data, you can get binary data instead of
00099         /// decompressed data. By default this flag is OFF.
00100         fAllowTransparentRead = (1<<0)
00101     };
00102     typedef CBZip2Compression::TFlags TBZip2Flags; ///< Bitwise OR of EFlags
00103 
00104     /// Constructor. See the special parameter descriptions earlier in this file.
00105     CBZip2Compression(
00106         ELevel level            = eLevel_Default,
00107         int    verbosity        = 0,              // [0..4]
00108         int    work_factor      = 0,              // [0..250] 
00109         int    small_decompress = 0               // [0,1]
00110     );
00111 
00112     /// Destructor.
00113     virtual ~CBZip2Compression(void);
00114 
00115     /// Return name and version of the compression library.
00116     virtual CVersionInfo GetVersion(void) const;
00117 
00118     /// Get compression level.
00119     ///
00120     /// NOTE: The BZip2 algorithm does not support zero level compression,
00121     ///       so "eLevel_NoCompression" will be translated to
00122     ///       "eLevel_Lowest".
00123     virtual ELevel GetLevel(void) const;
00124 
00125     /// Return the default compression level for this algorithm (eLevel_VeryHigh).
00126     virtual ELevel GetDefaultLevel(void) const
00127         { return eLevel_VeryHigh; };
00128 
00129     //
00130     // Utility functions 
00131     //
00132 
00133     /// Compress data in the buffer.
00134     ///
00135     /// Altogether, the total size of the destination buffer must be a little
00136     /// more than the size of the source buffer.
00137     /// @param src_buf
00138     ///   [in] Source buffer.
00139     /// @param src_len
00140     ///   [in] Size of data in source buffer.
00141     /// @param dst_buf
00142     ///   [out] Destination buffer (receives the compressed data).
00143     /// @param dst_size
00144     ///   [in] Size of destination buffer.
00145     /// @param dst_len
00146     ///   [out] Size of compressed data in destination buffer.
00147     /// @return
00148     ///   Return TRUE if the operation was successful or FALSE otherwise.
00149     ///   On success, 'dst_buf' contains compressed data of dst_len size.
00150     /// @sa
00151     ///   DecompressBuffer
00152     virtual bool CompressBuffer(
00153         const void* src_buf, size_t  src_len,
00154         void*       dst_buf, size_t  dst_size,
00155         /* out */            size_t* dst_len
00156     );
00157 
00158     /// Decompress data in the buffer.
00159     ///
00160     /// @param src_buf
00161     ///   Source buffer.
00162     /// @param src_len
00163     ///   Size of data in source buffer.
00164     /// @param dst_buf
00165     ///   Destination buffer.
00166     /// @param dst_size
00167     ///   Size of destination buffer.
00168     /// @param dst_len
00169     ///   Size of decompressed data in destination buffer.
00170     /// @return
00171     ///   Return TRUE if the operation was successful or FALSE otherwise.
00172     ///   On success, 'dst_buf' contains decompressed data of dst_len size.
00173     /// @sa
00174     ///   CompressBuffer
00175     virtual bool DecompressBuffer(
00176         const void* src_buf, size_t  src_len,
00177         void*       dst_buf, size_t  dst_size,
00178         /* out */            size_t* dst_len
00179     );
00180 
00181     /// Compress file.
00182     ///
00183     /// @param src_file
00184     ///   File name of source file.
00185     /// @param dst_file
00186     ///   File name of result file.
00187     /// @param buf_size
00188     ///   Buffer size used to read/write files.
00189     /// @return
00190     ///   Return TRUE on success, FALSE on error.
00191     /// @sa
00192     ///   DecompressFile
00193     virtual bool CompressFile(
00194         const string& src_file,
00195         const string& dst_file,
00196         size_t        buf_size = kCompressionDefaultBufSize
00197     );
00198 
00199     /// Decompress file.
00200     ///
00201     /// @param src_file
00202     ///   File name of source file.
00203     /// @param dst_file
00204     ///   File name of result file.
00205     /// @param buf_size
00206     ///   Buffer size used to read/write files.
00207     /// @return
00208     ///   Return TRUE on success, FALSE on error.
00209     /// @sa
00210     ///   CompressFile
00211     virtual bool DecompressFile(
00212         const string& src_file,
00213         const string& dst_file, 
00214         size_t        buf_size = kCompressionDefaultBufSize
00215     );
00216 
00217 protected:
00218     /// Get the error description for the specified error code.
00219     const char* GetBZip2ErrorDescription(int errcode);
00220 
00221     /// Format a string with the last error description.
00222     string FormatErrorMessage(string where, bool use_stream_data = true) const;
00223 
00224 protected:
00225     void*  m_Stream;          ///< Compressor stream
00226     int    m_Verbosity;       ///< Verbose monitoring/debugging output level
00227     int    m_WorkFactor;      ///< See <work_factor> description earlier in this file
00228     int    m_SmallDecompress; ///< Use memory-frugal decompression algorithm
00229 };
00230 
00231 
00232 
00233 //////////////////////////////////////////////////////////////////////////////
00234 ///
00235 /// CBZip2CompressionFile class --
00236 ///
00237 /// Throw exceptions on critical errors.
00238 
00239 class  CBZip2CompressionFile : public CBZip2Compression,
00240                                                 public CCompressionFile
00241 {
00242 public:
00243     /// Constructor.
00244     /// For a description of the special parameters see CBZip2Compression.
00245     CBZip2CompressionFile(
00246         const string& file_name,
00247         EMode         mode,
00248         ELevel        level            = eLevel_Default,
00249         int           verbosity        = 0,
00250         int           work_factor      = 0,
00251         int           small_decompress = 0 
00252     );
00253 
00254     /// Conventional constructor.
00255     /// For a description of the special parameters see CBZip2Compression.
00256     CBZip2CompressionFile(
00257         ELevel        level            = eLevel_Default,
00258         int           verbosity        = 0,
00259         int           work_factor      = 0,
00260         int           small_decompress = 0 
00261     );
00262 
00263     /// Destructor.
00264     ~CBZip2CompressionFile(void);
00265 
00266     /// Opens a compressed file for reading or writing.
00267     ///
00268     /// @param file_name
00269     ///   File name of the file to open.
00270     /// @param mode
00271     ///   File open mode.
00272     /// @return
00273     ///   TRUE if the file was opened successfully or FALSE otherwise.
00274     /// @sa
00275     ///   CBZip2Compression, Read, Write, Close
00276     virtual bool Open(const string& file_name, EMode mode);
00277 
00278     /// Read data from compressed file.
00279     /// 
00280     /// Read up to "len" uncompressed bytes from the compressed file
00281     /// into the buffer "buf". 
00282     /// @param buf
00283     ///    Buffer for requested data.
00284     /// @param len
00285     ///    Number of bytes to read.
00286     /// @return
00287     ///   Number of bytes actually read (0 for end of file, -1 for error).
00288     ///   The number of bytes actually read can be less than requested.
00289     /// @sa
00290     ///   Open, Write, Close
00291     virtual long Read(void* buf, size_t len);
00292 
00293     /// Write data to compressed file.
00294     /// 
00295     /// Writes the given number of uncompressed bytes from the buffer
00296     /// into the compressed file.
00297     /// @param buf
00298     ///    Buffer with the data to write.
00299     /// @param len
00300     ///    Number of bytes to write.
00301     /// @return
00302     ///   Number of bytes actually written or -1 for error.
00303     /// @sa
00304     ///   Open, Read, Close
00305     virtual long Write(const void* buf, size_t len);
00306 
00307     /// Close compressed file.
00308     ///
00309     /// Flushes all pending output if necessary, closes the compressed file.
00310     /// @return
00311     ///   TRUE on success, FALSE on error.
00312     /// @sa
00313     ///   Open, Read, Write
00314     virtual bool Close(void);
00315 
00316 protected:
00317     FILE*      m_FileStream;   ///< Underlying file stream
00318     bool       m_EOF;          ///< EOF flag for read mode
00319 };
00320 
00321 
00322 
00323 /////////////////////////////////////////////////////////////////////////////
00324 ///
00325 /// CBZip2Compressor -- bzip2 based compressor
00326 ///
00327 /// Used in CBZip2StreamCompressor.
00328 /// @sa CBZip2StreamCompressor, CBZip2Compression, CCompressionProcessor
00329 
00330 class  CBZip2Compressor : public CBZip2Compression,
00331                                            public CCompressionProcessor
00332 {
00333 public:
00334     /// Constructor. 'flags' is a bitwise OR of CBZip2Compression::EFlags.
00335     CBZip2Compressor(
00336         ELevel      level       = eLevel_Default,
00337         int         verbosity   = 0,           // [0..4]
00338         int         work_factor = 0,           // [0..250] 
00339         TBZip2Flags flags       = 0
00340     );
00341 
00342     /// Destructor.
00343     virtual ~CBZip2Compressor(void);
00344 
00345 protected:
00346     virtual EStatus Init   (void);
00347     virtual EStatus Process(const char* in_buf,  size_t  in_len,
00348                             char*       out_buf, size_t  out_size,
00349                             /* out */            size_t* in_avail,
00350                             /* out */            size_t* out_avail);
00351     virtual EStatus Flush  (char*       out_buf, size_t  out_size,
00352                             /* out */            size_t* out_avail);
00353     virtual EStatus Finish (char*       out_buf, size_t  out_size,
00354                             /* out */            size_t* out_avail);
00355     virtual EStatus End    (void);
00356 };
00357 
00358 
00359 /////////////////////////////////////////////////////////////////////////////
00360 ///
00361 /// CBZip2Decompressor -- bzip2 based decompressor
00362 ///
00363 /// Used in CBZip2StreamDecompressor.
00364 /// @sa CBZip2StreamDecompressor, CBZip2Compression, CCompressionProcessor
00365 
00366 class  CBZip2Decompressor : public CBZip2Compression,
00367                                              public CCompressionProcessor
00368 {
00369 public:
00370     /// Constructor. 'flags' is a bitwise OR of CBZip2Compression::EFlags.
00371     CBZip2Decompressor(
00372         int         verbosity        = 0,  // [0..4]
00373         int         small_decompress = 0,  // [0,1]
00374         TBZip2Flags flags            = 0
00375     );
00376 
00377     /// Destructor.
00378     virtual ~CBZip2Decompressor(void);
00379 
00380 protected:
00381     virtual EStatus Init   (void); 
00382     virtual EStatus Process(const char* in_buf,  size_t  in_len,
00383                             char*       out_buf, size_t  out_size,
00384                             /* out */            size_t* in_avail,
00385                             /* out */            size_t* out_avail);
00386     virtual EStatus Flush  (char*       out_buf, size_t  out_size,
00387                             /* out */            size_t* out_avail);
00388     virtual EStatus Finish (char*       out_buf, size_t  out_size,
00389                             /* out */            size_t* out_avail);
00390     virtual EStatus End    (void);
00391 };
00392 
00393 
00394 
00395 //////////////////////////////////////////////////////////////////////////////
00396 ///
00397 /// CBZip2StreamCompressor -- bzip2 based compression stream processor
00398 ///
00399 /// See util/compress/stream.hpp for details of stream processing.
00400 /// @sa CCompressionStreamProcessor
00401 
00402 class  CBZip2StreamCompressor
00403     : public CCompressionStreamProcessor
00404 {
00405 public:
00406     /// Constructor: wraps a newly allocated CBZip2Compressor, passed with eDelete.
00407     CBZip2StreamCompressor(
00408         CBZip2Compression::ELevel level       = CCompression::eLevel_Default,
00409         streamsize                in_bufsize  = kCompressionDefaultBufSize,
00410         streamsize                out_bufsize = kCompressionDefaultBufSize,
00411         int                       verbosity   = 0,
00412         int                       work_factor = 0,
00413         CBZip2Compression::TBZip2Flags flags  = 0
00414         )
00415 
00416         : CCompressionStreamProcessor(
00417               new CBZip2Compressor(level, verbosity, work_factor, flags),
00418               eDelete, in_bufsize, out_bufsize)
00419     {}
00420 };
00421 
00422 
00423 /////////////////////////////////////////////////////////////////////////////
00424 ///
00425 /// CBZip2StreamDecompressor -- bzip2 based decompression stream processor
00426 ///
00427 /// See util/compress/stream.hpp for details.
00428 /// @sa CCompressionStreamProcessor
00429 
00430 class  CBZip2StreamDecompressor
00431     : public CCompressionStreamProcessor
00432 {
00433 public:
00434     /// Full constructor: buffer sizes and decompressor settings are given explicitly.
00435     CBZip2StreamDecompressor(
00436         streamsize                     in_bufsize,
00437         streamsize                     out_bufsize,
00438         int                            verbosity,
00439         int                            small_decompress,
00440         CBZip2Compression::TBZip2Flags flags = 0
00441         )
00442         : CCompressionStreamProcessor(
00443              new CBZip2Decompressor(verbosity, small_decompress, flags),
00444              eDelete, in_bufsize, out_bufsize)
00445     {}
00446 
00447     /// Conventional constructor: default buffer sizes, verbosity 0, small_decompress 0.
00448     CBZip2StreamDecompressor(CBZip2Compression::TBZip2Flags flags = 0)
00449         : CCompressionStreamProcessor( 
00450               new CBZip2Decompressor(0, 0, flags),
00451               eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize)
00452     {}
00453 };
00454 
00455 
00456 END_NCBI_SCOPE
00457 
00458 
00459 /* @} */
00460 
00461 #endif  /* UTIL_COMPRESS__BZIP2__HPP */
00462 
00463 

Generated on Wed Dec 9 03:51:07 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:44 2009 by modify_doxy.py rev. 173732