include/util/compress/zlib.hpp

Go to the documentation of this file.
00001 #ifndef UTIL_COMPRESS__ZLIB__HPP
00002 #define UTIL_COMPRESS__ZLIB__HPP
00003 
00004 /*  $Id: zlib.hpp 139071 2008-09-02 15:06:17Z ivanov $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Author:  Vladimir Ivanov
00030  *
00031  */
00032 
00033 /// @file zlib.hpp
00034 /// ZLib Compression API.
00035 ///
00036 /// CZipCompression        - base methods for compression/decompression
00037 ///                          of memory buffers and files.
00038 /// CZipCompressionFile    - allow read/write operations on files in
00039 ///                          zlib or gzip (.gz) format.
00040 /// CZipCompressor         - zlib based compressor
00041 ///                          (used in CZipStreamCompressor). 
00042 /// CZipDecompressor       - zlib based decompressor 
00043 ///                          (used in CZipStreamDecompressor). 
00044 /// CZipStreamCompressor   - zlib based compression stream processor
00045 ///                          (see util/compress/stream.hpp for details).
00046 /// CZipStreamDecompressor - zlib based decompression stream processor
00047 ///                          (see util/compress/stream.hpp for details).
00048 ///
00049 /// The zlib documentation can be found here: 
00050 ///     http://zlib.org,   or
00051 ///     http://www.gzip.org/zlib/manual.html
00052  
00053 
00054 #include <util/compress/stream.hpp>
00055 
00056 /** @addtogroup Compression
00057  *
00058  * @{
00059  */
00060 
00061 BEGIN_NCBI_SCOPE
00062 
00063 
00064 //////////////////////////////////////////////////////////////////////////////
00065 //
00066 // Special compressor's parameters (description from zlib docs)
00067 //        
00068 // <window_bits>
00069 //    This parameter is the base two logarithm of the window size
00070 //    (the size of the history buffer). It should be in the range 8..15 for
00071 //    this version of the library. Larger values of this parameter result
00072 //    in better compression at the expense of memory usage. 
00073 //
00074 // <mem_level> 
00075 //    The "mem_level" parameter specifies how much memory should be
00076 //    allocated for the internal compression state. mem_level=1 uses minimum
00077 //    memory but is slow and reduces compression ratio; mem_level=9 uses
00078 //    maximum memory for optimal speed. The default value is 8. See zconf.h
00079 //    for total memory usage as a function of windowBits and memLevel.
00080 //
00081 // <strategy> 
00082 //    The strategy parameter is used to tune the compression algorithm.
00083 //    Use the value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data
00084 //    produced by a filter (or predictor), or Z_HUFFMAN_ONLY to force
00085 //    Huffman encoding only (no string match). Filtered data consists mostly
00086 //    of small values with a somewhat random distribution. In this case,
00087 //    the compression algorithm is tuned to compress them better. The effect
00088 //    of Z_FILTERED is to force more Huffman coding and less string matching;
00089 //    it is somewhat intermediate between Z_DEFAULT and Z_HUFFMAN_ONLY.
00090 //    The strategy parameter only affects the compression ratio but not the
00091 //    correctness of the compressed output even if it is not set appropriately.
00092 
00093 // Sentinel values (-1): use the default values defined in the zlib library
00094 const int kZlibDefaultWbits       = -1;  ///< Default 'window_bits'
00095 const int kZlibDefaultMemLevel    = -1;  ///< Default 'mem_level'
00096 const int kZlibDefaultStrategy    = -1;  ///< Default 'strategy'
00097 const int kZlibDefaultCompression = -1;  ///< Default compression level
00098 
00099 
00100 /////////////////////////////////////////////////////////////////////////////
00101 ///
00102 /// CZipCompression --
00103 ///
00104 /// Defines base methods for compression/decompression of memory buffers
00105 /// and files.
00106 
00107 class  CZipCompression : public CCompression
00108 {
00109 public:
00110     /// Compression/decompression flags.
00111     enum EFlags {
00112         /// Allow transparent reading of data from a buffer/file/stream
00113         /// regardless of whether it is compressed or not. But be aware:
00114         /// if the data source contains broken data and the API cannot detect
00115         /// that it is compressed, you can get binary data instead of
00116         /// decompressed data. By default this flag is OFF.
00117         /// NOTE: zlib v1.1.4 and earlier have a bug in decoding.
00118         /// In some cases the decompressor can produce output data on invalid
00119         /// compressed data. So, it is not recommended to use this flag
00120         /// with old zlib versions.
00121         fAllowTransparentRead = (1<<0), 
00122         /// Check (and skip) file header for decompression stream
00123         fCheckFileHeader      = (1<<1), 
00124         /// Use gzip (.gz) file format to write into compression stream
00125         /// (the archive also can store file name and file modification
00126         /// date in this format)
00127         fWriteGZipFormat      = (1<<2),
00128         /// This flag can be used only with DecompressFile[IntoDir]().
00129         /// It allows restoring the original file name and/or time stamp
00130         /// stored in the file header, if present.
00131         /// @sa DecompressFile, DecompressFileIntoDir
00132         fRestoreFileAttr      = (1<<3)
00133     };
00134 
00135     /// Constructor.
00136     CZipCompression(
00137         ELevel level       = eLevel_Default,
00138         int    window_bits = kZlibDefaultWbits,     // [8..15]
00139         int    mem_level   = kZlibDefaultMemLevel,  // [1..9] 
00140         int    strategy    = kZlibDefaultStrategy   // [0..2]
00141     );
00142 
00143     /// Destructor.
00144     virtual ~CZipCompression(void);
00145 
00146     /// Return name and version of the compression library.
00147     virtual CVersionInfo GetVersion(void) const;
00148 
00149     /// Returns default compression level for a compression algorithm.
00150     virtual ELevel GetDefaultLevel(void) const
00151         { return ELevel(kZlibDefaultCompression); };
00152 
00153     //
00154     // Utility functions
00155     //
00156 
00157     /// Compress data in the buffer.
00158     ///
00159     /// Altogether, the total size of the destination buffer must be a little
00160     /// larger than the size of the source buffer.
00161     /// @param src_buf
00162     ///   Source buffer.
00163     /// @param src_len
00164     ///   Size of data in source buffer.
00165     /// @param dst_buf
00166     ///   Destination buffer.
00167     /// @param dst_size
00168     ///   Size of destination buffer.
00169     /// @param dst_len
00170     ///   Size of compressed data in destination buffer.
00171     /// @return
00172     ///   Return TRUE if the operation was successful or FALSE otherwise.
00173     ///   On success, 'dst_buf' contains compressed data of dst_len size.
00174     /// @sa
00175     ///   EstimateCompressionBufferSize, DecompressBuffer
00176     virtual bool CompressBuffer(
00177         const void* src_buf, size_t  src_len,
00178         void*       dst_buf, size_t  dst_size,
00179         /* out */            size_t* dst_len
00180     );
00181 
00182     /// Decompress data in the buffer.
00183     ///
00184     /// NOTE(review): the destination buffer presumably must be large enough
00185     /// to hold all decompressed data -- confirm against the implementation.
00186     /// @param src_buf
00187     ///   Source buffer.
00188     /// @param src_len
00189     ///   Size of data in source buffer.
00190     /// @param dst_buf
00191     ///   Destination buffer.
00192     /// @param dst_size
00193     ///   Size of destination buffer.
00194     /// @param dst_len
00195     ///   Size of decompressed data in destination buffer.
00196     /// @return
00197     ///   Return TRUE if the operation was successful or FALSE otherwise.
00198     ///   On success, 'dst_buf' contains decompressed data of dst_len size.
00199     /// @sa
00200     ///   CompressBuffer
00201     virtual bool DecompressBuffer(
00202         const void* src_buf, size_t  src_len,
00203         void*       dst_buf, size_t  dst_size,
00204         /* out */            size_t* dst_len
00205     );
00206 
00207     /// Estimate buffer size for data compression.
00208     ///
00209     /// The function shall estimate the size of buffer required to compress
00210     /// specified number of bytes of data using the CompressBuffer() function.
00211     /// This function may return a conservative value that may be larger
00212     /// than 'src_len'.
00213     /// @param src_len
00214     ///   Size of the data to be compressed.
00215     /// @return
00216     ///   Estimated buffer size.
00217     ///   Return -1 on error, or if this method is not supported by current
00218     ///   version of the zlib library.
00219     /// @sa
00220     ///   CompressBuffer
00221     long EstimateCompressionBufferSize(size_t src_len);
00222 
00223     /// Compress file.
00224     ///
00225     /// @param src_file
00226     ///   File name of source file.
00227     /// @param dst_file
00228     ///   File name of result file.
00229     /// @param buf_size
00230     ///   Buffer size used to read/write files.
00231     /// @return
00232     ///   Return TRUE on success, FALSE on error.
00233     /// @sa
00234     ///   DecompressFile, DecompressFileIntoDir
00235     /// @note
00236     ///   This method, as well as some gzip utilities, always
00237     ///   keeps the original file name and timestamp in
00238     ///   the compressed file. At the moment DecompressFile()
00239     ///   does not use the original file name at all, but be aware...
00240     ///   If you assign a different base name to the destination
00241     ///   compressed file, the behavior of decompression utilities
00242     ///   on different platforms may differ.
00243     ///   For example, WinZip on MS Windows always restores the
00244     ///   original file name and timestamp stored in the file.
00245     ///   UNIX gunzip has the -N option for this, but by default
00246     ///   does not use it, and just creates a decompressed file with
00247     ///   the name of the compressed file without the .gz extension.
00248     virtual bool CompressFile(
00249         const string& src_file,
00250         const string& dst_file,
00251         size_t        buf_size = kCompressionDefaultBufSize
00252     );
00253 
00254     /// Decompress file.
00255     ///
00256     /// @param src_file
00257     ///   File name of source file.
00258     /// @param dst_file
00259     ///   File name of result file.
00260     /// @param buf_size
00261     ///   Buffer size used to read/write files.
00262     /// @return
00263     ///   Return TRUE on success, FALSE on error.
00264     /// @sa
00265     ///   CompressFile, DecompressFileIntoDir
00266     /// @note
00267     ///   CompressFile() method, as well as some gzip utilities,
00268     ///   always keeps the original file name and timestamp in
00269     ///   the compressed file. If the fRestoreFileAttr flag is set,
00270     ///   the time stamp stored in the file header will be restored.
00271     ///   The original file name cannot be restored here,
00272     ///   see DecompressFileIntoDir().
00273     virtual bool DecompressFile(
00274         const string& src_file,
00275         const string& dst_file, 
00276         size_t        buf_size = kCompressionDefaultBufSize
00277     );
00278 
00279     /// Decompress file into specified directory.
00280     ///
00281     /// @param src_file
00282     ///   File name of source file.
00283     /// @param dst_dir
00284     ///   Destination directory.
00285     /// @param buf_size
00286     ///   Buffer size used to read/write files.
00287     /// @return
00288     ///   Return TRUE on success, FALSE on error.
00289     /// @sa
00290     ///   CompressFile, DecompressFile
00291     /// @note
00292     ///   CompressFile() method, as well as some gzip utilities,
00293     ///   always keeps the original file name and timestamp in
00294     ///   the compressed file. If the fRestoreFileAttr flag is set,
00295     ///   the original file name and time stamp stored in
00296     ///   the file header will be restored. If not, the destination
00297     ///   file will be named as the archive name without extension.
00298     virtual bool DecompressFileIntoDir(
00299         const string& src_file,
00300         const string& dst_dir, 
00301         size_t        buf_size = kCompressionDefaultBufSize
00302     );
00303 
00304     /// Structure to keep compressed file information.
00305     struct SFileInfo {
00306         string  name;
00307         string  comment;
00308         time_t  mtime;
00309         SFileInfo(void) : mtime(0) {};
00310     };
00311 
00312 protected:
00313     /// Format a string with the description of the last error.
00314     string FormatErrorMessage(string where, bool use_stream_data =true) const;
00315 
00316 protected:
00317     void*  m_Stream;     ///< Compressor stream.
00318     int    m_WindowBits; ///< The base two logarithm of the window size
00319                          ///< (the size of the history buffer). 
00320     int    m_MemLevel;   ///< The allocation memory level for the
00321                          ///< internal compression state.
00322     int    m_Strategy;   ///< The parameter to tune compression algorithm.
00323 };
00324 
00325  
00326 /////////////////////////////////////////////////////////////////////////////
00327 ///
00328 /// CZipCompressionFile --
00329 ///
00330 /// Allow read/write operations on files in zlib or gzip (.gz) formats.
00331 /// Throw exceptions on critical errors.
00332 
00333 class  CZipCompressionFile : public CZipCompression,
00334                                               public CCompressionFile
00335 {
00336 public:
00337     /// Constructor. Takes the name of the file and the open mode.
00338     /// For a description of the special parameters see CZipCompression.
00339     CZipCompressionFile(
00340         const string& file_name,
00341         EMode         mode,
00342         ELevel        level       = eLevel_Default,
00343         int           window_bits = kZlibDefaultWbits,
00344         int           mem_level   = kZlibDefaultMemLevel,
00345         int           strategy    = kZlibDefaultStrategy
00346     );
00347     /// Conventional constructor (no file name or mode; see Open()).
00348     /// For a description of the special parameters see CZipCompression.
00349     CZipCompressionFile(
00350         ELevel        level       = eLevel_Default,
00351         int           window_bits = kZlibDefaultWbits,
00352         int           mem_level   = kZlibDefaultMemLevel,
00353         int           strategy    = kZlibDefaultStrategy
00354     );
00355 
00356     /// Destructor
00357     ~CZipCompressionFile(void);
00358 
00359     /// Opens a compressed file for reading or writing.
00360     ///
00361     /// For reading/writing gzip (.gz) files the appropriate
00362     /// CZipCompression::EFlags flags should be set before Open() call.
00363     /// @param file_name
00364     ///   File name of the file to open.
00365     /// @param mode
00366     ///   File open mode.
00367     /// @return
00368     ///   TRUE if the file was opened successfully or FALSE otherwise.
00369     /// @sa
00370     ///   CZipCompression, Read, Write, Close
00371     virtual bool Open(const string& file_name, EMode mode);
00372 
00373     /// Opens a compressed file for reading or writing.
00374     ///
00375     /// Does the same as the standard Open(), but can also get/set file info.
00376     /// @param file_name
00377     ///   File name of the file to open.
00378     /// @param mode
00379     ///   File open mode.
00380     /// @param info
00381     ///   Pointer to file information structure. If it is not NULL,
00382     ///   then it will be used to get information about the compressed file
00383     ///   in the read mode, and to set it in the write mode for gzip files.
00384     /// @return
00385     ///   TRUE if the file was opened successfully or FALSE otherwise.
00386     /// @sa
00387     ///   CZipCompression, Read, Write, Close
00388     virtual bool Open(const string& file_name, EMode mode, SFileInfo* info);
00389 
00390     /// Read data from compressed file.
00391     /// 
00392     /// Read up to "len" uncompressed bytes from the compressed file
00393     /// into the buffer "buf".
00394     /// @param buf
00395     ///    Buffer for requested data.
00396     /// @param len
00397     ///    Number of bytes to read.
00398     /// @return
00399     ///   Number of bytes actually read (0 for end of file, -1 for error).
00400     ///   The number of bytes actually read can be less than requested.
00401     /// @sa
00402     ///   Open, Write, Close
00403     virtual long Read(void* buf, size_t len);
00404 
00405     /// Write data to compressed file.
00406     /// 
00407     /// Writes the given number of uncompressed bytes from the buffer
00408     /// into the compressed file.
00409     /// @param buf
00410     ///    Buffer with the data to write.
00411     /// @param len
00412     ///    Number of bytes to write.
00413     /// @return
00414     ///   Number of bytes actually written or -1 for error.
00415     /// @sa
00416     ///   Open, Read, Close
00417     virtual long Write(const void* buf, size_t len);
00418 
00419     /// Close compressed file.
00420     ///
00421     /// Flushes all pending output if necessary, closes the compressed file.
00422     /// @return
00423     ///   TRUE on success, FALSE on error.
00424     /// @sa
00425     ///   Open, Read, Write
00426     virtual bool Close(void);
00427 
00428 private:
00429     EMode                  m_Mode;     ///< I/O mode (read/write).
00430     CNcbiFstream*          m_File;     ///< File stream.
00431     CCompressionIOStream*  m_Zip;      ///< [De]compression stream.
00432 };
00433 
00434 
00435 /////////////////////////////////////////////////////////////////////////////
00436 ///
00437 /// CZipCompressor -- zlib based compressor
00438 ///
00439 /// Used in CZipStreamCompressor.
00440 /// @sa CZipStreamCompressor, CZipCompression, CCompressionProcessor
00441 
00442 class  CZipCompressor : public CZipCompression,
00443                                          public CCompressionProcessor
00444 {
00445 public:
00446     /// Constructor. For the special parameters see CZipCompression.
00447     CZipCompressor(
00448         ELevel               level       = eLevel_Default,
00449         int                  window_bits = kZlibDefaultWbits,
00450         int                  mem_level   = kZlibDefaultMemLevel,
00451         int                  strategy    = kZlibDefaultStrategy,
00452         CCompression::TFlags flags       = 0
00453     );
00454     /// Destructor.
00455     virtual ~CZipCompressor(void);
00456 
00457     /// Set information about compressed file.
00458     ///
00459     /// Used for compression of gzip files (see fWriteGZipFormat).
00460     void SetFileInfo(const SFileInfo& info);
00461 
00462 protected:
00463     virtual EStatus Init   (void);
00464     virtual EStatus Process(const char* in_buf,  size_t  in_len,
00465                             char*       out_buf, size_t  out_size,
00466                             /* out */            size_t* in_avail,
00467                             /* out */            size_t* out_avail);
00468     virtual EStatus Flush  (char*       out_buf, size_t  out_size,
00469                             /* out */            size_t* out_avail);
00470     virtual EStatus Finish (char*       out_buf, size_t  out_size,
00471                             /* out */            size_t* out_avail);
00472     virtual EStatus End    (void);
00473 
00474 private:
00475     unsigned long m_CRC32;    ///< CRC32 for compressed data.
00476     string        m_Cache;    ///< Buffer to cache small pieces of data.
00477     bool          m_NeedWriteHeader;
00478                               ///< TRUE if a file header needs to be written.
00479     SFileInfo     m_FileInfo; ///< Compressed file info.
00480 };
00481 
00482 
00483 
00484 /////////////////////////////////////////////////////////////////////////////
00485 ///
00486 /// CZipDecompressor -- zlib based decompressor
00487 ///
00488 /// Used in CZipStreamDecompressor.
00489 /// @sa CZipStreamDecompressor, CZipCompression, CCompressionProcessor
00490 
00491 class  CZipDecompressor : public CZipCompression,
00492                                            public CCompressionProcessor
00493 {
00494 public:
00495     /// Constructor. For the 'window_bits' parameter see CZipCompression.
00496     CZipDecompressor(
00497         int                  window_bits = kZlibDefaultWbits,
00498         CCompression::TFlags flags       = 0
00499     );
00500     /// Destructor.
00501     virtual ~CZipDecompressor(void);
00502 
00503 protected:
00504     virtual EStatus Init   (void); 
00505     virtual EStatus Process(const char* in_buf,  size_t  in_len,
00506                             char*       out_buf, size_t  out_size,
00507                             /* out */            size_t* in_avail,
00508                             /* out */            size_t* out_avail);
00509     virtual EStatus Flush  (char*       out_buf, size_t  out_size,
00510                             /* out */            size_t* out_avail);
00511     virtual EStatus Finish (char*       out_buf, size_t  out_size,
00512                             /* out */            size_t* out_avail);
00513     virtual EStatus End    (void);
00514 
00515 private:
00516     bool   m_NeedCheckHeader; ///< TRUE if the file header needs to be checked.
00517     string m_Cache;           ///< Buffer to cache small pieces of data.
00518 };
00519 
00520 
00521 
00522 /////////////////////////////////////////////////////////////////////////////
00523 ///
00524 /// CZipStreamCompressor -- zlib based compression stream processor
00525 ///
00526 /// See util/compress/stream.hpp for details.
00527 /// @sa CCompressionStreamProcessor
00528 
00529 class  CZipStreamCompressor
00530     : public CCompressionStreamProcessor
00531 {
00532 public:
00533     /// Full constructor: explicit level, buffer sizes and zlib parameters.
00534     CZipStreamCompressor(
00535         CCompression::ELevel  level,
00536         streamsize            in_bufsize,
00537         streamsize            out_bufsize,
00538         int                   window_bits,
00539         int                   mem_level,
00540         int                   strategy,
00541         CCompression::TFlags  flags = 0
00542         ) 
00543         : CCompressionStreamProcessor(
00544               new CZipCompressor(level,window_bits,mem_level,strategy,flags),
00545               eDelete, in_bufsize, out_bufsize)
00546     {}
00547 
00548     /// Conventional constructor: explicit level; default zlib parameters
00549     CZipStreamCompressor(
00550         CCompression::ELevel  level,
00551         CCompression::TFlags  flags = 0
00552         )
00553         : CCompressionStreamProcessor(
00554               new CZipCompressor(level, kZlibDefaultWbits,
00555                                  kZlibDefaultMemLevel, kZlibDefaultStrategy,
00556                                  flags),
00557               eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize)
00558     {}
00559 
00560     /// Conventional constructor: default level, zlib parameters and buffers
00561     CZipStreamCompressor(CCompression::TFlags flags = 0)
00562         : CCompressionStreamProcessor(
00563               new CZipCompressor(CCompression::eLevel_Default,
00564                                  kZlibDefaultWbits, kZlibDefaultMemLevel,
00565                                  kZlibDefaultStrategy, flags),
00566               eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize)
00567     {}
00568 };
00569 
00570 
00571 /////////////////////////////////////////////////////////////////////////////
00572 ///
00573 /// CZipStreamDecompressor -- zlib based decompression stream processor
00574 ///
00575 /// See util/compress/stream.hpp for details.
00576 /// @sa CCompressionStreamProcessor
00577 
00578 class  CZipStreamDecompressor
00579     : public CCompressionStreamProcessor
00580 {
00581 public:
00582     /// Full constructor: explicit buffer sizes, 'window_bits' and flags
00583     CZipStreamDecompressor(
00584         streamsize            in_bufsize,
00585         streamsize            out_bufsize,
00586         int                   window_bits,
00587         CCompression::TFlags  flags
00588         )
00589         : CCompressionStreamProcessor( 
00590               new CZipDecompressor(window_bits, flags),
00591               eDelete, in_bufsize, out_bufsize)
00592     {}
00593 
00594     /// Conventional constructor: default 'window_bits' and buffer sizes
00595     CZipStreamDecompressor(CCompression::TFlags flags = 0)
00596         : CCompressionStreamProcessor( 
00597               new CZipDecompressor(kZlibDefaultWbits, flags),
00598               eDelete, kCompressionDefaultBufSize, kCompressionDefaultBufSize)
00599     {}
00600 };
00601 
00602 
00603 END_NCBI_SCOPE
00604 
00605 
00606 /* @} */
00607 
00608 #endif  /* UTIL_COMPRESS__ZLIB__HPP */
00609 
00610 

Generated on Mon Oct 13 00:57:19 2008 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Oct 13 17:28:19 2008 by modify_doxy.py rev. 117643