include/util/compress/compress.hpp

Go to the documentation of this file.
00001 #ifndef UTIL_COMPRESS__COMPRESS__HPP
00002 #define UTIL_COMPRESS__COMPRESS__HPP
00003 
00004 /*  $Id: compress.hpp 149158 2009-01-07 19:46:38Z ivanov $
00005  * ===========================================================================
00006  *
00007  *                            PUBLIC DOMAIN NOTICE
00008  *               National Center for Biotechnology Information
00009  *
00010  *  This software/database is a "United States Government Work" under the
00011  *  terms of the United States Copyright Act.  It was written as part of
00012  *  the author's official duties as a United States Government employee and
00013  *  thus cannot be copyrighted.  This software/database is freely available
00014  *  to the public for use. The National Library of Medicine and the U.S.
00015  *  Government have not placed any restriction on its use or reproduction.
00016  *
00017  *  Although all reasonable efforts have been taken to ensure the accuracy
00018  *  and reliability of the software and data, the NLM and the U.S.
00019  *  Government do not and cannot warrant the performance or results that
00020  *  may be obtained by using this software or data. The NLM and the U.S.
00021  *  Government disclaim all warranties, express or implied, including
00022  *  warranties of performance, merchantability or fitness for any particular
00023  *  purpose.
00024  *
00025  *  Please cite the author in any work or product based on this material.
00026  *
00027  * ===========================================================================
00028  *
00029  * Author:  Vladimir Ivanov
00030  *
00031  * File Description:  The Compression API
00032  *
00033  */
00034 
00035 #include <corelib/ncbistd.hpp>
00036 #include <corelib/version.hpp>
00037 
00038 
00039 /** @addtogroup Compression
00040  *
00041  * @{
00042  */
00043 
00044 BEGIN_NCBI_SCOPE
00045 
00046 
00047 /// Default compression I/O stream buffer size.
00048 const streamsize kCompressionDefaultBufSize = 16*1024;
00049 
00050 /// Macro to report errors in compression API.
00051 #define ERR_COMPRESS(subcode, message) ERR_POST_X(subcode, Warning << message)
00052 
00053 // Forward declaration
00054 class CCompressionFile;
00055 class CCompressionStreambuf;
00056 
00057 
00058 //////////////////////////////////////////////////////////////////////////////
00059 //
00060 // ICompression -- abstract interface class
00061 //
00062 
00063 class  ICompression
00064 {
00065 public:
00066     /// Compression level.
00067     ///
00068     /// It is in range [0..9]. Increase of level might mean better compression
00069     /// and usualy greater time of compression. Usualy 1 gives best speed,
00070     /// 9 gives best compression, 0 gives no compression at all.
00071     /// eDefault value requests a compromise between speed and compression
00072     /// (according to developers of the corresponding compression algorithm).
00073     enum ELevel {
00074         eLevel_Default       = -1,  // default
00075         eLevel_NoCompression =  0,  // just store data
00076         eLevel_Lowest        =  1,
00077         eLevel_VeryLow       =  2,
00078         eLevel_Low           =  3,
00079         eLevel_MediumLow     =  4,
00080         eLevel_Medium        =  5,
00081         eLevel_MediumHigh    =  6,
00082         eLevel_High          =  7,
00083         eLevel_VeryHigh      =  8,
00084         eLevel_Best          =  9
00085     };
00086 
00087     /// Compression flags. The flag selection depends from compression
00088     /// algorithm implementation.
00089     typedef unsigned int TFlags;    // Bitwise OR of EFlags*
00090 
00091 public:
00092     /// Destructor
00093     virtual ~ICompression(void) {}
00094 
00095     /// Return name and version of the compression library.
00096     virtual CVersionInfo GetVersion(void) const = 0;
00097 
00098     // Get/set compression level.
00099     virtual void   SetLevel(ELevel level) = 0;
00100     virtual ELevel GetLevel(void) const = 0;
00101 
00102     /// Return the default compression level for current compression algorithm.
00103     virtual ELevel GetDefaultLevel(void) const = 0;
00104 
00105     // Get compressor's internal status/error code and description
00106     // for the last operation.
00107     virtual int    GetErrorCode(void) const = 0;
00108     virtual string GetErrorDescription(void) const = 0;
00109 
00110     // Get/set flags
00111     virtual TFlags GetFlags(void) const = 0;
00112     virtual void   SetFlags(TFlags flags) = 0;
00113 
00114     //
00115     // Utility functions 
00116     //
00117 
00118     // (De)compress the source buffer into the destination buffer.
00119     // Return TRUE on success, FALSE on error.
00120     // The compressor error code can be acquired via GetErrorCode() call.
00121     // Notice that altogether the total size of the destination buffer must
00122     // be little more then size of the source buffer. 
00123     virtual bool CompressBuffer(
00124         const void* src_buf, size_t  src_len,
00125         void*       dst_buf, size_t  dst_size,
00126         /* out */            size_t* dst_len
00127     ) = 0;
00128 
00129     virtual bool DecompressBuffer(
00130         const void* src_buf, size_t  src_len,
00131         void*       dst_buf, size_t  dst_size,
00132         /* out */            size_t* dst_len
00133     ) = 0;
00134 
00135     // (De)compress file "src_file" and put result to file "dst_file".
00136     // Return TRUE on success, FALSE on error.
00137     virtual bool CompressFile(
00138         const string&     src_file,
00139         const string&     dst_file,
00140         size_t            buf_size = kCompressionDefaultBufSize
00141     ) = 0;
00142     virtual bool DecompressFile(
00143         const string&     src_file,
00144         const string&     dst_file, 
00145         size_t            buf_size = kCompressionDefaultBufSize
00146     ) = 0;
00147 };
00148 
00149 
00150 //////////////////////////////////////////////////////////////////////////////
00151 //
00152 // CCompression -- abstract base class
00153 //
00154 
00155 class  CCompression : public ICompression
00156 {
00157 public:
00158     // 'ctors
00159     CCompression(ELevel level = eLevel_Default);
00160     virtual ~CCompression(void);
00161 
00162     /// Return name and version of the compression library.
00163     virtual CVersionInfo GetVersion(void) const = 0;
00164 
00165     // Get/set compression level.
00166     // NOTE 1:  Changing compression level after compression has begun will
00167     //          be ignored.
00168     // NOTE 2:  If the level is not supported by the underlying algorithm,
00169     //          then it will be translated to the nearest supported value.
00170     virtual void   SetLevel(ELevel level);
00171     virtual ELevel GetLevel(void) const;
00172 
00173     // Get compressor's internal status/error code and description
00174     // for the last operation.
00175     virtual int    GetErrorCode(void) const;
00176     virtual string GetErrorDescription(void) const;
00177 
00178     /// Get flags.
00179     virtual TFlags GetFlags(void) const;
00180     /// Set flags.
00181     virtual void   SetFlags(TFlags flags);
00182 
00183 protected:
00184     // Universal file compression/decompression functions.
00185     // Return TRUE on success, FALSE on error.
00186     virtual bool x_CompressFile(
00187         const string&     src_file,
00188         CCompressionFile& dst_file,
00189         size_t            buf_size = kCompressionDefaultBufSize
00190     );
00191     virtual bool x_DecompressFile(
00192         CCompressionFile& src_file,
00193         const string&     dst_file,
00194         size_t            buf_size = kCompressionDefaultBufSize
00195     );
00196 
00197     // Set last action error/status code and description
00198     void SetError(int status, const char* description = 0);
00199     void SetError(int status, const string& description);
00200 
00201 protected:
00202     /// Decompression mode (see fAllowTransparentRead flag).
00203     enum EDecompressMode {
00204         eMode_Unknown,         ///< Not known yet (decompress/transparent read)
00205         eMode_Decompress,      ///< Generic decompression
00206         eMode_TransparentRead  ///< Transparent read, the data is uncompressed
00207     };
00208     ///< Decompress mode (Decompress/TransparentRead/Unknown).
00209     EDecompressMode m_DecompressMode;
00210 
00211 private:
00212     ELevel  m_Level;      // Compression level
00213     int     m_ErrorCode;  // Last compressor action error/status
00214     string  m_ErrorMsg;   // Last compressor action error message
00215     TFlags  m_Flags;      // Bitwise OR of flags
00216 
00217     // Friend classes
00218     friend class CCompressionStreambuf;
00219 };
00220 
00221 
00222 
00223 //////////////////////////////////////////////////////////////////////////////
00224 //
00225 // CCompressionFile -- abstract base class
00226 //
00227 
00228 // Class supporting work with compressed files.
00229 // It is assumed that a file on hard disk is always compressed and that
00230 // data in memory is uncompressed.
00231 //
00232 
00233 class  CCompressionFile
00234 {
00235 public:
00236     /// Compression file handler
00237     typedef void* TFile;
00238 
00239     /// File open mode
00240     enum EMode {
00241         eMode_Read,         ///< Reading from compressed file
00242         eMode_Write         ///< Writing compressed data to file
00243     };
00244 
00245     // 'ctors
00246     CCompressionFile(void);
00247     CCompressionFile(const string& path, EMode mode); 
00248     virtual ~CCompressionFile(void);
00249 
00250     // Opens a compressed file for reading or writing.
00251     // Return NULL if error has been occurred.
00252     virtual bool Open(const string& path, EMode mode) = 0; 
00253 
00254     // Read up to "len" uncompressed bytes from the compressed file "file"
00255     // into the buffer "buf". Return the number of bytes actually read
00256     // (0 for end of file, -1 for error)
00257     virtual long Read(void* buf, size_t len) = 0;
00258 
00259     // Writes the given number of uncompressed bytes into the compressed file.
00260     // Return the number of bytes actually written or -1 for error.
00261     virtual long Write(const void* buf, size_t len) = 0;
00262 
00263     // Flushes all pending output if necessary, closes the compressed file.
00264     // Return TRUE on success, FALSE on error.
00265     virtual bool Close(void) = 0;
00266 
00267 protected:
00268     TFile  m_File;   ///< File handler.
00269     EMode  m_Mode;   ///< File open mode.
00270 };
00271 
00272 
00273 
00274 //////////////////////////////////////////////////////////////////////////////
00275 //
00276 // CCompressionProcessor -- abstract base class
00277 //
00278 // Contains functions for servicing a compression/decompression session.
00279 //
00280 
class CCompressionProcessor
{
public:
    /// Type of the result of all basic functions.
    enum EStatus {
        /// Everything is fine, no errors occurred.
        eStatus_Success,
        /// Special case of eStatus_Success.
        /// Logical end of (compressed) stream is detected, no errors occurred.
        /// All subsequent inquiries about data processing should be ignored.
        eStatus_EndOfData,
        /// Error has occurred. The error code can be acquired by GetErrorCode().
        eStatus_Error,
        /// Output buffer overflow - not enough output space.
        /// Buffer must be emptied and the last action repeated.
        eStatus_Overflow,
        /// Special value. Just need to repeat last action.
        eStatus_Repeat,
        /// Special value, status is undefined.
        eStatus_Unknown
    };

    /// Constructor.
    CCompressionProcessor();
    /// Destructor.
    virtual ~CCompressionProcessor();

    /// Return the compressor's busy flag.
    /// A returned value of true means that the current compression object
    /// is already being used in another compression session.
    bool IsBusy() const;

    /// Return the number of processed bytes.
    unsigned long GetProcessedSize();
    /// Return the number of output bytes.
    unsigned long GetOutputSize();

protected:
    /// Initialize the internal stream state for compression/decompression.
    /// It does not perform any compression; this will be done by Process().
    virtual EStatus Init() = 0;

    /// Compress/decompress as much data as possible, stopping when the
    /// input buffer becomes empty or the output buffer becomes full.
    /// It may introduce some output latency (reading input without
    /// producing any output).
    virtual EStatus Process
    (const char* in_buf,      // [in]  input buffer
     size_t      in_len,      // [in]  input data length
     char*       out_buf,     // [in]  output buffer
     size_t      out_size,    // [in]  output buffer size
     size_t*     in_avail,    // [out] count of unprocessed bytes in input buffer
     size_t*     out_avail    // [out] count of bytes put into output buffer
     ) = 0;

    /// Flush compressed/decompressed data from the output buffer.
    /// Flushing may degrade compression for some compression algorithms
    /// and so it should be used only when necessary.
    virtual EStatus Flush
    (char*       out_buf,     // [in]  output buffer
     size_t      out_size,    // [in]  output buffer size
     size_t*     out_avail    // [out] count of bytes put into output buffer
     ) = 0;

    /// Finish the compression/decompression process.
    /// Process pending input, flush pending output.
    /// This function is somewhat like Flush(), but it must be called only
    /// at the end of the compression process, before End().
    virtual EStatus Finish
    (char*       out_buf,     // [in]  output buffer
     size_t      out_size,    // [in]  output buffer size
     size_t*     out_avail    // [out] count of bytes put into output buffer
     ) = 0;

    /// Free all dynamically allocated data structures.
    /// This function discards any unprocessed input and does not flush
    /// any pending output.
    virtual EStatus End() = 0;

protected:
    /// Reset internal state.
    void Reset();

    /// Set/unset the compressor busy flag.
    void SetBusy(bool busy = true);

    /// Increase the number of processed bytes.
    void IncreaseProcessedSize(unsigned long n_bytes);
    /// Increase the number of output bytes.
    void IncreaseOutputSize(unsigned long n_bytes);

private:
    unsigned long m_ProcessedSize; ///< The number of processed bytes
    unsigned long m_OutputSize;    ///< The number of output bytes
    bool          m_Busy;          ///< Busy flag: SetBusy() refuses to set it
                                   ///< again while it is already true
    // Friend classes
    friend class CCompressionStream;
    friend class CCompressionStreambuf;
    friend class CCompressionStreamProcessor;
};
00379 
00380 
00381 /////////////////////////////////////////////////////////////////////////////
00382 //
00383 // CCompressionException
00384 //
00385 // Exceptions generated by CCompression and derived classes
00386 //
00387 
00388 class  CCompressionException : public CCoreException
00389 {
00390 public:
00391     enum EErrCode {
00392         eCompression,      ///< Compression/decompression error
00393         eCompressionFile   ///< Compression/decompression file error
00394     };
00395     virtual const char* GetErrCodeString(void) const
00396     {
00397         switch (GetErrCode()) {
00398         case eCompression     : return "eCompression";
00399         case eCompressionFile : return "eCompressionFile";
00400         default               : return CException::GetErrCodeString();
00401         }
00402     }
00403     NCBI_EXCEPTION_DEFAULT(CCompressionException,CCoreException);
00404 };
00405 
00406 
00407 /////////////////////////////////////////////////////////////////////////////
00408 //
00409 // CCompressionUtil
00410 //
00411 // Utility functions
00412 //
00413 
00414 class  CCompressionUtil
00415 {
00416 public:
00417     /// Store 4 bytes of value in the buffer.
00418     static void StoreUI4(void* buf, unsigned long value);
00419 
00420     /// Read 4 bytes from buffer.
00421     static Uint4 GetUI4(void* buf);
00422 
00423     /// Store 2 bytes of value in the buffer.
00424     static void StoreUI2(void* buf, unsigned long value);
00425 
00426     /// Read 2 bytes from buffer.
00427     static Uint2 GetUI2(void* buf);
00428 };
00429 
00430 
00431 /* @} */
00432 
00433 
00434 //===========================================================================
00435 //
00436 //  Inline
00437 //
00438 //===========================================================================
00439 
00440 inline
00441 void CCompressionProcessor::Reset(void)
00442 {
00443     m_ProcessedSize  = 0;
00444     m_OutputSize     = 0;
00445     m_Busy           = false;
00446 }
00447 
00448 inline
00449 bool CCompressionProcessor::IsBusy(void) const
00450 {
00451     return m_Busy;
00452 }
00453 
00454 inline
00455 void CCompressionProcessor::SetBusy(bool busy)
00456 {
00457     if ( busy  &&  m_Busy ) {
00458         NCBI_THROW(CCompressionException, eCompression,
00459                    "CCompression::SetBusy(): The compressor is busy now");
00460     }
00461     m_Busy = busy;
00462 }
00463 
00464 inline
00465 void CCompressionProcessor::IncreaseProcessedSize(unsigned long n_bytes)
00466 {
00467     m_ProcessedSize += n_bytes;
00468 }
00469 
00470 inline
00471 void CCompressionProcessor::IncreaseOutputSize(unsigned long n_bytes)
00472 {
00473     m_OutputSize += n_bytes;
00474 }
00475 
00476 inline
00477 unsigned long CCompressionProcessor::GetProcessedSize(void)
00478 {
00479     return m_ProcessedSize;
00480 }
00481 
00482 inline
00483 unsigned long CCompressionProcessor::GetOutputSize(void)
00484 {
00485     return m_OutputSize;
00486 }
00487 
00488 
00489 END_NCBI_SCOPE
00490 
00491 
00492 #endif  /* UTIL_COMPRESS__COMPRESS__HPP */
00493 
00494 

Generated on Sun Dec 6 22:15:53 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:49 2009 by modify_doxy.py rev. 173732