NCBI C++ ToolKit
archive_zip.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: archive_zip.cpp 72358 2016-05-03 17:12:04Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Vladimir Ivanov
27  *
28  * File Description:
29  * Compression archive API - ZIP file support.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <util/error_codes.hpp>
35 #include <util/compress/zlib.hpp>
36 #include "archive_zip.hpp"
37 
38 #define NCBI_USE_ERRCODE_X Util_Compress
39 
40 
42 
43 
44 // Directly include miniz library
45 #include "miniz/miniz.c"
46 
47 
48 /////////////////////////////////////////////////////////////////////////
49 //
50 // Constants / macros / typedefs
51 //
52 
53 /// ZIP archive handle type definition.
54 struct SZipHandle {
56  Reset();
57  }
58  void Reset(void) {
59  memset(&zip, 0, sizeof(zip));
60  }
62 };
63 
64 
// Helper macros to work with the zip-archive handle (m_Handle).
//
// ZIP_HANDLE  - pointer to the miniz archive structure stored in the handle.
//               Fully parenthesized so that it is safe in any expression.
// ZIP_CHECK   - debug assertion that the handle has been allocated.
// ZIP_NEW     - allocate a new handle; asserts that none exists yet.
// ZIP_DELETE  - destroy the current handle and reset the pointer to NULL.
//
// The multi-statement macros are wrapped in do { } while (0), so each one
// behaves as a single statement and must be followed by a semicolon.
#define ZIP_HANDLE (&(m_Handle->zip))
#define ZIP_CHECK  _ASSERT(m_Handle != NULL)
#define ZIP_NEW \
    do { \
        _ASSERT(m_Handle == NULL);  \
        m_Handle = new SZipHandle(); \
        _ASSERT(m_Handle != NULL);  \
    } while (0)

#define ZIP_DELETE \
    do { \
        _ASSERT(m_Handle != NULL); \
        delete m_Handle; \
        m_Handle = NULL; \
    } while (0)

// Throw CArchiveException with the given error code and message
#define ZIP_THROW(errcode, message) \
    NCBI_THROW(CArchiveException, errcode, message)
85 
86 
87 
88 /////////////////////////////////////////////////////////////////////////
89 //
90 // CZipArchive
91 //
92 
94 {
95  try {
96  if ( m_Handle ) {
97  Close();
98  delete m_Handle;
99  }
100  }
101  COMPRESS_HANDLE_EXCEPTIONS(94, "CArchiveZip::~CArchiveZip");
102 }
103 
104 
105 void CArchiveZip::CreateFile(const string& filename)
106 {
107  ZIP_NEW;
108  m_Mode = eWrite;
109  m_Location = eFile;
110  mz_bool status = mz_zip_writer_init_file(ZIP_HANDLE, filename.c_str(), 0);
111  if (!status) {
112  m_Handle = NULL;
113  ZIP_THROW(eCreate, "Cannot create archive file '" + filename + "'");
114  }
115  return;
116 }
117 
118 
119 void CArchiveZip::CreateMemory(size_t initial_allocation_size)
120 {
121  ZIP_NEW;
122  m_Mode = eWrite;
124  mz_bool status = mz_zip_writer_init_heap(ZIP_HANDLE, 0, initial_allocation_size);
125  if (!status) {
126  m_Handle = NULL;
127  ZIP_THROW(eCreate, "Cannot create archive in memory");
128  }
129  return;
130 }
131 
132 
133 void CArchiveZip::OpenFile(const string& filename)
134 {
135  ZIP_NEW;
136  m_Mode = eRead;
137  m_Location = eFile;
138  mz_bool status = mz_zip_reader_init_file(ZIP_HANDLE, filename.c_str(), 0);
139  if (!status) {
140  ZIP_DELETE;
141  ZIP_THROW(eOpen, "Cannot open archive file '" + filename + "'");
142  }
143  return;
144 }
145 
146 
147 void CArchiveZip::OpenMemory(const void* buf, size_t size)
148 {
149  ZIP_NEW;
150  m_Mode = eRead;
152  mz_bool status = mz_zip_reader_init_mem(ZIP_HANDLE, buf, size, 0);
153  if (!status) {
154  ZIP_DELETE;
155  ZIP_THROW(eOpen, "Cannot open archive in memory");
156  }
157  return;
158 }
159 
160 
161 void CArchiveZip::FinalizeMemory(void** buf, size_t* size)
162 {
164  _ASSERT(m_Mode == eWrite);
165  _ASSERT(buf);
166  _ASSERT(size);
167  ZIP_CHECK;
168 
169  *buf = NULL;
170  *size = 0;
172  if (!status) {
173  // Deallocate memory buffer to avoid memory leak
174  if (*buf) {
175  free(*buf);
176  *buf = NULL;
177  *size = 0;
178  }
179  ZIP_THROW(eMemory, "Cannot finalize archive in memory");
180  }
181  return;
182 }
183 
184 
186 {
187  _ASSERT(m_Mode == eRead || m_Mode == eWrite);
188  ZIP_CHECK;
189 
190  mz_bool status = true;
191  switch(m_Mode) {
192  case eRead:
193  status = mz_zip_reader_end(ZIP_HANDLE);
194  break;
195  case eWrite:
196  // Automatically finalize file archive only.
197  // The archive located in memory will be lost
198  // on this step, unless FinalizeMemory() was
199  // called before.
200  if (m_Location == eFile) {
202  }
203  if ( !mz_zip_writer_end(ZIP_HANDLE) ) {
204  status = false;
205  }
206  break;
207  default:
208  break;
209  }
210  if (!status) {
211  ZIP_THROW(eClose, "Error closing archive");
212  }
213  ZIP_DELETE;
214  return;
215 }
216 
217 
219 {
220  _ASSERT(m_Mode == eRead);
221  ZIP_CHECK;
223  // TODO: remove with zip64 support added to miniz
224  if (n == 65535) {
225  ZIP_THROW(eUnsupported, "Too many files in the archive");
226  }
227  return n;
228 }
229 
230 
232 {
233  _ASSERT(m_Mode == eRead);
234  _ASSERT(info);
235  ZIP_CHECK;
236 
237  // Check that index fits 'unsigned int', which is used internally in miniz
238  if (index > (size_t)kMax_UInt) {
239  NCBI_THROW(CCoreException, eInvalidArg, "Bad index value");
240  }
241  // Get file information
243  mz_bool status = mz_zip_reader_file_stat(ZIP_HANDLE, (mz_uint)index, &fs);
244  if (!status) {
245  ZIP_THROW(eList, "Cannot get entry information by index " +
246  NStr::SizetToString(index));
247  }
248  // Copy known data into CArchiveEntryInfo
249  info->m_Index = index;
250  info->m_CompressedSize = fs.m_comp_size;
251  info->m_Stat.st_size = fs.m_uncomp_size;
252  info->m_Stat.st_atime = fs.m_time;
253  info->m_Stat.st_ctime = fs.m_time;
254  info->m_Stat.st_mtime = fs.m_time;
255  info->m_Name.assign(fs.m_filename);
256  info->m_Comment.assign(fs.m_comment, fs.m_comment_size);
257 
258  // Rough check on a directory (using MS-DOS type compatible attribute)?
260  info->m_Type = status ? CDirEntry::eDir : CDirEntry::eFile;
261 
262  // miniz doesn't work with entry attributes, because they
263  // are very OS- and creation-software dependent.
264  // Try to analyze some common cases for Unix-type attributes:
265 
266  char ver = (char)(fs.m_version_made_by >> 8);
267  mode_t mode = (fs.m_external_attr >> 16) & 0xFFFF;
268 
269  switch (ver) {
270  // Unix
271  case 1: // Amiga
272  case 2: // VAX VMS
273  case 3: // Unix
274  case 4: // VM/CMS
275  case 5: // Atari ST
276  case 7: // Macintosh
277  case 8: // Z-System
278  case 9: // CP/M
279  {{
280  info->m_Stat.st_mode = mode;
281  info->m_Type = CDirEntry::GetType(info->m_Stat);
282  if (info->m_Type == CDirEntry::eUnknown) {
283  // Reset attributes value, we cannot be sure that
284  // it hold correct value
285  info->m_Stat.st_mode = 0;
286  }
287  }}
288  break;
289  // Dos
290  case 0: // MS-DOS or OS/2 FAT
291  case 6: // OS/2 HPFS
292  // Unknown
293  default:
294  break;
295  }
296  return;
297 }
298 
299 
301 {
302  switch (type) {
303  // supported
304  case CDirEntry::eFile:
305  case CDirEntry::eDir:
306  return true;
307  // unsupported
308  case CDirEntry::eLink:
311  case CDirEntry::ePipe:
312  case CDirEntry::eDoor:
313  case CDirEntry::eSocket:
314  case CDirEntry::eUnknown:
315  default:
316  break;
317  }
318  return false;
319 }
320 
321 
323  const string& dst_path)
324 {
325  _ASSERT(m_Mode == eRead);
326  ZIP_CHECK;
327 
328  // If this is a directory entry, we should create it.
329  if (info.GetType() == CDirEntry::eDir) {
330  if (!CDir(dst_path).CreatePath()) {
331  ZIP_THROW(eExtract, "Cannot create directory '" + dst_path + "'");
332  }
333  return;
334  }
335  // The code below extract files only.
336  mz_bool status;
337  MZ_FILE *pFile = MZ_FOPEN(dst_path.c_str(), "wb");
338  if (!pFile) {
339  ZIP_THROW(eExtract, "Cannot create target file '" + dst_path + "'");
340  }
342  mz_zip_file_write_callback, pFile, 0);
343  if (MZ_FCLOSE(pFile) == EOF) {
344  ZIP_THROW(eExtract, "Error close file '" + dst_path + "'");
345  }
346  if (!status) {
347  ZIP_THROW(eExtract, "Error extracting entry with index '" +
348  NStr::SizetToString(info.m_Index) + " to file '" + dst_path + "'");
349  }
350  return;
351 }
352 
353 
355 {
356  _ASSERT(m_Mode == eRead);
357  _ASSERT(buf);
358  _ASSERT(size);
359  ZIP_CHECK;
360 
361  // If this is a directory entry, skip it
362  if (info.GetType() == CDirEntry::eDir) {
363  return;
364  }
365  // The code below extract files only.
366  mz_bool status;
367  status = mz_zip_reader_extract_to_mem(ZIP_HANDLE, (mz_uint)info.m_Index, buf, size, 0);
368  if (!status) {
369  ZIP_THROW(eExtract, "Error extracting entry with index " +
370  NStr::SizetToString(info.m_Index) + " to memory");
371  }
372  return;
373 }
374 
375 
376 // Structure to pass all necessary data to write callback
380 };
381 
382 // Callback for extracting data, call user-defined callback to do a real job.
383 extern "C"
384 {
385  static size_t s_ZipExtractCallback(void* params, mz_uint64 /*ofs*/, const void* buf, size_t n)
386  {
387  _ASSERT(params);
388  SWriteCallbackData& data = *(SWriteCallbackData*)(params);
389  // Call user callback
390  size_t processed = data.callback(*data.info, buf, n);
391  return processed;
392  }
393 }
394 
395 void CArchiveZip::ExtractEntryToCallback(const CArchiveEntryInfo& info, Callback_Write callback)
396 {
397  _ASSERT(m_Mode == eRead);
398  ZIP_CHECK;
399 
400  // If this is a directory entry, skip it
401  if (info.GetType() == CDirEntry::eDir) {
402  return;
403  }
404  // The code below extract files only.
405  SWriteCallbackData data;
406  data.callback = callback;
407  data.info = &info;
408  mz_bool status;
410  s_ZipExtractCallback, &data, 0);
411  if (!status) {
412  ZIP_THROW(eExtract, "Error extracting entry with index " +
413  NStr::SizetToString(info.m_Index) + " to callback");
414  }
415  return;
416 }
417 
418 
419 // Dummy callback to test an entry extraction
420 extern "C"
421 {
422  static size_t s_ZipTestCallback(void* /*pOpaque*/, mz_uint64 /*ofs*/,
423  const void* /*pBuf*/, size_t n)
424  {
425  // Just return number of extracted bytes
426  return n;
427  }
428 }
429 
431 {
432  _ASSERT(m_Mode == eRead);
433  ZIP_CHECK;
434 
435  // If this is a directory entry, skip it
436  if (info.GetType() == CDirEntry::eDir) {
437  return;
438  }
439  // The code below test files only.
440  mz_bool status;
442  s_ZipTestCallback, 0, 0);
443  if (!status) {
444  ZIP_THROW(eExtract, "Test entry with index " +
445  NStr::SizetToString(info.m_Index) + " failed");
446  }
447  return;
448 }
449 
450 
452  const string& src_path, ELevel level)
453 {
454  // TODO: remove with zip64 support added to miniz
455  {{
457  if (n >= 65534) {
458  ZIP_THROW(eAppend, "Too many files in the archive");
459  }
460  }}
461 
462  const string& comment = info.m_Comment;
463  mz_uint16 comment_size = (mz_uint16)comment.size();
464  mz_bool status;
465  if (info.m_Type == CDirEntry::eDir) {
466  status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
467  NULL, 0, /* empty buffer */
468  comment.c_str(), comment_size, (mz_uint)level, 0, 0);
469  } else {
470  // Files only
473  info.GetName().c_str(), src_path.c_str(),
474  comment.c_str(), comment_size, (mz_uint)level);
475  }
476  if (!status) {
477  ZIP_THROW(eAppend, "Error appending entry '" + src_path + "' to archive");
478  }
479  return;
480 }
481 
482 
484  void* buf, size_t size, ELevel level)
485 {
486  // TODO: remove with zip64 support added to miniz
487  {{
489  if (n >= 65534) {
490  ZIP_THROW(eAppend, "Too many files in the archive");
491  }
492  }}
493 
494  const string& comment = info.m_Comment;
495  mz_uint16 comment_size = (mz_uint16)comment.size();
496  mz_bool status;
497  status = mz_zip_writer_add_mem_ex(ZIP_HANDLE, info.GetName().c_str(),
498  buf, size, comment.c_str(), comment_size, (mz_uint)level, 0, 0);
499  if (!status) {
500  ZIP_THROW(eAppend, "Error appending entry with name '" +
501  info.GetName() + "' from memory to archive");
502  }
503  return;
504 }
505 
506 
virtual void FinalizeMemory(void **buf, size_t *size)
Finalize the archive created in memory.
Regular file.
Definition: ncbifile.hpp:718
virtual void Close(void)
Close the archive.
char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]
Definition: miniz.c:534
#define COMPRESS_HANDLE_EXCEPTIONS(subcode, message)
Macro to catch and handle exceptions (from streams in the destructor)
Definition: compress.hpp:55
mz_bool mz_zip_reader_end(mz_zip_archive *pZip)
Definition: miniz.c:3996
mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip)
Definition: miniz.c:3422
char * buf
mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size)
Definition: miniz.c:4096
#define MZ_FCLOSE
Definition: miniz.c:3004
#define ZIP_HANDLE
Definition: archive_zip.cpp:66
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2628
static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n)
Definition: miniz.c:3970
CDirEntry::EType GetType(void) const
Definition: archive_.hpp:135
Door (UNIX only)
Definition: ncbifile.hpp:724
ELocation m_Location
Archive location (file/memory)
Definition: archive_.hpp:389
CArchiveEntryInfo class.
Definition: archive_.hpp:111
ZLib Compression API.
bool CreatePath(void) const
Create the directory path recursively possibly more than one at a time.
Definition: ncbifile.cpp:4005
#define ZIP_THROW(errcode, message)
Definition: archive_zip.cpp:83
mz_uint32 m_external_attr
Definition: miniz.c:530
unsigned long long mz_uint64
Definition: miniz.c:491
mz_zip_archive zip
Definition: archive_zip.cpp:61
const CArchiveEntryInfo * info
mz_uint16 m_version_made_by
Definition: miniz.c:519
unsigned int mode_t
Definition: ncbifile.hpp:79
mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
Definition: miniz.c:3798
virtual void ExtractEntryToFileSystem(const CArchiveEntryInfo &info, const string &dst_path)
Extracts an archive entry to file system.
#define NULL
Definition: ncbistd.hpp:225
unsigned short mz_uint16
Definition: miniz.c:487
mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags)
Definition: miniz.c:3361
Symbolic link (UNIX only)
Definition: ncbifile.hpp:721
EDirection m_Mode
Processing direction (read/write)
Definition: archive_.hpp:388
CCoreException –.
Definition: ncbiexpt.hpp:1292
virtual ~CArchiveZip(void)
Destructor.
Definition: archive_zip.cpp:93
static size_t s_ZipExtractCallback(void *params, mz_uint64, const void *buf, size_t n)
const string & GetName(void) const
Definition: archive_.hpp:136
EType
Directory entry type.
Definition: ncbifile.hpp:717
mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags)
Definition: miniz.c:3743
static size_t s_ZipTestCallback(void *, mz_uint64, const void *, size_t n)
virtual void GetEntryInfo(size_t index, CArchiveEntryInfo *info)
Get detailed information about an archive entry by index.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
TNcbiSys_stat m_Stat
Direntry-compatible info (as applicable)
Definition: archive_.hpp:161
Socket (UNIX only)
Definition: ncbifile.hpp:723
SZipHandle * m_Handle
Archive handle.
string m_Comment
Entry comment.
Definition: archive_.hpp:167
#define kMax_UInt
Definition: ncbi_limits.h:185
size_t m_Index
Entry index in the archive.
Definition: archive_.hpp:160
virtual void CreateFile(const string &filename)
Create new archive file.
CDir –.
Definition: ncbifile.hpp:1610
virtual void AddEntryFromFileSystem(const CArchiveEntryInfo &info, const string &src_path, ELevel level)
Add single entry to newly created archive from file system.
Writing into archive.
Definition: archive_.hpp:194
Definition: type.c:8
mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning)
Definition: miniz.c:4124
#define ZIP_NEW
Definition: archive_zip.cpp:68
virtual void ExtractEntryToCallback(const CArchiveEntryInfo &info, Callback_Write callback)
Extracts an archive file using user-defined callback to process extracted data.
IArchive::Callback_Write callback
mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat)
Definition: miniz.c:3469
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip)
Definition: miniz.c:4783
virtual void TestEntry(const CArchiveEntryInfo &info)
Verify entry integrity.
int size
virtual size_t GetNumEntries(void)
Returns the total number of entries in the archive.
unsigned int mz_uint
Definition: miniz.c:489
File-based archive.
Definition: archive_.hpp:198
#define MZ_FOPEN(f, m)
Definition: miniz.c:3003
Block special (UNIX only)
Definition: ncbifile.hpp:725
string m_Name
Entry name.
Definition: archive_.hpp:163
mz_uint64 m_uncomp_size
Definition: miniz.c:528
mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize)
Definition: miniz.c:4832
virtual void OpenFile(const string &filename)
Open archive file for reading.
ZIP archive handle type definition.
Definition: archive_zip.cpp:54
virtual void OpenMemory(const void *buf, size_t size)
Open archive located in memory for reading.
ELevel
Compression level.
Definition: compress.hpp:99
mz_uint64 m_comp_size
Definition: miniz.c:527
#define MZ_FILE
Definition: miniz.c:3002
#define ZIP_DELETE
Definition: archive_zip.cpp:75
mz_bool mz_zip_writer_end(mz_zip_archive *pZip)
Definition: miniz.c:4848
yy_size_t n
mz_uint32 m_comment_size
Definition: miniz.c:532
Uint8 m_CompressedSize
Compressed size.
Definition: archive_.hpp:168
#define _ASSERT
Directory.
Definition: ncbifile.hpp:719
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index)
Definition: miniz.c:3444
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string...
Definition: ncbiexpt.hpp:546
Unknown type.
Definition: ncbifile.hpp:728
int mz_bool
Definition: miniz.c:492
#define ZIP_CHECK
Definition: archive_zip.cpp:67
EType GetType(EFollowLinks follow=eIgnoreLinks) const
Get a type of a directory entry.
Definition: ncbifile.cpp:2211
void Reset(void)
Definition: archive_zip.cpp:58
virtual void ExtractEntryToMemory(const CArchiveEntryInfo &info, void *buf, size_t size)
Extracts an archive file to a memory buffer.
virtual bool HaveSupport_Type(CDirEntry::EType type)
Check that current archive format have support for specific feature.
virtual void CreateMemory(size_t initial_allocation_size=0)
Create new archive located in memory.
mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags)
Definition: miniz.c:4482
Reading from archive.
Definition: archive_.hpp:193
char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]
Definition: miniz.c:533
Character special.
Definition: ncbifile.hpp:726
Memory-based archive.
Definition: archive_.hpp:199
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
CDirEntry::EType m_Type
Type.
Definition: archive_.hpp:162
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags)
Definition: miniz.c:3392
mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32)
Definition: miniz.c:4331
size_t(* Callback_Write)(const CArchiveEntryInfo &info, const void *buf, size_t n)
Type of user-defined callback for extraction from archive.
Definition: archive_.hpp:215
virtual void AddEntryFromMemory(const CArchiveEntryInfo &info, void *buf, size_t size, ELevel level)
Add entry to newly created archive from memory buffer.
Modified on Mon Jul 25 16:54:52 2016 by modify_doxy.py rev. 506947