NCBI C++ ToolKit
pack_string.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: pack_string.cpp 65007 2014-10-29 19:01:34Z vasilche $
2 * ===========================================================================
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 * ===========================================================================
23 *
24 * Author: Eugene Vasilchenko
25 *
26 * File Description: Serialization hooks to make strings with equal value
27 * to share representation object.
28 *
29 */
30 
31 #include <ncbi_pch.hpp>
32 #include <serial/pack_string.hpp>
33 #include <serial/objistr.hpp>
34 #include <serial/objectiter.hpp>
35 
37 
// Name of the environment variable that is looked up (via s_GetEnvFlag) to
// decide whether string packing is enabled.
38 static const char* const STRING_PACK_ENV = "NCBI_SERIAL_PACK_STRINGS";
// Value that s_GetEnvFlag accepts, compared case-insensitively, as "enabled"
// (the literal "1" is accepted there as well).
39 static const char* const ENV_YES = "YES";
40 
// Limits used by the default CPackString constructor to initialize
// m_LengthLimit and m_CountLimit respectively.
41 static const size_t kDefaultLengthLimit = 32;
42 static const size_t kDefaultCountLimit = 32;
43 
45  : m_LengthLimit(kDefaultLengthLimit), m_CountLimit(kDefaultCountLimit),
46  m_Skipped(0), m_CompressedIn(0),
47  m_CompressedOut(0)
48 {
49 }
50 
51 
52 CPackString::CPackString(size_t length_limit, size_t count_limit)
53  : m_LengthLimit(length_limit), m_CountLimit(count_limit),
54  m_Skipped(0), m_CompressedIn(0),
55  m_CompressedOut(0)
56 {
57 }
58 
59 
61 {
62 }
63 
64 
66 {
67  size_t total = 0;
68  typedef multiset< pair<size_t, string> > TStat;
69  TStat stat;
70  ITERATE ( TStrings, i, m_Strings ) {
71  stat.insert(TStat::value_type(i->GetCount(), i->GetString()));
72  total += i->GetCount();
73  }
74  ITERATE ( TStat, i, stat ) {
75  out << setw(10) << i->first << " : \"" << i->second << "\"\n";
76  }
77  out << setw(10) << total << " = " << m_CompressedIn << " -> " << m_CompressedOut << " strings\n";
78  out << setw(10) << m_Skipped << " skipped\n";
79  return out;
80 }
81 
82 
83 bool CPackString::s_GetEnvFlag(const char* env, bool def_val)
84 {
85  const char* val = ::getenv(env);
86  if ( !val ) {
87  return def_val;
88  }
89  string s(val);
90  return s == "1" || NStr::CompareNocase(s, ENV_YES) == 0;
91 }
92 
93 
95 {
96  static bool use_string_pack = s_GetEnvFlag(STRING_PACK_ENV, true);
97  if ( !use_string_pack ) {
98  return false;
99  }
100 
101  string s1("test"), s2;
102  s2 = s1;
103  if ( s1.data() != s2.data() ) {
104  // strings don't use reference counters
105  return (use_string_pack = false);
106  }
107 
108  return true;
109 }
110 
111 
113 {
114  THROW1_TRACE(runtime_error,
115  "CPackString: bad ref counting");
116 }
117 
118 
119 bool CPackString::x_Assign(string& s, const string& src)
120 {
121  if ( TryStringPack() ) {
122  const_cast<string&>(src) = s;
123  s = src;
124  if ( s.data() != src.data() ) {
126  }
127  return true;
128  }
129  else {
130  return false;
131  }
132 }
133 
134 
135 bool CPackString::Pack(string& s)
136 {
137  if ( s.size() <= GetLengthLimit() ) {
138  SNode key(s);
139  iterator iter = m_Strings.lower_bound(key);
140  bool found = iter != m_Strings.end() && *iter == key;
141  if ( found ) {
142  AddOld(s, iter);
143  return false;
144  }
145  else if ( GetCount() < GetCountLimit() ) {
146  iter = m_Strings.insert(iter, key);
147  ++m_CompressedOut;
148  iter->SetString(s);
149  AddOld(s, iter);
150  return true;
151  }
152  }
153  Skipped();
154  return false;
155 }
156 
157 
158 bool CPackString::Pack(string& s, const char* data, size_t size)
159 {
160  if ( size <= GetLengthLimit() ) {
161  SNode key(data, size);
162  iterator iter = m_Strings.lower_bound(key);
163  bool found = iter != m_Strings.end() && *iter == key;
164  if ( found ) {
165  AddOld(s, iter);
166  return false;
167  }
168  else if ( GetCount() < GetCountLimit() ) {
169  iter = m_Strings.insert(iter, key);
170  ++m_CompressedOut;
171  iter->SetString();
172  AddOld(s, iter);
173  return true;
174  }
175  }
176  Skipped();
177  s.assign(data, size);
178  return false;
179 }
180 
181 
182 bool CPackString::AddNew(string& s, const char* data, size_t size,
183  iterator iter)
184 {
185  SNode key(data, size);
186  _ASSERT(size <= GetLengthLimit());
187  _ASSERT(iter == m_Strings.lower_bound(key));
188  _ASSERT(!(iter != m_Strings.end() && *iter == key));
189  if ( GetCount() < GetCountLimit() ) {
190  iter = m_Strings.insert(iter, key);
191  ++m_CompressedOut;
192  iter->SetString();
193  AddOld(s, iter);
194  return true;
195  }
196  Skipped();
197  s.assign(data, size);
198  return false;
199 }
200 
201 
203 {
204 }
205 
206 
208  size_t count_limit)
209  : m_PackString(length_limit, count_limit)
210 {
211 }
212 
213 
215 {
216 #if 0
217  NcbiCout << "CPackStringClassHook statistics:\n" <<
219 #endif
220 }
221 
222 
224 {
225 }
226 
227 
229  size_t count_limit)
230  : m_PackString(length_limit, count_limit)
231 {
232 }
233 
234 
236 {
237 #if 0
238  NcbiCout << "CPackStringChoiceHook statistics:\n" <<
240 #endif
241 }
242 
243 
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
static void x_RefCounterError(void)
const struct ncbi::grid::netcache::search::fields::KEY key
std::ofstream out("events_result.xml")
main entry point for tests
bool Pack(string &s)
CPackString(void)
Definition: pack_string.cpp:44
size_t GetCountLimit(void) const
#define NcbiCout
Definition: ncbistre.hpp:398
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
#define NcbiEndl
Definition: ncbistre.hpp:403
TStrings::iterator iterator
const_iterator end() const
Definition: set.hpp:136
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
int i
#define THROW1_TRACE(exception_class, exception_arg)
Throw trace.
Definition: ncbiexpt.hpp:257
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
size_t m_CompressedOut
CPackString m_PackString
set< SNode > m_Strings
size_t GetCount(void) const
size_t GetLengthLimit(void) const
const_iterator lower_bound(const key_type &key) const
Definition: set.hpp:138
static const char *const STRING_PACK_ENV
Definition: pack_string.cpp:38
CPackString m_PackString
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:142
int size
static bool x_Assign(string &s, const string &src)
~CPackString(void)
Definition: pack_string.cpp:60
void AddOld(string &s, const iterator &iter)
void Skipped(void)
static bool TryStringPack(void)
Definition: pack_string.cpp:94
static bool s_GetEnvFlag(const char *env, bool def_val)
Definition: pack_string.cpp:83
#define _ASSERT
static const char *const ENV_YES
Definition: pack_string.cpp:39
bool AddNew(string &s, const char *data, size_t size, iterator iter)
CNcbiOstream & DumpStatistics(CNcbiOstream &out) const
Definition: pack_string.cpp:65
size_t m_CompressedIn
static HENV env
Definition: transaction2.c:41
static int CompareNocase(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern)
Case-insensitive compare of a substring with a pattern.
Definition: ncbistr.cpp:170
Definition: set.hpp:44
size_t m_Skipped
static const size_t kDefaultLengthLimit
Definition: pack_string.cpp:41
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
static const size_t kDefaultCountLimit
Definition: pack_string.cpp:42
Modified on Sat May 27 15:23:10 2017 by modify_doxy.py rev. 533848