|
NCBI Home IEB Home C++ Toolkit docs C Toolkit source browser C Toolkit source browser (2) |
NCBI C++ Toolkit Cross Reference: C++/src/util/sgml_entity.cpp |
source navigation diff markup identifier search freetext search file search |
1 /* $Id: sgml_entity.cpp 164824 2009-07-01 15:29:16Z bollin $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Mati Shomrat
27 *
28 * File Description:
29 * Functions to Convert SGML to ASCII for Backbone subset SGML
30 */
31 #include <ncbi_pch.hpp>
32 #include <util/sgml_entity.hpp>
33 #include <util/static_map.hpp>
34
35 BEGIN_NCBI_SCOPE
36
// Lookup table translating Backbone-subset SGML entity names (the text that
// appears between '&' and ';') into their plain-ASCII spellings.
//
// NOTE: the entries are listed in ascending, case-sensitive (ASCII) order of
// the entity name.  Presumably the static map built over this array relies
// on that ordering, so preserve it when adding entries (confirm against
// CStaticArrayMap).

typedef pair<string, string> TSgmlAsciiPair;

static const TSgmlAsciiPair sc_sgml_entity[] = {
    // --- upper-case Greek letters ---
    TSgmlAsciiPair("Agr",  "Alpha"),
    TSgmlAsciiPair("Bgr",  "Beta"),
    TSgmlAsciiPair("Dgr",  "Delta"),
    TSgmlAsciiPair("EEgr", "Eta"),
    TSgmlAsciiPair("Egr",  "Epsilon"),
    TSgmlAsciiPair("Ggr",  "Gamma"),
    TSgmlAsciiPair("Igr",  "Iota"),
    TSgmlAsciiPair("KHgr", "Chi"),
    TSgmlAsciiPair("Kgr",  "Kappa"),
    TSgmlAsciiPair("Lgr",  "Lambda"),
    TSgmlAsciiPair("Mgr",  "Mu"),
    TSgmlAsciiPair("Ngr",  "Nu"),
    TSgmlAsciiPair("OHgr", "Omega"),
    TSgmlAsciiPair("Ogr",  "Omicron"),
    TSgmlAsciiPair("PHgr", "Phi"),
    TSgmlAsciiPair("PSgr", "Psi"),
    TSgmlAsciiPair("Pgr",  "Pi"),
    TSgmlAsciiPair("Rgr",  "Rho"),
    TSgmlAsciiPair("Sgr",  "Sigma"),
    TSgmlAsciiPair("THgr", "Theta"),
    TSgmlAsciiPair("Tgr",  "Tau"),
    TSgmlAsciiPair("Ugr",  "Upsilon"),
    TSgmlAsciiPair("Xgr",  "Xi"),
    TSgmlAsciiPair("Zgr",  "Zeta"),

    // --- lower-case Greek letters, interleaved with the markup
    //     characters "amp", "gt" and "lt" ---
    TSgmlAsciiPair("agr",  "alpha"),
    TSgmlAsciiPair("amp",  "&"),
    TSgmlAsciiPair("bgr",  "beta"),
    TSgmlAsciiPair("dgr",  "delta"),
    TSgmlAsciiPair("eegr", "eta"),
    TSgmlAsciiPair("egr",  "epsilon"),
    TSgmlAsciiPair("ggr",  "gamma"),
    TSgmlAsciiPair("gt",   ">"),
    TSgmlAsciiPair("igr",  "iota"),
    TSgmlAsciiPair("kgr",  "kappa"),
    TSgmlAsciiPair("khgr", "chi"),
    TSgmlAsciiPair("lgr",  "lambda"),
    TSgmlAsciiPair("lt",   "<"),
    TSgmlAsciiPair("mgr",  "mu"),
    TSgmlAsciiPair("ngr",  "nu"),
    TSgmlAsciiPair("ogr",  "omicron"),
    TSgmlAsciiPair("ohgr", "omega"),
    TSgmlAsciiPair("pgr",  "pi"),
    TSgmlAsciiPair("phgr", "phi"),
    TSgmlAsciiPair("psgr", "psi"),
    TSgmlAsciiPair("rgr",  "rho"),
    TSgmlAsciiPair("sfgr", "s"),
    TSgmlAsciiPair("sgr",  "sigma"),
    TSgmlAsciiPair("tgr",  "tau"),
    TSgmlAsciiPair("thgr", "theta"),
    TSgmlAsciiPair("ugr",  "upsilon"),
    TSgmlAsciiPair("xgr",  "xi"),
    TSgmlAsciiPair("zgr",  "zeta")
};
94
95 typedef CStaticArrayMap<string, string> TSgmlAsciiMap;
96 DEFINE_STATIC_ARRAY_MAP(TSgmlAsciiMap, sc_SgmlAsciiMap, sc_sgml_entity);
97
98
99 // in place conversion from SGML to ASCII
100 // we replace "&SGML entity; -> "<ASCII>"
101 void Sgml2Ascii(string& sgml)
102 {
103 SIZE_TYPE amp = sgml.find('&');
104
105 while (amp != NPOS) {
106 SIZE_TYPE semi = sgml.find(';', amp);
107 if (semi != NPOS) {
108 size_t old_len = semi - amp - 1;
109 TSgmlAsciiMap::const_iterator it =
110 sc_SgmlAsciiMap.find(sgml.substr(amp + 1, old_len));
111 if (it != sc_SgmlAsciiMap.end()) {
112 size_t new_len = it->second.size();
113 sgml[amp] = '<';
114 sgml[semi] = '>';
115 sgml.replace(amp + 1, old_len, it->second);
116 semi = amp + 1 + new_len;
117 }
118 else {
119 semi = amp;
120 }
121 }
122 else {
123 semi = amp;
124 }
125 amp = sgml.find('&', semi + 1);
126 }
127 }
128
129
130 // conversion of SGML to ASCII
131 string Sgml2Ascii(const string& sgml)
132 {
133 string result = sgml;
134 Sgml2Ascii(result);
135 return result;
136 }
137
138
139 //detecting SGML in string
140 bool ContainsSgml(const string& str)
141 {
142 bool found = false;
143 size_t pos = NStr::Find(str, "&");
144 while (pos != string::npos && !found) {
145 size_t len = 0;
146 const char *end = str.c_str() + pos + 1;
147 while (*end != 0 && isalpha (*end)) {
148 len++;
149 end++;
150 }
151 if (*end == ';' && len > 1) {
152 string match = str.substr(pos + 1, len);
153
154 TSgmlAsciiMap::const_iterator it = sc_SgmlAsciiMap.begin();
155 while (it != sc_SgmlAsciiMap.end() && !found) {
156 if (NStr::StartsWith(match, it->first)) {
157 found = true;
158 }
159 ++it;
160 }
161 }
162 if (*end == 0) {
163 pos = string::npos;
164 } else if (!found) {
165 pos = NStr::Find(str, "&", pos + len + 1);
166 }
167 }
168 return found;
169 }
170
171
172 END_NCBI_SCOPE
173 |
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more information. |