NCBI C++ Toolkit Cross Reference

C++/src/util/sgml_entity.cpp


  1 /*  $Id: sgml_entity.cpp 164824 2009-07-01 15:29:16Z bollin $
  2  * ===========================================================================
  3  *
  4  *                            PUBLIC DOMAIN NOTICE
  5  *               National Center for Biotechnology Information
  6  *
  7  *  This software/database is a "United States Government Work" under the
  8  *  terms of the United States Copyright Act.  It was written as part of
  9  *  the author's official duties as a United States Government employee and
 10  *  thus cannot be copyrighted.  This software/database is freely available
 11  *  to the public for use. The National Library of Medicine and the U.S.
 12  *  Government have not placed any restriction on its use or reproduction.
 13  *
 14  *  Although all reasonable efforts have been taken to ensure the accuracy
 15  *  and reliability of the software and data, the NLM and the U.S.
 16  *  Government do not and cannot warrant the performance or results that
 17  *  may be obtained by using this software or data. The NLM and the U.S.
 18  *  Government disclaim all warranties, express or implied, including
 19  *  warranties of performance, merchantability or fitness for any particular
 20  *  purpose.
 21  *
 22  *  Please cite the author in any work or product based on this material.
 23  *
 24  * ===========================================================================
 25  *
 26  * Authors:  Mati Shomrat
 27  *
 28  * File Description:
 29  *   Functions to Convert SGML to ASCII for Backbone subset SGML
 30  */
 31 #include <ncbi_pch.hpp>
 32 #include <util/sgml_entity.hpp>
 33 #include <util/static_map.hpp>
 34 
 35 BEGIN_NCBI_SCOPE
 36 
// mapping from SGML to ASCII
//
// Each entry pairs an SGML entity name (the text found between '&' and ';',
// without those delimiters) with the ASCII text that stands in for it.
// Upper- and lower-case Greek letter entities map to the capitalized and
// lower-case spelled-out letter names respectively; "amp", "gt" and "lt" map
// to the literal characters, and "sfgr" maps to a plain "s".
//
// The entries below are listed in ascending byte order of the key (all
// upper-case names first, then lower-case names).
// NOTE(review): CStaticArrayMap presumably requires this ordering for its
// lookups -- confirm against util/static_map.hpp before inserting new
// entries, and keep new keys in sorted position.

typedef pair<string, string> TSgmlAsciiPair;
static const TSgmlAsciiPair sc_sgml_entity[] = {
    TSgmlAsciiPair("Agr" , "Alpha"),
    TSgmlAsciiPair("Bgr" , "Beta"),
    TSgmlAsciiPair("Dgr" , "Delta"),
    TSgmlAsciiPair("EEgr", "Eta"),
    TSgmlAsciiPair("Egr" , "Epsilon"),
    TSgmlAsciiPair("Ggr" , "Gamma"),
    TSgmlAsciiPair("Igr" , "Iota"),
    TSgmlAsciiPair("KHgr", "Chi"),
    TSgmlAsciiPair("Kgr" , "Kappa"),
    TSgmlAsciiPair("Lgr" , "Lambda"),
    TSgmlAsciiPair("Mgr" , "Mu"),
    TSgmlAsciiPair("Ngr" , "Nu"),
    TSgmlAsciiPair("OHgr", "Omega"),
    TSgmlAsciiPair("Ogr" , "Omicron"),
    TSgmlAsciiPair("PHgr", "Phi"),
    TSgmlAsciiPair("PSgr", "Psi"),
    TSgmlAsciiPair("Pgr" , "Pi"),
    TSgmlAsciiPair("Rgr" , "Rho"),
    TSgmlAsciiPair("Sgr" , "Sigma"),
    TSgmlAsciiPair("THgr", "Theta"),
    TSgmlAsciiPair("Tgr" , "Tau"),
    TSgmlAsciiPair("Ugr" , "Upsilon"),
    TSgmlAsciiPair("Xgr" , "Xi"),
    TSgmlAsciiPair("Zgr" , "Zeta"),
    TSgmlAsciiPair("agr" , "alpha"),
    TSgmlAsciiPair("amp" , "&"),
    TSgmlAsciiPair("bgr" , "beta"),
    TSgmlAsciiPair("dgr" , "delta"),
    TSgmlAsciiPair("eegr", "eta"),
    TSgmlAsciiPair("egr" , "epsilon"),
    TSgmlAsciiPair("ggr" , "gamma"),
    TSgmlAsciiPair("gt"  , ">"),
    TSgmlAsciiPair("igr" , "iota"),
    TSgmlAsciiPair("kgr" , "kappa"),
    TSgmlAsciiPair("khgr", "chi"),
    TSgmlAsciiPair("lgr" , "lambda"),
    TSgmlAsciiPair("lt"  , "<"),
    TSgmlAsciiPair("mgr" , "mu"),
    TSgmlAsciiPair("ngr" , "nu"),
    TSgmlAsciiPair("ogr" , "omicron"),
    TSgmlAsciiPair("ohgr", "omega"),
    TSgmlAsciiPair("pgr" , "pi"),
    TSgmlAsciiPair("phgr", "phi"),
    TSgmlAsciiPair("psgr", "psi"),
    TSgmlAsciiPair("rgr" , "rho"),
    TSgmlAsciiPair("sfgr", "s"),
    TSgmlAsciiPair("sgr" , "sigma"),
    TSgmlAsciiPair("tgr" , "tau"),
    TSgmlAsciiPair("thgr", "theta"),
    TSgmlAsciiPair("ugr" , "upsilon"),
    TSgmlAsciiPair("xgr" , "xi"),
    TSgmlAsciiPair("zgr" , "zeta")
};

// Static map over the array above; this is the lookup object used by the
// conversion and detection functions in this file.
typedef CStaticArrayMap<string, string> TSgmlAsciiMap;
DEFINE_STATIC_ARRAY_MAP(TSgmlAsciiMap, sc_SgmlAsciiMap, sc_sgml_entity);
 97 
 98 
 99 // in place conversion from SGML to ASCII
100 // we replace "&SGML entity; -> "<ASCII>"
101 void Sgml2Ascii(string& sgml)
102 {
103     SIZE_TYPE amp = sgml.find('&');
104     
105     while (amp != NPOS) {
106         SIZE_TYPE semi = sgml.find(';', amp);
107         if (semi != NPOS) {
108             size_t old_len = semi - amp - 1;
109             TSgmlAsciiMap::const_iterator it =
110                 sc_SgmlAsciiMap.find(sgml.substr(amp + 1, old_len));
111             if (it != sc_SgmlAsciiMap.end()) {
112                 size_t new_len = it->second.size();
113                 sgml[amp] = '<';
114                 sgml[semi] =  '>';
115                 sgml.replace(amp + 1, old_len, it->second);
116                 semi = amp + 1 + new_len;
117             }
118             else {
119                 semi = amp;
120             }
121         }
122         else {
123             semi = amp;
124         }
125         amp = sgml.find('&', semi + 1);
126     }
127 }
128 
129 
130 // conversion of SGML to ASCII
131 string Sgml2Ascii(const string& sgml)
132 {
133     string result = sgml;
134     Sgml2Ascii(result);
135     return result;
136 }
137 
138 
139 //detecting SGML in string
140 bool ContainsSgml(const string& str)
141 {
142         bool found = false;
143         size_t pos = NStr::Find(str, "&");
144         while (pos != string::npos && !found) {
145                 size_t len = 0;
146                 const char *end = str.c_str() + pos + 1;
147                 while (*end != 0 && isalpha (*end)) {
148                         len++;
149                         end++;
150                 }
151                 if (*end == ';' && len > 1) {
152                         string match = str.substr(pos + 1, len);
153 
154                         TSgmlAsciiMap::const_iterator it = sc_SgmlAsciiMap.begin();
155                         while (it != sc_SgmlAsciiMap.end() && !found) {
156                                 if (NStr::StartsWith(match, it->first)) {
157                                         found = true;
158                                 }
159                                 ++it;
160                         }
161                 }
162                 if (*end == 0) {
163                         pos = string::npos;
164                 } else if (!found) {
165                         pos = NStr::Find(str, "&", pos + len + 1);
166                 }
167         }
168     return found;
169 }
170 
171 
172 END_NCBI_SCOPE
173 

[ source navigation ]   [ diff markup ]   [ identifier search ]   [ freetext search ]   [ file search ]

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.