Orcus
sax_ns_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9 #define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10 
11 #include "sax_parser.hpp"
12 #include "xml_namespace.hpp"
13 #include "global.hpp"
14 
15 #include <unordered_set>
16 #include <vector>
17 #include <memory>
18 #include <algorithm>
19 
20 namespace orcus {
21 
23 {
24  xmlns_id_t ns; // element namespace
25  pstring ns_alias; // element namespace alias
26  pstring name; // element name
27  std::ptrdiff_t begin_pos; // position of the opening brace '<'.
28  std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
29 };
30 
32 {
33  xmlns_id_t ns; // attribute namespace
34  pstring ns_alias; // attribute namespace alias
35  pstring name; // attribute name
36  pstring value; // attribute value
37  bool transient; // whether or not the attribute value is transient.
38 };
39 
40 namespace __sax {
41 
43 {
44  pstring ns;
45  pstring name;
46 
47  entity_name(const pstring& _ns, const pstring& _name) :
48  ns(_ns), name(_name) {}
49 
50  bool operator== (const entity_name& other) const
51  {
52  return other.ns == ns && other.name == name;
53  }
54 
55  struct hash
56  {
57  size_t operator() (const entity_name& v) const
58  {
59  static pstring::hash hasher;
60  return hasher(v.ns) + hasher(v.name);
61  }
62  };
63 };
64 
65 typedef std::unordered_set<pstring, pstring::hash> ns_keys_type;
66 typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
67 
68 struct elem_scope
69 {
70  xmlns_id_t ns;
71  pstring name;
72  ns_keys_type ns_keys;
73 };
74 
75 typedef std::vector<std::unique_ptr<elem_scope>> elem_scopes_type;
76 
77 class pop_ns_by_key : std::unary_function<pstring, void>
78 {
79  xmlns_context& m_cxt;
80 public:
81  pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {}
82  void operator() (const pstring& key)
83  {
84  m_cxt.pop(key);
85  }
86 };
87 
88 }
89 
93 template<typename _Handler>
95 {
96 public:
97  typedef _Handler handler_type;
98 
99  sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler);
100  sax_ns_parser(const char* content, const size_t size, bool transient_stream,
101  xmlns_context& ns_cxt, handler_type& handler);
102  ~sax_ns_parser();
103 
104  void parse();
105 
106 private:
111  class handler_wrapper
112  {
113  __sax::elem_scopes_type m_scopes;
114  __sax::ns_keys_type m_ns_keys;
115  __sax::entity_names_type m_attrs;
116 
117  sax_ns_parser_element m_elem;
119 
120  xmlns_context& m_ns_cxt;
121  handler_type& m_handler;
122 
123  bool m_declaration;
124 
125  public:
126  handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
127 
128  void doctype(const sax::doctype_declaration& dtd)
129  {
130  m_handler.doctype(dtd);
131  }
132 
133  void start_declaration(const pstring& name)
134  {
135  m_declaration = true;
136  m_handler.start_declaration(name);
137  }
138 
139  void end_declaration(const pstring& name)
140  {
141  m_declaration = false;
142  m_handler.end_declaration(name);
143  }
144 
145  void start_element(const sax::parser_element& elem)
146  {
147  m_scopes.push_back(orcus::make_unique<__sax::elem_scope>());
148  __sax::elem_scope& scope = *m_scopes.back();
149  scope.ns = m_ns_cxt.get(elem.ns);
150  scope.name = elem.name;
151  scope.ns_keys.swap(m_ns_keys);
152 
153  m_elem.ns = scope.ns;
154  m_elem.ns_alias = elem.ns;
155  m_elem.name = scope.name;
156  m_elem.begin_pos = elem.begin_pos;
157  m_elem.end_pos = elem.end_pos;
158  m_handler.start_element(m_elem);
159 
160  m_attrs.clear();
161  }
162 
163  void end_element(const sax::parser_element& elem)
164  {
165  __sax::elem_scope& scope = *m_scopes.back();
166  if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
167  throw sax::malformed_xml_error("mis-matching closing element.", -1);
168 
169  m_elem.ns = scope.ns;
170  m_elem.ns_alias = elem.ns;
171  m_elem.name = scope.name;
172  m_elem.begin_pos = elem.begin_pos;
173  m_elem.end_pos = elem.end_pos;
174  m_handler.end_element(m_elem);
175 
176  // Pop all namespaces declared in this scope.
177  std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt));
178 
179  m_scopes.pop_back();
180  }
181 
182  void characters(const pstring& val, bool transient)
183  {
184  m_handler.characters(val, transient);
185  }
186 
187  void attribute(const sax::parser_attribute& attr)
188  {
189  if (m_declaration)
190  {
191  // XML declaration attribute. Pass it through to the handler without namespace.
192  m_handler.attribute(attr.name, attr.value);
193  return;
194  }
195 
196  if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0)
198  "You can't define two attributes of the same name in the same element.", -1);
199 
200  m_attrs.insert(__sax::entity_name(attr.ns, attr.name));
201 
202  if (attr.ns.empty() && attr.name == "xmlns")
203  {
204  // Default namespace
205  m_ns_cxt.push(pstring(), attr.value);
206  m_ns_keys.insert(pstring());
207  return;
208  }
209 
210  if (attr.ns == "xmlns")
211  {
212  // Namespace alias
213  if (!attr.name.empty())
214  {
215  m_ns_cxt.push(attr.name, attr.value);
216  m_ns_keys.insert(attr.name);
217  }
218  return;
219  }
220 
221  m_attr.ns = m_ns_cxt.get(attr.ns);
222  m_attr.ns_alias = attr.ns;
223  m_attr.name = attr.name;
224  m_attr.value = attr.value;
225  m_attr.transient = attr.transient;
226  m_handler.attribute(m_attr);
227  }
228  };
229 
230 private:
231  handler_wrapper m_wrapper;
233 };
234 
235 template<typename _Handler>
237  const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) :
238  m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper)
239 {
240 }
241 
242 template<typename _Handler>
243 sax_ns_parser<_Handler>::sax_ns_parser(
244  const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) :
245  m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper)
246 {
247 }
248 
249 template<typename _Handler>
250 sax_ns_parser<_Handler>::~sax_ns_parser()
251 {
252 }
253 
254 template<typename _Handler>
255 void sax_ns_parser<_Handler>::parse()
256 {
257  m_parser.parse();
258 }
259 
260 }
261 
262 #endif
263 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: pstring.hpp:82
Definition: sax_ns_parser.hpp:68
Definition: pstring.hpp:27
Definition: sax_ns_parser.hpp:55
Definition: sax_ns_parser.hpp:42
Definition: sax_ns_parser.hpp:31
Definition: sax_parser_base.hpp:33
Definition: sax_ns_parser.hpp:94
Definition: xml_namespace.hpp:82
Definition: sax_ns_parser.hpp:22
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_parser_base.hpp:45
Definition: sax_ns_parser.hpp:77
xmlns_id_t get(const pstring &key) const
Definition: base64.hpp:15