bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
DmrppParserSax2.h
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2012 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#ifndef dmrpp_parser_sax2_h
27#define dmrpp_parser_sax2_h
28
29#define ATTR 1
30
31#include <cstring>
32
33#include <memory>
34#include <string>
35#include <iostream>
36#include <map>
37#include <unordered_map>
38#include <stack>
39
40#include <libxml/parserInternals.h>
41
42#include <libdap/Type.h> // from libdap
43#include "BESRegex.h"
44#include "url_impl.h"
45#include "EffectiveUrlCache.h"
46
// Carriage-return + line-feed character sequence.
47#define CRLF "\r\n"
// Parse buffer size in bytes (1048576 == 1024 * 1024). DmrppParserSax2::d_parse_buffer
// is declared with this many bytes plus one for the terminating NULL.
48#define D4_PARSE_BUFF_SIZE 1048576
49
// Forward declarations of libdap classes. The parser class declared below only
// stores or passes pointers to these types, so their full definitions are not
// needed in this header. (D4BaseTypeFactory is declared here but is not
// referenced elsewhere in this header.)
50namespace libdap {
51class DMR;
52class BaseType;
53class D4BaseTypeFactory;
54class D4Group;
55class D4Attributes;
56class D4EnumDef;
57class D4Dimension;
58}
59
60namespace dmrpp {
66class DmrppParserSax2
67{
68private:
71 enum ParseState {
72 parser_start,
73
74 inside_dataset,
75
76 // inside_group is the state just after parsing the start of a Group element.
77 inside_group,
78
79 inside_attribute_container,
80 inside_attribute,
81 inside_attribute_value,
82 inside_other_xml_attribute,
83
84 inside_enum_def,
85 inside_enum_const,
86
87 inside_dim_def,
88
89 // This covers Byte, ..., Url, Opaque
90 inside_simple_type,
91
92 // inside_array,
93 inside_dim,
94 inside_map,
95
96 inside_constructor,
97
98 not_dap4_element,
99 inside_dmrpp_object,
100 inside_dmrpp_chunkDimensionSizes_element,
101 inside_dmrpp_compact_element,
102
103 parser_unknown,
104 parser_error,
105 parser_fatal_error,
106
107 parser_end
108 };
109
110 char d_parse_buffer[D4_PARSE_BUFF_SIZE+1]; // Buff size plus one byte for NULL termination.
111
112 xmlSAXHandler dmrpp_sax_parser;
113
114 // The results of the parse operation are stored in these fields.
115 // This is passed into the parser using the intern() methods.
116 libdap::DMR *d_dmr; // dump DMR here
117 libdap::DMR *dmr() const { return d_dmr; }
118
119 // These stacks hold the state of the parse as it progresses.
120 std::stack<ParseState> s; // Current parse state
121 void push_state(DmrppParserSax2::ParseState state) { s.push(state); }
122 DmrppParserSax2::ParseState get_state() const { return s.top(); }
123 void pop_state() { s.pop(); }
124 bool empty_state() const { return s.empty(); }
125
126 std::stack<libdap::BaseType*> btp_stack; // current variable(s)
127 void push_basetype(libdap::BaseType *btp) { btp_stack.push(btp); }
128 libdap::BaseType *top_basetype() const { return btp_stack.top(); }
129 void pop_basetype() { btp_stack.pop(); }
130 bool empty_basetype() const { return btp_stack.empty(); }
131
132 std::stack<libdap::D4Group*> grp_stack; // current groups(s)
133 void push_group(libdap::D4Group *grp) { grp_stack.push(grp); }
134 libdap::D4Group *top_group() const { return grp_stack.top(); }
135 void pop_group() { grp_stack.pop(); }
136 bool empty_group() const { return grp_stack.empty(); }
137
138 std::stack<libdap::D4Attributes*> d_attrs_stack; // DAP4 Attributes
139 void push_attributes(libdap::D4Attributes *attr) { d_attrs_stack.push(attr); }
140 libdap::D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
141 void pop_attributes() { d_attrs_stack.pop(); }
142 bool empty_attributes() const { return d_attrs_stack.empty(); }
143
144 libdap::D4EnumDef *d_enum_def;
145 libdap::D4EnumDef *enum_def();
146 void clear_enum_def() { d_enum_def = 0; }
147
148 libdap::D4Dimension *d_dim_def;
149 libdap::D4Dimension *dim_def();
150 void clear_dim_def() { d_dim_def = 0; }
151
152 // Accumulate stuff inside an 'OtherXML' DAP attribute here
153 std::string other_xml;
154
155 // When we're parsing unknown XML, how deeply is it nested? This is used
156 // for the OtherXML DAP attributes.
157 unsigned int other_xml_depth;
158 unsigned int unknown_depth;
159
160 // These are used for processing errors.
161 std::string error_msg; // Error message(s), if any.
162 xmlParserCtxtPtr context; // used for error message line numbers
163
164 // These hold temporary values read during the parse.
165 std::string dods_attr_name; // DAP4 attributes, not XML attributes
166 std::string dods_attr_type; // ... not XML ...
167 std::string char_data; // char data in value elements; null after use
168 std::string root_ns; // What is the namespace of the root node (Group)
169
170 bool d_strict;
171
172 std::shared_ptr<http::url> dmrpp_dataset_href;
173
174 class XMLAttribute {
175 public:
176 std::string prefix;
177 std::string nsURI;
178 std::string value;
179
180 void clone(const XMLAttribute &src) {
181 prefix = src.prefix;
182 nsURI = src.nsURI;
183 value = src.value;
184 }
185
186 XMLAttribute() : prefix(""), nsURI(""), value("") {}
187 XMLAttribute(const std::string &p, const std::string &ns, const std::string &v)
188 : prefix(p), nsURI(ns), value(v) {}
189 // 'attributes' as passed from libxml2 is a five element array but this
190 // ctor gets the back four elements.
191 XMLAttribute(const xmlChar **attributes/*[4]*/) {
192 prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
193 nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
194 value = std::string((const char *)attributes[2], (const char *)attributes[3]);
195 }
196 XMLAttribute(const XMLAttribute &rhs) {
197 clone(rhs);
198 }
199 XMLAttribute &operator=(const XMLAttribute &rhs) {
200 if (this == &rhs)
201 return *this;
202 clone(rhs);
203 return *this;
204 }
205 };
206
207 typedef std::unordered_map<std::string, XMLAttribute> XMLAttrMap;
208 XMLAttrMap xml_attrs; // dump XML attributes here
209
210 XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
211
212 XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
213
214 std::map<std::string, std::string> namespace_table;
215
216 void cleanup_parse();
217
224#if 0
225 void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
226#endif
227 std::string get_attribute_val(const std::string &name, const xmlChar **attributes, int num_attributes);
228 void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
229 bool check_required_attribute(const std::string &attr);
230 bool check_required_attribute(const std::string &attr, const xmlChar **attributes, int num_attributes);
231 bool check_attribute(const std::string & attr);
232 bool check_attribute(const std::string &name, const xmlChar **attributes, int num_attributes);
233 void process_variable_helper(libdap::Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
234
235 void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
236 void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
237
238 bool process_dmrpp_compact_start(const char *name);
239 void process_dmrpp_compact_end(const char *localname);
240 bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
241 bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
242 bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
243 bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
244 bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
245 bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
246 bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
247 bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
248 bool process_dmrpp_object(const char *name, const xmlChar **attrs, int nb_attributes);
249
250 void finish_variable(const char *tag, libdap::Type t, const char *expected);
252
253 friend class DmrppParserSax2Test;
254
255public:
256 DmrppParserSax2() :
257 d_dmr(0), d_enum_def(0), d_dim_def(0),
258 other_xml(""), other_xml_depth(0), unknown_depth(0),
259 error_msg(""), context(0),
260 dods_attr_name(""), dods_attr_type(""),
261 char_data(""), root_ns(""), d_strict(true),
262 dmrpp_dataset_href(nullptr)
263 {
264 //xmlSAXHandler ddx_sax_parser;
265 memset(&dmrpp_sax_parser, 0, sizeof(xmlSAXHandler));
266
267 dmrpp_sax_parser.getEntity = &DmrppParserSax2::dmr_get_entity;
268 dmrpp_sax_parser.startDocument = &DmrppParserSax2::dmr_start_document;
269 dmrpp_sax_parser.endDocument = &DmrppParserSax2::dmr_end_document;
270 dmrpp_sax_parser.characters = &DmrppParserSax2::dmr_get_characters;
271 dmrpp_sax_parser.ignorableWhitespace = &DmrppParserSax2::dmr_ignoreable_whitespace;
272 dmrpp_sax_parser.cdataBlock = &DmrppParserSax2::dmr_get_cdata;
273 dmrpp_sax_parser.warning = &DmrppParserSax2::dmr_error;
274 dmrpp_sax_parser.error = &DmrppParserSax2::dmr_error;
275 dmrpp_sax_parser.fatalError = &DmrppParserSax2::dmr_fatal_error;
276 dmrpp_sax_parser.initialized = XML_SAX2_MAGIC;
277 dmrpp_sax_parser.startElementNs = &DmrppParserSax2::dmr_start_element;
278 dmrpp_sax_parser.endElementNs = &DmrppParserSax2::dmr_end_element;
279 }
280
281 ~DmrppParserSax2(){}
282
283 void intern(std::istream &f, libdap::DMR *dest_dmr);
284 void intern(const std::string &document, libdap::DMR *dest_dmr);
285 void intern(const char *buffer, int size, libdap::DMR *dest_dmr);
286
297
299 void set_strict(bool s) { d_strict = s; }
303 bool get_strict() const { return d_strict; }
305
306 static void dmr_start_document(void *parser);
307 static void dmr_end_document(void *parser);
308
309 static void dmr_start_element(void *parser,
310 const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
311 int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
312 int nb_defaulted, const xmlChar **attributes);
313 static void dmr_end_element(void *parser, const xmlChar *localname,
314 const xmlChar *prefix, const xmlChar *URI);
315
316 static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
317 static void dmr_ignoreable_whitespace(void *parser,
318 const xmlChar * ch, int len);
319 static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
320
321 static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
322 static void dmr_fatal_error(void *parser, const char *msg, ...);
323 static void dmr_error(void *parser, const char *msg, ...);
324
325 };
326
327} // namespace dmrpp
328
329#endif // dmrpp_parser_sax2_h
void intern(std::istream &f, libdap::DMR *dest_dmr)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
bool get_strict() const
Get the setting of the 'strict' mode.
void set_strict(bool s)
Set the 'strict' mode to true or false.
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_fatal_error(void *parser, const char *msg,...)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_start_document(void *parser)