libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.h
Go to the documentation of this file.
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2012 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#ifndef d4_parser_sax2_h
27#define d4_parser_sax2_h
28
29#define ATTR 1
30
31#include <string.h>
32
33#include <iostream>
34#include <map>
35#include <stack>
36#include <string>
37
38#include <libxml/parserInternals.h>
39
40#define CRLF "\r\n"
41#define D4_PARSE_BUFF_SIZE 1048576
42
43namespace libdap {
44
45class DMR;
46class BaseType;
47class D4BaseTypeFactory;
48class D4Group;
49class D4Attributes;
50class D4EnumDef;
51class D4Dimension;
52
77private:
80 enum ParseState {
81 parser_start,
82
83 inside_dataset,
84
85 // inside_group is the state just after parsing the start of a Group
86 // element.
87 inside_group,
88
89 inside_attribute_container,
90 inside_attribute,
91 inside_attribute_value,
92 inside_other_xml_attribute,
93
94 inside_enum_def,
95 inside_enum_const,
96
97 inside_dim_def,
98
99 // This covers Byte, ..., Url, Opaque
100 inside_simple_type,
101
102 // inside_array,
103 inside_dim,
104 inside_map,
105
106 inside_constructor,
107
108 // inside_sequence, Removed from merged code jhrg 5/2/14
109
110 not_dap4_element,
111
112 parser_unknown,
113 parser_error,
114 parser_fatal_error,
115
116 parser_end
117 };
118 char d_parse_buffer[D4_PARSE_BUFF_SIZE + 1]; // Buffer size plus one byte for the terminating NUL character.
119
120 xmlSAXHandler d_dmr_sax_parser;
121
122 // The results of the parse operation are stored in the DMR below.
123 // The DMR is passed into the parser using the intern() methods.
124 DMR *d_dmr; // dump DMR here
125 DMR *dmr() const { return d_dmr; }
126
127 // These stacks hold the state of the parse as it progresses.
128 stack<ParseState> s; // Current parse state
129 void push_state(D4ParserSax2::ParseState state) { s.push(state); }
130 D4ParserSax2::ParseState get_state() const { return s.top(); }
131 void pop_state() { s.pop(); }
132 bool empty_state() const { return s.empty(); }
133
134 stack<BaseType *> btp_stack; // current variable(s)
135 void push_basetype(BaseType *btp) { btp_stack.push(btp); }
136 BaseType *top_basetype() const { return btp_stack.top(); }
137 void pop_basetype() { btp_stack.pop(); }
138 bool empty_basetype() const { return btp_stack.empty(); }
139
140 stack<D4Group *> grp_stack; // current group(s)
141 void push_group(D4Group *grp) { grp_stack.push(grp); }
142 D4Group *top_group() const { return grp_stack.top(); }
143 void pop_group() { grp_stack.pop(); }
144 bool empty_group() const { return grp_stack.empty(); }
145
146 stack<D4Attributes *> d_attrs_stack; // DAP4 Attributes
147 void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
148 D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
149 void pop_attributes() { d_attrs_stack.pop(); }
150 bool empty_attributes() const { return d_attrs_stack.empty(); }
151
152 D4EnumDef *d_enum_def;
153 D4EnumDef *enum_def();
154 void clear_enum_def() { d_enum_def = 0; }
155
156 D4Dimension *d_dim_def;
157 D4Dimension *dim_def();
158 void clear_dim_def() { d_dim_def = 0; }
159
160 // Accumulate stuff inside an 'OtherXML' DAP attribute here
161 string other_xml;
162
163 // When we're parsing unknown XML, how deeply is it nested? This is used
164 // for the OtherXML DAP attributes.
165 unsigned int other_xml_depth;
166 unsigned int unknown_depth;
167
168 // These are used for processing errors.
169 string d_error_msg; // Error message(s), if any.
170 xmlParserCtxtPtr d_context; // used for error message line numbers
171
172 // These hold temporary values read during the parse.
173 string dods_attr_name; // DAP4 attributes, not XML attributes
174 string dods_attr_type; // ... not XML ...
175 string char_data; // char data in value elements; null after use
176 string root_ns; // What is the namespace of the root node (Group)
177
178 bool d_debug;
179 bool debug() const { return d_debug; }
180
181 bool d_strict;
182
183 class XMLAttribute {
184 public:
185 string prefix;
186 string nsURI;
187 string value;
188
189 void clone(const XMLAttribute &src) {
190 prefix = src.prefix;
191 nsURI = src.nsURI;
192 value = src.value;
193 }
194
195 XMLAttribute() : prefix(""), nsURI(""), value("") {}
196 XMLAttribute(const string &p, const string &ns, const string &v) : prefix(p), nsURI(ns), value(v) {}
197 // 'attributes' as passed from libxml2 is a five-element array, but this
198 // ctor receives only the last four elements: prefix, namespace URI, and the start and end of the value.
199 XMLAttribute(const xmlChar **attributes /*[4]*/) {
200 prefix = attributes[0] != 0 ? (const char *)attributes[0] : "";
201 nsURI = attributes[1] != 0 ? (const char *)attributes[1] : "";
202 value = string((const char *)attributes[2], (const char *)attributes[3]);
203 }
204 XMLAttribute(const XMLAttribute &rhs) { clone(rhs); }
207 if (this == &rhs)
208 return *this;
209 clone(rhs);
210 return *this;
211 }
212 };
213
214 typedef map<string, XMLAttribute> XMLAttrMap;
215 XMLAttrMap xml_attrs; // dump XML attributes here
216
217 XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
218
219 XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
220
221 map<string, string> namespace_table;
222
223 void cleanup_parse();
224
231 void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
232 void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
233 bool check_required_attribute(const string &attr);
234 bool check_attribute(const string &attr);
235 void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
236
237 void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
238 void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
239
240 bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
241 bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
242 bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
243 bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
244 bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
245 bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
246 bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
247 bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
248
249 void finish_variable(const char *tag, Type t, const char *expected);
251
252 friend class D4ParserSax2Test;
253
254public:
256 : d_dmr(0), d_enum_def(0), d_dim_def(0), other_xml(""), other_xml_depth(0), unknown_depth(0), d_error_msg(""),
257 d_context(0), dods_attr_name(""), dods_attr_type(""), char_data(""), root_ns(""), d_debug(false),
258 d_strict(true) {
259 // xmlSAXHandler ddx_sax_parser;
260 memset(&d_dmr_sax_parser, 0, sizeof(xmlSAXHandler));
261
262 d_dmr_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
263 d_dmr_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
264 d_dmr_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
265 d_dmr_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
266 d_dmr_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
267 d_dmr_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
268 d_dmr_sax_parser.warning = &D4ParserSax2::dmr_error;
269 d_dmr_sax_parser.error = &D4ParserSax2::dmr_error;
270 d_dmr_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
271 d_dmr_sax_parser.initialized = XML_SAX2_MAGIC;
272 d_dmr_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
273 d_dmr_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
274 }
275
276 void intern(istream &f, DMR *dest_dmr, bool debug = false);
277 // Deprecated - this does not read from a file; it parses the text in the string 'document'.
278 void intern(const string &document, DMR *dest_dmr, bool debug = false);
279 void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
280
291
293 void set_strict(bool s) { d_strict = s; }
297 bool get_strict() const { return d_strict; }
299
300 static void dmr_start_document(void *parser);
301 static void dmr_end_document(void *parser);
302
303 static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
304 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted,
305 const xmlChar **attributes);
306 static void dmr_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI);
307
308 static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
309 static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len);
310 static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
311
312 static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
313 static void dmr_fatal_error(void *parser, const char *msg, ...);
314 static void dmr_error(void *parser, const char *msg, ...);
315};
316
317} // namespace libdap
318
319#endif // d4_parser_sax2_h
#define D4_PARSE_BUFF_SIZE
STL class.
The basic data type for the DODS DAP types.
Definition BaseType.h:118
friend class D4ParserSax2Test
void intern(istream &f, DMR *dest_dmr, bool debug=false)
bool get_strict() const
Get the setting of the 'strict' mode.
static void dmr_start_document(void *parser)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
XMLAttribute & operator=(const XMLAttribute &rhs)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_error(void *parser, const char *msg,...)
static void dmr_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
void clone(const XMLAttribute &src)
static void dmr_end_document(void *parser)
XMLAttribute(const xmlChar **attributes)
XMLAttribute(const XMLAttribute &rhs)
XMLAttribute(const string &p, const string &ns, const string &v)
void set_strict(bool s)
Set the 'strict' mode to true or false.
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
top level DAP object to house generic methods
Definition AISConnect.cc:30
Type
Identifies the data type.
Definition Type.h:94