libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.h
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2012 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #ifndef d4_parser_sax2_h
27 #define d4_parser_sax2_h
28 
29 #define ATTR 1
30 
31 #include <string.h>
32 
33 #include <string>
34 #include <iostream>
35 #include <map>
36 #include <stack>
37 
38 #include <libxml/parserInternals.h>
39 
40 #define CRLF "\r\n"
41 #define D4_PARSE_BUFF_SIZE 1048576
42 
43 namespace libdap
44 {
45 
46 class DMR;
47 class BaseType;
48 class D4BaseTypeFactory;
49 class D4Group;
50 class D4Attributes;
51 class D4EnumDef;
52 class D4Dimension;
53 
78 {
79 private:
// Parse states used by this parser. Values of this type are kept on the
// state stack `s` and manipulated with push_state()/get_state()/pop_state().
// The entries that appear only inside comments (inside_array,
// inside_sequence) are not enumerators.
// NOTE(review): apart from the two states that already carry a comment, the
// meaning of each state is only implied by its name here; confirm against
// the implementation file before relying on it.
82  enum ParseState {
83  parser_start,
84 
85  inside_dataset,
86 
87  // inside_group is the state just after parsing the start of a Group
88  // element.
89  inside_group,
90 
91  inside_attribute_container,
92  inside_attribute,
93  inside_attribute_value,
94  inside_other_xml_attribute,
95 
96  inside_enum_def,
97  inside_enum_const,
98 
99  inside_dim_def,
100 
101  // This covers Byte, ..., Url, Opaque
102  inside_simple_type,
103 
104  // inside_array,
105  inside_dim,
106  inside_map,
107 
108  inside_constructor,
109 
110  // inside_sequence, Removed from merged code jhrg 5/2/14
111 
112  not_dap4_element,
113 
114  parser_unknown,
115  parser_error,
116  parser_fatal_error,
117 
118  parser_end
119  };
// Fixed-size character buffer embedded directly in the parser object.
// NOTE(review): with D4_PARSE_BUFF_SIZE defined as 1048576 this member alone
// makes every D4ParserSax2 instance a little over 1 MiB; keep that in mind
// before creating one as a local variable.
120  char d_parse_buffer[D4_PARSE_BUFF_SIZE+1]; // Buff size plus one byte for NULL termination.
121 
// libxml2 SAX handler table. The constructor zeroes it and then installs
// this class's static dmr_* callbacks.
122  xmlSAXHandler d_dmr_sax_parser;
123 
124  // The results of the parse operation are stored in these fields.
125  // This is passed into the parser using the intern() methods.
126  DMR *d_dmr; // dump DMR here
// Returns the stored DMR pointer as-is (0 until one has been supplied).
127  DMR *dmr() const { return d_dmr; }
128 
129  // These stacks hold the state of the parse as it progresses.
// Each stack has the same four helpers: push_*, top/get_*, pop_*, empty_*.
// The top/get and pop helpers forward straight to std::stack::top()/pop()
// without checking for emptiness, so callers must test empty_*() first.
// The pointer stacks hold raw pointers; pop_* only removes the entry and
// never deletes the pointed-to object.
130  stack<ParseState> s; // Current parse state
131  void push_state(D4ParserSax2::ParseState state) { s.push(state); }
132  D4ParserSax2::ParseState get_state() const { return s.top(); }
133  void pop_state() { s.pop(); }
134  bool empty_state() const { return s.empty(); }
135 
136  stack<BaseType*> btp_stack; // current variable(s)
137  void push_basetype(BaseType *btp) { btp_stack.push(btp); }
138  BaseType *top_basetype() const { return btp_stack.top(); }
139  void pop_basetype() { btp_stack.pop(); }
140  bool empty_basetype() const { return btp_stack.empty(); }
141 
142  stack<D4Group*> grp_stack; // current group(s)
143  void push_group(D4Group *grp) { grp_stack.push(grp); }
144  D4Group *top_group() const { return grp_stack.top(); }
145  void pop_group() { grp_stack.pop(); }
146  bool empty_group() const { return grp_stack.empty(); }
147 
148  stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes
149  void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
150  D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
151  void pop_attributes() { d_attrs_stack.pop(); }
152  bool empty_attributes() const { return d_attrs_stack.empty(); }
153 
// Pointer to an enumeration definition; starts as 0 (set in the
// constructor). enum_def() is only declared here, its body is elsewhere.
// clear_enum_def() just resets the pointer to 0; it does not delete anything.
154  D4EnumDef *d_enum_def;
155  D4EnumDef *enum_def();
156  void clear_enum_def() { d_enum_def = 0; }
157 
// Same arrangement for a dimension definition: pointer, out-of-line
// accessor, and a reset-to-0 helper that does not delete the object.
158  D4Dimension *d_dim_def;
159  D4Dimension *dim_def();
160  void clear_dim_def() { d_dim_def = 0; }
161 
162  // Accumulate stuff inside an 'OtherXML' DAP attribute here
163  string other_xml;
164 
165  // When we're parsing unknown XML, how deeply is it nested? This is used
166  // for the OtherXML DAP attributes.
// Both counters start at 0 (set in the constructor).
167  unsigned int other_xml_depth;
168  unsigned int unknown_depth;
169 
170  // These are used for processing errors.
171  string d_error_msg; // Error message(s), if any.
172  xmlParserCtxtPtr d_context; // used for error message line numbers
173 
174  // These hold temporary values read during the parse.
175  string dods_attr_name; // DAP4 attributes, not XML attributes
176  string dods_attr_type; // ... not XML ...
177  string char_data; // char data in value elements; null after use
178  string root_ns; // What is the namespace of the root node (Group)
179 
// Debug flag; false by default (constructor). The intern() overloads take a
// `debug` argument - presumably that is what sets this; TODO confirm.
180  bool d_debug;
181  bool debug() const { return d_debug; }
182 
// 'Strict' mode flag; true by default (constructor) and changed/read through
// the public set_strict()/get_strict() members.
183  bool d_strict;
184 
185  class XMLAttribute {
186  public:
187  string prefix;
188  string nsURI;
189  string value;
190 
191  void clone(const XMLAttribute &src) {
192  prefix = src.prefix;
193  nsURI = src.nsURI;
194  value = src.value;
195  }
196 
197  XMLAttribute() : prefix(""), nsURI(""), value("") {}
198  XMLAttribute(const string &p, const string &ns, const string &v)
199  : prefix(p), nsURI(ns), value(v) {}
200  // 'attributes' as passed from libxml2 is a five element array but this
201  // ctor gets the back four elements.
202  XMLAttribute(const xmlChar **attributes/*[4]*/) {
203  prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
204  nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
205  value = string((const char *)attributes[2], (const char *)attributes[3]);
206  }
207  XMLAttribute(const XMLAttribute &rhs) {
208  clone(rhs);
209  }
210  ~XMLAttribute() {
211  }
212  XMLAttribute &operator=(const XMLAttribute &rhs) {
213  if (this == &rhs)
214  return *this;
215  clone(rhs);
216  return *this;
217  }
218  };
219 
// Map from a string key to the XMLAttribute stored under it.
// NOTE(review): the key is presumably the attribute's local name; the code
// that fills the map is not in this header - confirm there.
220  typedef map<string, XMLAttribute> XMLAttrMap;
221  XMLAttrMap xml_attrs; // dump XML attributes here
222 
// Iterator to the first entry of xml_attrs.
223  XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
224 
// Past-the-end iterator of xml_attrs.
225  XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
226 
// String-to-string table of XML namespaces.
// NOTE(review): which of prefix/URI is the key is not visible here; TODO confirm.
227  map<string, string> namespace_table;
228 
// Private helpers. Only the declarations appear in this header; the bodies
// live elsewhere, so the notes below are limited to what the signatures show.
229  void cleanup_parse();
230 
// The (attrs, nb_attributes) and (namespaces, nb_namespaces) pairs have the
// same types as the matching arguments of dmr_start_element() - presumably
// they are forwarded from that callback; TODO confirm.
237  void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
238  void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
239  bool check_required_attribute(const string &attr);
240  bool check_attribute(const string & attr);
// NOTE(review): `Type` is used here and in finish_variable(), but no include
// or forward declaration for it is visible in this listing; the header
// appears to rely on the including file to provide it.
241  void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
242 
243  void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
244  void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
245 
// The process_* members below share one shape: element name plus the raw
// attribute array and its count, returning bool.
// NOTE(review): the bool presumably reports whether the element was
// recognised/handled - confirm in the implementation.
246  bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
247  bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
248  bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
249  bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
250  bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
251  bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
252  bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
253  bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
254 
255  void finish_variable(const char *tag, Type t, const char *expected);
257 
// Gives the D4ParserSax2Test class access to the private members above.
258  friend class D4ParserSax2Test;
259 
260 public:
261  D4ParserSax2() :
262  d_dmr(0), d_enum_def(0), d_dim_def(0),
263  other_xml(""), other_xml_depth(0), unknown_depth(0),
264  d_error_msg(""), d_context(0),
265  dods_attr_name(""), dods_attr_type(""),
266  char_data(""), root_ns(""), d_debug(false), d_strict(true)
267  {
268  //xmlSAXHandler ddx_sax_parser;
269  memset(&d_dmr_sax_parser, 0, sizeof(xmlSAXHandler));
270 
271  d_dmr_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
272  d_dmr_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
273  d_dmr_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
274  d_dmr_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
275  d_dmr_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
276  d_dmr_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
277  d_dmr_sax_parser.warning = &D4ParserSax2::dmr_error;
278  d_dmr_sax_parser.error = &D4ParserSax2::dmr_error;
279  d_dmr_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
280  d_dmr_sax_parser.initialized = XML_SAX2_MAGIC;
281  d_dmr_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
282  d_dmr_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
283  }
284 
// Parse entry points. The three overloads differ only in where the document
// text comes from: an input stream, a string holding the document, or a
// character buffer with an explicit size. Each takes the destination DMR and
// an optional debug flag that defaults to false.
285  void intern(istream &f, DMR *dest_dmr, bool debug = false);
286  // Deprecated - this does not read from a file, it parses text in the string 'document'
287  void intern(const string &document, DMR *dest_dmr, bool debug = false);
288  void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
289 
// Set the 'strict' mode to true or false.
302  void set_strict(bool s) { d_strict = s; }
// Get the setting of the 'strict' mode.
306  bool get_strict() const { return d_strict; }
// Static callbacks that the constructor installs in d_dmr_sax_parser. They
// are static so their addresses can be stored in libxml2's C function
// pointer slots.
// NOTE(review): the `parser` argument is presumably the D4ParserSax2
// instance handed to libxml2 as user data - confirm in the implementation.
309  static void dmr_start_document(void *parser);
310  static void dmr_end_document(void *parser);
311 
// Installed as the startElementNs / endElementNs (SAX2, namespace-aware)
// handlers.
312  static void dmr_start_element(void *parser,
313  const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
314  int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
315  int nb_defaulted, const xmlChar **attributes);
316  static void dmr_end_element(void *parser, const xmlChar *localname,
317  const xmlChar *prefix, const xmlChar *URI);
318 
// Installed as the characters, ignorableWhitespace and cdataBlock handlers;
// each receives a pointer to the text and its length.
319  static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
320  static void dmr_ignoreable_whitespace(void *parser,
321  const xmlChar * ch, int len);
322  static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
323 
// Installed as the getEntity handler.
324  static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
// printf-style diagnostics. dmr_error is installed for both the warning and
// error slots; dmr_fatal_error for fatalError.
325  static void dmr_fatal_error(void *parser, const char *msg, ...);
326  static void dmr_error(void *parser, const char *msg, ...);
327 };
328 
329 } // namespace libdap
330 
331 #endif // d4_parser_sax2_h
static void dmr_end_document(void *parser)
static void dmr_start_document(void *parser)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
void set_strict(bool s)
Set the 'strict' mode to true or false.
Definition: D4ParserSax2.h:302
Type
Identifies the data type.
Definition: Type.h:94
top level DAP object to house generic methods
Definition: AISConnect.cc:30
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
bool get_strict() const
Get the setting of the 'strict' mode.
Definition: D4ParserSax2.h:306
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
static void dmr_fatal_error(void *parser, const char *msg,...)