libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG 1
28 
29 #include <iostream>
30 #include <sstream>
31 
32 #include <cstring>
33 #include <cstdarg>
34 #include <cassert>
35 
36 #include <libxml2/libxml/parserInternals.h>
37 
38 #include "DMR.h"
39 
40 #include "BaseType.h"
41 #include "Array.h"
42 #include "D4Group.h"
43 #include "D4Attributes.h"
44 #include "D4Maps.h"
45 #include "D4Enum.h"
46 #include "D4BaseTypeFactory.h"
47 
48 #include "DapXmlNamespaces.h"
49 #include "D4ParserSax2.h"
50 
51 #include "util.h"
52 #include "debug.h"
53 
54 namespace libdap {
55 
// Printable names of the parser states. The table is indexed with the value
// returned by get_state() when writing debugging output, so the entries must
// stay in the same order as the parser's state values (declared outside this
// file section -- keep the two in sync when adding a state).
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    "inside_group",                 // just after the start of a Group element

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    "inside_simple_type",           // covers Byte, ..., Url, Opaque

    // There is no "inside_array" entry.
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "not_dap4_element",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
92 
/**
 * Test whether an element name differs from an expected tag.
 *
 * @param name The name to test (a NUL-terminated string).
 * @param tag The tag to compare against (a NUL-terminated string).
 * @return True when the two strings differ, false when they are identical.
 * The comparison is case sensitive.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
97 
106 D4EnumDef *
107 D4ParserSax2::enum_def()
108 {
109  if (!d_enum_def) d_enum_def = new D4EnumDef;
110 
111  return d_enum_def;
112 }
113 
120 D4Dimension *
121 D4ParserSax2::dim_def() {
122  if (!d_dim_def) d_dim_def = new D4Dimension;
123 
124  return d_dim_def;
125 }
126 
132 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
133 {
134  if (!xml_attrs.empty())
135  xml_attrs.clear(); // erase old attributes
136 
137  // Make a value using the attribute name and the prefix, namespace URI
138  // and the value. The prefix might be null.
139  unsigned int index = 0;
140  for (int i = 0; i < nb_attributes; ++i, index += 5) {
141  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
142  XMLAttribute(attributes + index + 1)));
143 
144  DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
145  << xml_attrs[(const char *)attributes[index]].value << endl);
146  }
147 }
148 
155 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
156 {
157  // make a value with the prefix and namespace URI. The prefix might be null.
158  for (int i = 0; i < nb_namespaces; ++i) {
159  namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
160  (const char *)namespaces[i * 2 + 1]));
161  }
162 }
163 
170 bool D4ParserSax2::check_required_attribute(const string & attr)
171 {
172  if (xml_attrs.find(attr) == xml_attrs.end()) {
173  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
174  return false;
175  }
176  else
177  return true;
178 }
179 
186 bool D4ParserSax2::check_attribute(const string & attr)
187 {
188  return (xml_attrs.find(attr) != xml_attrs.end());
189 }
190 
191 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
192 {
193  if (is_not(name, "Dimension"))
194  return false;
195 
196  transfer_xml_attrs(attrs, nb_attributes);
197 
198  if (!(check_required_attribute("name") && check_required_attribute("size"))) {
199  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
200  return false;
201  }
202 
203  // This getter (dim_def) allocates a new object if needed.
204  dim_def()->set_name(xml_attrs["name"].value);
205  try {
206  dim_def()->set_size(xml_attrs["size"].value);
207  }
208  catch (Error &e) {
209  dmr_error(this, "%s", e.get_error_message().c_str());
210  return false;
211  }
212 
213  return true;
214 }
215 
233 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
234 {
235  if (is_not(name, "Dim"))
236  return false;
237 
238  transfer_xml_attrs(attrs, nb_attributes);
239 
240  if (check_attribute("size") && check_attribute("name")) {
241  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
242  return false;
243  }
244  if (!(check_attribute("size") || check_attribute("name"))) {
245  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
246  return false;
247  }
248 
249  if (!top_basetype()->is_vector_type()) {
250  // Make the top BaseType* an array
251  BaseType *b = top_basetype();
252  pop_basetype();
253 
254  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
255  a->set_is_dap4(true);
256  a->add_var_nocopy(b);
257  a->set_attributes_nocopy(b->attributes());
258  // trick: instead of popping b's attributes, copying them and then pushing
259  // a's copy, just move the pointer (but make sure there's only one object that
260  // references that pointer).
261  b->set_attributes_nocopy(0);
262 
263  push_basetype(a);
264  }
265 
266  assert(top_basetype()->is_vector_type());
267 
268  Array *a = static_cast<Array*>(top_basetype());
269  if (check_attribute("size")) {
270  a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
271  return true;
272  }
273  else if (check_attribute("name")) {
274  string name = xml_attrs["name"].value;
275 
276  D4Dimension *dim = 0;
277  if (name[0] == '/') // lookup the Dimension in the root group
278  dim = dmr()->root()->find_dim(name);
279  else // get enclosing Group and lookup Dimension there
280  dim = top_group()->find_dim(name);
281 
282  if (!dim)
283  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
284  a->append_dim(dim);
285  return true;
286  }
287 
288  return false;
289 }
290 
291 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
292 {
293  if (is_not(name, "Map"))
294  return false;
295 
296  transfer_xml_attrs(attrs, nb_attributes);
297 
298  if (!check_attribute("name")) {
299  dmr_error(this, "The 'name' attribute must be used in a Map element.");
300  return false;
301  }
302 
303  if (!top_basetype()->is_vector_type()) {
304  // Make the top BaseType* an array
305  BaseType *b = top_basetype();
306  pop_basetype();
307 
308  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
309  a->set_is_dap4(true);
310  a->add_var_nocopy(b);
311  a->set_attributes_nocopy(b->attributes());
312  // trick: instead of popping b's attributes, copying them and then pushing
313  // a's copy, just move the pointer (but make sure there's only one object that
314  // references that pointer).
315  b->set_attributes_nocopy(0);
316 
317  push_basetype(a);
318  }
319 
320  assert(top_basetype()->is_vector_type());
321 
322  Array *a = static_cast<Array*>(top_basetype());
323 
324  string map_name = xml_attrs["name"].value;
325  if (xml_attrs["name"].value[0] != '/')
326  map_name = top_group()->FQN() + map_name;
327 
328  Array *map_source = 0; // The array variable that holds the data for the Map
329 
330  if (map_name[0] == '/') // lookup the Map in the root group
331  map_source = dmr()->root()->find_map_source(map_name);
332  else // get enclosing Group and lookup Map there
333  map_source = top_group()->find_map_source(map_name);
334 
335  // Change: If the parser is in 'strict' mode (the default) and the Array named by
336  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
337  // mode), then this is not an error. However, the Array referenced by the Map will
338  // be null. This is a change in the parser's behavior to accommodate requests for
339  // Arrays that include Maps that do not also include the Map(s) in the request.
340  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
341  if (!map_source && d_strict)
342  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
343 
344  a->maps()->add_map(new D4Map(map_name, map_source));
345 
346  return true;
347 }
348 
349 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
350 {
351  if (is_not(name, "Group"))
352  return false;
353 
354  transfer_xml_attrs(attrs, nb_attributes);
355 
356  if (!check_required_attribute("name")) {
357  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
358  return false;
359  }
360 
361  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
362  if (!btp) {
363  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
364  return false;
365  }
366 
367  D4Group *grp = static_cast<D4Group*>(btp);
368 
369  // Need to set this to get the D4Attribute behavior in the type classes
370  // shared between DAP2 and DAP4. jhrg 4/18/13
371  grp->set_is_dap4(true);
372 
373  // link it up and change the current group
374  D4Group *parent = top_group();
375  if (!parent) {
376  dmr_fatal_error(this, "No Group on the Group stack.");
377  return false;
378  }
379 
380  grp->set_parent(parent);
381  parent->add_group_nocopy(grp);
382 
383  push_group(grp);
384  push_attributes(grp->attributes());
385  return true;
386 }
387 
394 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
395 {
396  if (is_not(name, "Attribute"))
397  return false;
398 
399  // These methods set the state to parser_error if a problem is found.
400  transfer_xml_attrs(attrs, nb_attributes);
401 
402  // add error
403  if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
404  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
405  return false;
406  }
407 
408  if (xml_attrs["type"].value == "Container") {
409  push_state(inside_attribute_container);
410 
411  DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
412  D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
413 
414  D4Attributes *tos = top_attributes();
415  // add return
416  if (!tos) {
417  delete child;
418  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
419  return false;
420  }
421 
422  tos->add_attribute_nocopy(child);
423  push_attributes(child->attributes());
424  }
425  else if (xml_attrs["type"].value == "OtherXML") {
426  push_state(inside_other_xml_attribute);
427 
428  dods_attr_name = xml_attrs["name"].value;
429  dods_attr_type = xml_attrs["type"].value;
430  }
431  else {
432  push_state(inside_attribute);
433 
434  dods_attr_name = xml_attrs["name"].value;
435  dods_attr_type = xml_attrs["type"].value;
436  }
437 
438  return true;
439 }
440 
446 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
447 {
448  if (is_not(name, "Enumeration"))
449  return false;
450 
451  transfer_xml_attrs(attrs, nb_attributes);
452 
453  if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
454  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
455  return false;
456  }
457 
458  Type t = get_type(xml_attrs["basetype"].value.c_str());
459  if (!is_integer_type(t)) {
460  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
461  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
462  return false;
463  }
464 
465  // This getter allocates a new object if needed.
466  string enum_def_path = xml_attrs["name"].value;
467 #if 0
468  // Use FQNs when things are referenced, not when they are defined
469  if (xml_attrs["name"].value[0] != '/')
470  enum_def_path = top_group()->FQN() + enum_def_path;
471 #endif
472  enum_def()->set_name(enum_def_path);
473  enum_def()->set_type(t);
474 
475  return true;
476 }
477 
478 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
479 {
480  if (is_not(name, "EnumConst"))
481  return false;
482 
483  // These methods set the state to parser_error if a problem is found.
484  transfer_xml_attrs(attrs, nb_attributes);
485 
486  if (!(check_required_attribute("name") && check_required_attribute("value"))) {
487  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
488  return false;
489  }
490 
491  istringstream iss(xml_attrs["value"].value);
492  long long value = 0;
493  iss >> skipws >> value;
494  if (iss.fail() || iss.bad()) {
495  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
496  xml_attrs["value"].value.c_str());
497  }
498  else if (!enum_def()->is_valid_enum_value(value)) {
499  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
500  xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
501  }
502  else {
503  // unfortunate choice of names... args are 'label' and 'value'
504  enum_def()->add_value(xml_attrs["name"].value, value);
505  }
506 
507  return true;
508 }
509 
515 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
516 {
517  Type t = get_type(name);
518  if (is_simple_type(t)) {
519  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
520  return true;
521  }
522  else {
523  switch(t) {
524  case dods_structure_c:
525  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
526  return true;
527 
528  case dods_sequence_c:
529  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
530  return true;
531 
532  default:
533  return false;
534  }
535  }
536 }
537 
545 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
546 {
547  transfer_xml_attrs(attrs, nb_attributes);
548 
549  if (check_required_attribute("name")) {
550  BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
551  if (!btp) {
552  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
553  return;
554  }
555 
556  if ((t == dods_enum_c) && check_required_attribute("enum")) {
557  D4EnumDef *enum_def = 0;
558  string enum_path = xml_attrs["enum"].value;
559  if (enum_path[0] == '/')
560  enum_def = dmr()->root()->find_enum_def(enum_path);
561  else
562  enum_def = top_group()->find_enum_def(enum_path);
563 
564  if (!enum_def)
565  dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
566 
567  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
568  }
569 
570  btp->set_is_dap4(true); // see comment above
571  push_basetype(btp);
572 
573  push_attributes(btp->attributes());
574 
575  push_state(s);
576  }
577 }
578 
585 
591 {
592  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
593  parser->d_error_msg = "";
594  parser->char_data = "";
595 
596  // Set this in intern_helper so that the loop test for the parser_end
597  // state works for the first iteration. It seems like XMLParseChunk calls this
598  // function on it's first run. jhrg 9/16/13
599  // parser->push_state(parser_start);
600 
601  parser->push_attributes(parser->dmr()->root()->attributes());
602 
603  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
604 }
605 
609 {
610  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
611 
612  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
613 
614  if (parser->get_state() != parser_end)
615  D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
616 
617  // If we've found any sort of error, don't make the DMR; intern() will
618  // take care of the error.
619  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
620  return;
621 
622  if (!parser->empty_basetype() || parser->empty_group())
623  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
624 
625  parser->pop_group(); // leave the stack 'clean'
626  parser->pop_attributes();
627 }
628 
642 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
643  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
644  const xmlChar **attributes)
645 {
646  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
647  const char *localname = (const char *) l;
648 
649  if (parser->debug()) cerr << "Start element " << localname << " prefix: "<< (prefix?(char *)prefix:"null") << " ns: "<< (URI?(char *)URI:"null")
650  << " (state: " << states[parser->get_state()] << ")" << endl;
651 
652  if(parser->get_state() != parser_error){
653  string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
654  if (parser->debug()) cerr << "dap4_ns_name: " << dap4_ns_name << endl;
655 
656  string this_element_ns_name = (URI != 0) ? ((char *)URI) : "";
657  if (parser->debug()) cerr << "this_element_ns_name: " << this_element_ns_name << endl;
658 
659  if(this_element_ns_name.compare(dap4_ns_name)){
660  if (parser->debug()) cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
661  parser->push_state(not_dap4_element);
662  // return;
663  }
664  }
665 
666 
667  switch (parser->get_state()) {
668  case parser_start:
669  if (is_not(localname, "Dataset"))
670  D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
671 
672  parser->root_ns = URI ? (const char *) URI : "";
673  parser->transfer_xml_attrs(attributes, nb_attributes);
674 
675  if (parser->check_required_attribute(string("name")))
676  parser->dmr()->set_name(parser->xml_attrs["name"].value);
677 
678  if (parser->check_attribute("dapVersion"))
679  parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
680 
681  if (parser->check_attribute("dmrVersion"))
682  parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
683 
684  if (parser->check_attribute("base"))
685  parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
686 
687  if (!parser->root_ns.empty())
688  parser->dmr()->set_namespace(parser->root_ns);
689 
690  // Push the root Group on the stack
691  parser->push_group(parser->dmr()->root());
692 
693  parser->push_state(inside_dataset);
694 
695  break;
696 
697  // Both inside dataset and inside group can have the same stuff.
698  // The difference is that the Dataset holds the root group, which
699  // must be present; other groups are optional
700  case inside_dataset:
701  case inside_group:
702  if (parser->process_enum_def(localname, attributes, nb_attributes))
703  parser->push_state(inside_enum_def);
704  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
705  parser->push_state(inside_dim_def);
706  else if (parser->process_group(localname, attributes, nb_attributes))
707  parser->push_state(inside_group);
708  else if (parser->process_variable(localname, attributes, nb_attributes))
709  // This will push either inside_simple_type or inside_structure
710  // onto the parser state stack.
711  break;
712  else if (parser->process_attribute(localname, attributes, nb_attributes))
713  // This will push either inside_attribute, inside_attribute_container
714  // or inside_otherxml_attribute onto the parser state stack
715  break;
716  else
717  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
718  break;
719 
720  case inside_attribute_container:
721  if (parser->process_attribute(localname, attributes, nb_attributes))
722  break;
723  else
724  D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
725  break;
726 
727  case inside_attribute:
728  if (parser->process_attribute(localname, attributes, nb_attributes))
729  break;
730  else if (strcmp(localname, "Value") == 0)
731  parser->push_state(inside_attribute_value);
732  else
733  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
734  break;
735 
736  case inside_attribute_value:
737  // Attribute values are processed by the end element code.
738  break;
739 
740  case inside_other_xml_attribute:
741  parser->other_xml_depth++;
742 
743  // Accumulate the elements here
744  parser->other_xml.append("<");
745  if (prefix) {
746  parser->other_xml.append((const char *) prefix);
747  parser->other_xml.append(":");
748  }
749  parser->other_xml.append(localname);
750 
751  if (nb_namespaces != 0) {
752  parser->transfer_xml_ns(namespaces, nb_namespaces);
753 
754  for (map<string, string>::iterator i = parser->namespace_table.begin();
755  i != parser->namespace_table.end(); ++i) {
756  parser->other_xml.append(" xmlns");
757  if (!i->first.empty()) {
758  parser->other_xml.append(":");
759  parser->other_xml.append(i->first);
760  }
761  parser->other_xml.append("=\"");
762  parser->other_xml.append(i->second);
763  parser->other_xml.append("\"");
764  }
765  }
766 
767  if (nb_attributes != 0) {
768  parser->transfer_xml_attrs(attributes, nb_attributes);
769  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
770  parser->other_xml.append(" ");
771  if (!i->second.prefix.empty()) {
772  parser->other_xml.append(i->second.prefix);
773  parser->other_xml.append(":");
774  }
775  parser->other_xml.append(i->first);
776  parser->other_xml.append("=\"");
777  parser->other_xml.append(i->second.value);
778  parser->other_xml.append("\"");
779  }
780  }
781 
782  parser->other_xml.append(">");
783  break;
784 
785  case inside_enum_def:
786  // process an EnumConst element
787  if (parser->process_enum_const(localname, attributes, nb_attributes))
788  parser->push_state(inside_enum_const);
789  else
790  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
791  break;
792 
793  case inside_enum_const:
794  // No content; nothing to do
795  break;
796 
797  case inside_dim_def:
798  // No content; nothing to do
799  break;
800 #if 0
801  case inside_dimension:
802  // No content.
803  break;
804 #endif
805  case inside_dim:
806  // No content.
807  break;
808 
809  case inside_map:
810  // No content.
811  break;
812 
813  case inside_simple_type:
814  if (parser->process_attribute(localname, attributes, nb_attributes))
815  break;
816  else if (parser->process_dimension(localname, attributes, nb_attributes))
817  parser->push_state(inside_dim);
818  else if (parser->process_map(localname, attributes, nb_attributes))
819  parser->push_state(inside_map);
820  else
821  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
822  break;
823 
824  case inside_constructor:
825  if (parser->process_variable(localname, attributes, nb_attributes))
826  // This will push either inside_simple_type or inside_structure
827  // onto the parser state stack.
828  break;
829  else if (parser->process_attribute(localname, attributes, nb_attributes))
830  break;
831  else if (parser->process_dimension(localname, attributes, nb_attributes))
832  parser->push_state(inside_dim);
833  else if (parser->process_map(localname, attributes, nb_attributes))
834  parser->push_state(inside_map);
835  else
836  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
837  break;
838 
839  case not_dap4_element:
840  if (parser->debug()) cerr << "Inside non DAP4 element. localname: " << localname << endl;
841  break;
842 
843  case parser_unknown:
844  // FIXME?
845  // *** Never used? If so remove/error
846  parser->push_state(parser_unknown);
847  break;
848 
849  case parser_error:
850  case parser_fatal_error:
851  break;
852 
853  case parser_end:
854  // FIXME Error?
855  break;
856  }
857 
858  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
859 }
860 
861 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
862 {
863  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
864  const char *localname = (const char *) l;
865 
866  if (parser->debug())
867  cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
868 
869  switch (parser->get_state()) {
870  case parser_start:
871  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
872  break;
873 
874  case inside_dataset:
875  if (is_not(localname, "Dataset"))
876  D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
877 
878  parser->pop_state();
879  if (parser->get_state() != parser_start)
880  dmr_fatal_error(parser, "Unexpected state, expected start state.");
881  else {
882  parser->pop_state();
883  parser->push_state(parser_end);
884  }
885  break;
886 
887  case inside_group: {
888  if (is_not(localname, "Group"))
889  D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
890 
891  if (!parser->empty_basetype() || parser->empty_group())
892  D4ParserSax2::dmr_error(parser,
893  "The document did not contain a valid root Group or contained unbalanced tags.");
894 
895  parser->pop_group();
896  parser->pop_state();
897  break;
898  }
899 
900  case inside_attribute_container:
901  if (is_not(localname, "Attribute"))
902  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
903 
904  parser->pop_state();
905  parser->pop_attributes();
906  break;
907 
908  case inside_attribute:
909  if (is_not(localname, "Attribute"))
910  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
911 
912  parser->pop_state();
913  break;
914 
915  case inside_attribute_value: {
916  if (is_not(localname, "Value"))
917  D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
918 
919  parser->pop_state();
920 
921  // The old code added more values using the name and type as
922  // indexes to find the correct attribute. Use get() for that
923  // now. Or fix this code to keep a pointer to the to attribute...
924  D4Attributes *attrs = parser->top_attributes();
925  D4Attribute *attr = attrs->get(parser->dods_attr_name);
926  if (!attr) {
927  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
928  attrs->add_attribute_nocopy(attr);
929  }
930  attr->add_value(parser->char_data);
931 
932  parser->char_data = ""; // Null this after use.
933  break;
934  }
935 
936  case inside_other_xml_attribute: {
937  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
938  parser->pop_state();
939 
940  // The old code added more values using the name and type as
941  // indexes to find the correct attribute. Use get() for that
942  // now. Or fix this code to keep a pointer to the to attribute...
943  D4Attributes *attrs = parser->top_attributes();
944  D4Attribute *attr = attrs->get(parser->dods_attr_name);
945  if (!attr) {
946  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
947  attrs->add_attribute_nocopy(attr);
948  }
949  attr->add_value(parser->other_xml);
950 
951  parser->other_xml = ""; // Null this after use.
952  }
953  else {
954  if (parser->other_xml_depth == 0) {
955  D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
956  localname);
957  break;
958  }
959  parser->other_xml_depth--;
960 
961  parser->other_xml.append("</");
962  if (prefix) {
963  parser->other_xml.append((const char *) prefix);
964  parser->other_xml.append(":");
965  }
966  parser->other_xml.append(localname);
967  parser->other_xml.append(">");
968  }
969  break;
970  }
971 
972  case inside_enum_def:
973  if (is_not(localname, "Enumeration"))
974  D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
975  if (!parser->top_group())
977  "Expected a Group to be the current item, while finishing up an Enumeration.");
978  else {
979  // copy the pointer; not a deep copy
980  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
981  // Set the enum_def to null; next call to enum_def() will
982  // allocate a new object
983  parser->clear_enum_def();
984  parser->pop_state();
985  }
986  break;
987 
988  case inside_enum_const:
989  if (is_not(localname, "EnumConst"))
990  D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
991 
992  parser->pop_state();
993  break;
994 
995  case inside_dim_def: {
996  if (is_not(localname, "Dimension"))
997  D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
998 
999  if (!parser->top_group())
1000  D4ParserSax2::dmr_error(parser,
1001  "Expected a Group to be the current item, while finishing up an Dimension.");
1002 
1003  // FIXME Use the Group on the top of the group stack
1004  // copy the pointer; not a deep copy
1005  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1006  //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
1007  // Set the dim_def to null; next call to dim_def() will
1008  // allocate a new object. Calling 'clear' is important because
1009  // the cleanup method will free dim_def if it's not null and
1010  // we just copied the pointer in the add_dim_nocopy() call
1011  // above.
1012  parser->clear_dim_def();
1013  parser->pop_state();
1014  break;
1015  }
1016 
1017  case inside_simple_type:
1018  if (is_simple_type(get_type(localname))) {
1019  BaseType *btp = parser->top_basetype();
1020  parser->pop_basetype();
1021  parser->pop_attributes();
1022 
1023  BaseType *parent = 0;
1024  if (!parser->empty_basetype())
1025  parent = parser->top_basetype();
1026  else if (!parser->empty_group())
1027  parent = parser->top_group();
1028  else {
1029  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1030  localname);
1031  delete btp;
1032  parser->pop_state();
1033  break;
1034  }
1035 
1036  if (parent->type() == dods_array_c)
1037  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1038  else
1039  parent->add_var_nocopy(btp);
1040  }
1041  else
1042  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1043 
1044  parser->pop_state();
1045  break;
1046 
1047  case inside_dim:
1048  if (is_not(localname, "Dim"))
1049  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1050 
1051  parser->pop_state();
1052  break;
1053 
1054  case inside_map:
1055  if (is_not(localname, "Map"))
1056  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1057 
1058  parser->pop_state();
1059  break;
1060 
1061  case inside_constructor: {
1062  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1063  D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1064  return;
1065  }
1066 
1067  BaseType *btp = parser->top_basetype();
1068  parser->pop_basetype();
1069  parser->pop_attributes();
1070 
1071  BaseType *parent = 0;
1072  if (!parser->empty_basetype())
1073  parent = parser->top_basetype();
1074  else if (!parser->empty_group())
1075  parent = parser->top_group();
1076  else {
1077  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1078  localname);
1079  delete btp;
1080  parser->pop_state();
1081  break;
1082  }
1083 
1084  // TODO Why doesn't this code mirror the simple_var case and test
1085  // for the parent being an array? jhrg 10/13/13
1086  parent->add_var_nocopy(btp);
1087  parser->pop_state();
1088  break;
1089  }
1090 
1091  case not_dap4_element:
1092  if (parser->debug()) cerr << "End of non DAP4 element: " << localname << endl;
1093  parser->pop_state();
1094  break;
1095 
1096  case parser_unknown:
1097  parser->pop_state();
1098  break;
1099 
1100  case parser_error:
1101  case parser_fatal_error:
1102  break;
1103 
1104  case parser_end:
1105  // FIXME Error?
1106  break;
1107  }
1108 
1109  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1110 }
1111 
1115 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1116 {
1117  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1118 
1119  switch (parser->get_state()) {
1120  case inside_attribute_value:
1121  parser->char_data.append((const char *) (ch), len);
1122  DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1123  break;
1124 
1125  case inside_other_xml_attribute:
1126  parser->other_xml.append((const char *) (ch), len);
1127  DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1128  break;
1129 
1130  default:
1131  break;
1132  }
1133 }
1134 
1139 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1140 {
1141  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1142 
1143  switch (parser->get_state()) {
1144  case inside_other_xml_attribute:
1145  parser->other_xml.append((const char *) (ch), len);
1146  break;
1147 
1148  default:
1149  break;
1150  }
1151 }
1152 
1158 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1159 {
1160  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1161 
1162  switch (parser->get_state()) {
1163  case inside_other_xml_attribute:
1164  parser->other_xml.append((const char *) (value), len);
1165  break;
1166 
1167  case parser_unknown:
1168  break;
1169 
1170  default:
1171  D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1172 
1173  break;
1174  }
1175 }
1176 
1181 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1182 {
1183  return xmlGetPredefinedEntity(name);
1184 }
1185 
1196 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1197 {
1198  va_list args;
1199  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1200 
1201  parser->push_state(parser_fatal_error);
1202 
1203  va_start(args, msg);
1204  char str[1024];
1205  vsnprintf(str, 1024, msg, args);
1206  va_end(args);
1207 
1208  int line = xmlSAX2GetLineNumber(parser->d_context);
1209 
1210  if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1211  parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1212 }
1213 
1214 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1215 {
1216  va_list args;
1217  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1218 
1219  parser->push_state(parser_error);
1220 
1221  va_start(args, msg);
1222  char str[1024];
1223  vsnprintf(str, 1024, msg, args);
1224  va_end(args);
1225 
1226  int line = xmlSAX2GetLineNumber(parser->d_context);
1227 
1228  if (!parser->d_error_msg.empty()) parser->d_error_msg += "\n";
1229  parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1230 }
1232 
1236 void D4ParserSax2::cleanup_parse()
1237 {
1238  bool wellFormed = d_context->wellFormed;
1239  bool valid = d_context->valid;
1240 
1241  // d_context->sax = NULL;
1242  xmlFreeParserCtxt(d_context);
1243 
1244  delete d_enum_def;
1245  d_enum_def = 0;
1246 
1247  delete d_dim_def;
1248  d_dim_def = 0;
1249 
1250  // If there's an error, there may still be items on the stack at the
1251  // end of the parse.
1252  while (!btp_stack.empty()) {
1253  delete top_basetype();
1254  pop_basetype();
1255  }
1256 
1257  if (!wellFormed)
1258  throw Error("The DMR was not well formed. " + d_error_msg);
1259  else if (!valid)
1260  throw Error("The DMR was not valid." + d_error_msg);
1261  else if (get_state() == parser_error)
1262  throw Error(d_error_msg);
1263  else if (get_state() == parser_fatal_error)
1264  throw InternalErr(d_error_msg);
1265 }
1266 
1281 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1282 {
1283  d_debug = debug;
1284 
1285  // Code example from libxml2 docs re: read from a stream.
1286 
1287  if (!f.good())
1288  throw Error("Input stream not open or read error");
1289  if (!dest_dmr)
1290  throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1291 
1292  d_dmr = dest_dmr; // dump values here
1293 #if 0
1294  int line_num = 1;
1295  string line;
1296 
1297  // Get the <xml ... ?> line
1298  getline(f, line);
1299  if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1300 
1301  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1302 
1303  d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1304  d_context->validate = true;
1305  push_state(parser_start);
1306 
1307  // Get the first line of stuff
1308  getline(f, line);
1309  ++line_num;
1310 
1311  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1312 
1313  while (!f.eof() && (get_state() != parser_end)) {
1314  xmlParseChunk(d_context, line.c_str(), line.length(), 0);
1315 
1316  // Get the next line
1317  getline(f, line);
1318  ++line_num;
1319 
1320  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1321  }
1322  // This call ends the parse.
1323  xmlParseChunk(d_context, line.c_str(), 0, 1/*terminate*/);
1324 
1325 #else
1326  int line_num = 1;
1327  string line;
1328 
1329  // Get the XML prolog line (looks like: <?xml ... ?> )
1330  getline(f, line);
1331  if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1332 
1333  if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1334 
1335  d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1336  d_context->validate = true;
1337  push_state(parser_start);
1338 
1339  // Get the first chunk of the stuff
1340  long chunk_count = 0;
1341  long chunk_size = 0;
1342 
1343  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1344  chunk_size=f.gcount();
1345  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1346  if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1347 
1348  while(!f.eof() && (get_state() != parser_end)){
1349 
1350  xmlParseChunk(d_context, d_parse_buffer, chunk_size, 0);
1351 
1352  // There is more to read. Get the next chunk
1353  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1354  chunk_size=f.gcount();
1355  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1356  if (debug) cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1357  }
1358 
1359  // This call ends the parse.
1360  xmlParseChunk(d_context, d_parse_buffer, chunk_size, 1/*terminate*/);
1361 #endif
1362 
1363  // This checks that the state on the parser stack is parser_end and throws
1364  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1365  cleanup_parse();
1366 }
1367 
1382 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1383 {
1384  intern(document.c_str(), document.length(), dest_dmr, debug);
1385 }
1386 
1397 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1398 {
1399  if (!(size > 0)) return;
1400 
1401  d_debug = debug;
1402 
1403  // Code example from libxml2 docs re: read from a stream.
1404 
1405  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1406  d_dmr = dest_dmr; // dump values in dest_dmr
1407 
1408  push_state(parser_start);
1409  d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, buffer, size, "stream");
1410  d_context->validate = true;
1411 
1412  // This call ends the parse.
1413  xmlParseChunk(d_context, buffer, 0, 1/*terminate*/);
1414 
1415  // This checks that the state on the parser stack is parser_end and throws
1416  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1417  cleanup_parse();
1418 }
1419 
1420 } // namespace libdap
virtual BaseType * NewVariable(Type t, const string &name) const
static void dmr_end_document(void *parser)
bool is_valid_enum_value(long long value)
Definition: D4EnumDefs.cc:43
void set_namespace(const std::string &ns)
Set the namespace for this DMR.
Definition: DMR.h:154
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:277
D4Group * root()
Definition: DMR.cc:407
static void dmr_start_document(void *parser)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:815
void set_request_xml_base(const std::string &xb)
Definition: DMR.h:148
Type
Identifies the data type.
Definition: Type.h:94
top level DAP object to house generic methods
Definition: AISConnect.cc:30
A class for software fault reporting.
Definition: InternalErr.h:64
static std::string getDapNamespaceString(DAPVersion version)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
void add_dim_nocopy(D4Dimension *dim)
Definition: D4Dimensions.h:160
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:123
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
virtual Type type() const
Returns the type of the class instance.
Definition: BaseType.cc:365
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:694
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:775
virtual D4Attributes * attributes()
Definition: BaseType.cc:599
void set_dap_version(const std::string &version_string)
Definition: DMR.cc:419
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
virtual std::string FQN() const
Definition: D4Group.cc:185
D4Attribute * get(const string &fqn)
void add_map(D4Map *map)
Definition: D4Maps.h:115
A class for error processing.
Definition: Error.h:92
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition: D4Group.h:97
static void dmr_fatal_error(void *parser, const char *msg,...)
bool is_integer_type(Type t)
Definition: util.cc:900
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition: D4Group.h:82