libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
D4ParserSax2.cc
Go to the documentation of this file.
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2012 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27// #define DODS_DEBUG 1
28
29#include <iostream>
30#include <sstream>
31
32#include <cassert>
33#include <cstdarg>
34#include <cstring>
35
36#include <libxml2/libxml/parserInternals.h>
37
38#include "DMR.h"
39
40#include "Array.h"
41#include "BaseType.h"
42#include "D4Attributes.h"
43#include "D4BaseTypeFactory.h"
44#include "D4Enum.h"
45#include "D4Group.h"
46#include "D4Maps.h"
47
48#include "D4ParserSax2.h"
49#include "DapXmlNamespaces.h"
50
51#include "debug.h"
52#include "util.h"
53
54namespace libdap {
55
// Printable names for the parser states; the debug traces in this file index
// this table with the value returned by get_state().
// NOTE(review): the order presumably mirrors the ParseState enumeration
// declared in the header - confirm before adding or reordering entries.
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    // State entered right after the start tag of a Group element is parsed.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // Covers Byte, ..., Url, Opaque
    "inside_simple_type",

    // "inside_array",
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "not_dap4_element",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"};
84
/// Return true when the element name @p name differs from @p tag.
/// Both arguments must be non-null, NUL-terminated strings.
static bool is_not(const char *name, const char *tag) {
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
86
95D4EnumDef *D4ParserSax2::enum_def() {
96 if (!d_enum_def)
97 d_enum_def = new D4EnumDef;
98
99 return d_enum_def;
100}
101
108D4Dimension *D4ParserSax2::dim_def() {
109 if (!d_dim_def)
110 d_dim_def = new D4Dimension;
111
112 return d_dim_def;
113}
114
120void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes) {
121 if (!xml_attrs.empty())
122 xml_attrs.clear(); // erase old attributes
123
124 // Make a value using the attribute name and the prefix, namespace URI
125 // and the value. The prefix might be null.
126 unsigned int index = 0;
127 for (int i = 0; i < nb_attributes; ++i, index += 5) {
128 xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
129 XMLAttribute(attributes + index + 1)));
130
131 DBG(cerr << "XML Attribute '" << (const char *)attributes[index]
132 << "': " << xml_attrs[(const char *)attributes[index]].value << endl);
133 }
134}
135
142void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces) {
143 // make a value with the prefix and namespace URI. The prefix might be null.
144 for (int i = 0; i < nb_namespaces; ++i) {
145 namespace_table.insert(map<string, string>::value_type(
146 namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "", (const char *)namespaces[i * 2 + 1]));
147 }
148}
149
156bool D4ParserSax2::check_required_attribute(const string &attr) {
157 if (xml_attrs.find(attr) == xml_attrs.end()) {
158 dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
159 return false;
160 } else
161 return true;
162}
163
170bool D4ParserSax2::check_attribute(const string &attr) { return (xml_attrs.find(attr) != xml_attrs.end()); }
171
172bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes) {
173 if (is_not(name, "Dimension"))
174 return false;
175
176 transfer_xml_attrs(attrs, nb_attributes);
177
178 if (!(check_required_attribute("name") && check_required_attribute("size"))) {
179 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
180 return false;
181 }
182
183 // This getter (dim_def) allocates a new object if needed.
184 dim_def()->set_name(xml_attrs["name"].value);
185 try {
186 dim_def()->set_size(xml_attrs["size"].value);
187 } catch (Error &e) {
188 dmr_error(this, "%s", e.get_error_message().c_str());
189 return false;
190 }
191
192 return true;
193}
194
212bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes) {
213 if (is_not(name, "Dim"))
214 return false;
215
216 transfer_xml_attrs(attrs, nb_attributes);
217
218 if (check_attribute("size") && check_attribute("name")) {
219 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
220 return false;
221 }
222 if (!(check_attribute("size") || check_attribute("name"))) {
223 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
224 return false;
225 }
226
227 if (!top_basetype()->is_vector_type()) {
228 // Make the top BaseType* an array
229 BaseType *b = top_basetype();
230 pop_basetype();
231
232 Array *a = static_cast<Array *>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
233 a->set_is_dap4(true);
234 a->add_var_nocopy(b);
235 a->set_attributes_nocopy(b->attributes());
236 // trick: instead of popping b's attributes, copying them and then pushing
237 // a's copy, just move the pointer (but make sure there's only one object that
238 // references that pointer).
239 b->set_attributes_nocopy(0);
240
241 push_basetype(a);
242 }
243
244 assert(top_basetype()->is_vector_type());
245
246 Array *a = static_cast<Array *>(top_basetype());
247 if (check_attribute("size")) {
248
249 a->append_dim_ll(strtoll(xml_attrs["size"].value.c_str(), nullptr, 10));
250#if 0
251 a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
252#endif
253 return true;
254 } else if (check_attribute("name")) {
255 string name = xml_attrs["name"].value;
256
257 D4Dimension *dim = 0;
258 if (name[0] == '/') // lookup the Dimension in the root group
259 dim = dmr()->root()->find_dim(name);
260 else // get enclosing Group and lookup Dimension there
261 dim = top_group()->find_dim(name);
262
263 if (!dim)
264 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
265 a->append_dim(dim);
266 return true;
267 }
268
269 return false;
270}
271
272bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes) {
273 if (is_not(name, "Map"))
274 return false;
275
276 transfer_xml_attrs(attrs, nb_attributes);
277
278 if (!check_attribute("name")) {
279 dmr_error(this, "The 'name' attribute must be used in a Map element.");
280 return false;
281 }
282
283 if (!top_basetype()->is_vector_type()) {
284 // Make the top BaseType* an array
285 BaseType *b = top_basetype();
286 pop_basetype();
287
288 Array *a = static_cast<Array *>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
289 a->set_is_dap4(true);
290 a->add_var_nocopy(b);
291 a->set_attributes_nocopy(b->attributes());
292 // trick: instead of popping b's attributes, copying them and then pushing
293 // a's copy, just move the pointer (but make sure there's only one object that
294 // references that pointer).
295 b->set_attributes_nocopy(0);
296
297 push_basetype(a);
298 }
299
300 assert(top_basetype()->is_vector_type());
301
302 Array *a = static_cast<Array *>(top_basetype());
303
304 string map_name = xml_attrs["name"].value;
305 if (xml_attrs["name"].value[0] != '/')
306 map_name = top_group()->FQN() + map_name;
307
308 Array *map_source = 0; // The array variable that holds the data for the Map
309
310 if (map_name[0] == '/') // lookup the Map in the root group
311 map_source = dmr()->root()->find_map_source(map_name);
312 else // get enclosing Group and lookup Map there
313 map_source = top_group()->find_map_source(map_name);
314
315 // Change: If the parser is in 'strict' mode (the default) and the Array named by
316 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
317 // mode), then this is not an error. However, the Array referenced by the Map will
318 // be null. This is a change in the parser's behavior to accommodate requests for
319 // Arrays that include Maps that do not also include the Map(s) in the request.
320 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
321 if (!map_source && d_strict)
322 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
323
324 a->maps()->add_map(new D4Map(map_name, map_source));
325
326 return true;
327}
328
329bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes) {
330 if (is_not(name, "Group"))
331 return false;
332
333 transfer_xml_attrs(attrs, nb_attributes);
334
335 if (!check_required_attribute("name")) {
336 dmr_error(this, "The required attribute 'name' was missing from a Group element.");
337 return false;
338 }
339
340 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
341 if (!btp) {
342 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
343 return false;
344 }
345
346 D4Group *grp = static_cast<D4Group *>(btp);
347
348 // Need to set this to get the D4Attribute behavior in the type classes
349 // shared between DAP2 and DAP4. jhrg 4/18/13
350 grp->set_is_dap4(true);
351
352 // link it up and change the current group
353 D4Group *parent = top_group();
354 if (!parent) {
355 dmr_fatal_error(this, "No Group on the Group stack.");
356 return false;
357 }
358
359 grp->set_parent(parent);
360 parent->add_group_nocopy(grp);
361
362 push_group(grp);
363 push_attributes(grp->attributes());
364 return true;
365}
366
373inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes) {
374 if (is_not(name, "Attribute"))
375 return false;
376
377 // These methods set the state to parser_error if a problem is found.
378 transfer_xml_attrs(attrs, nb_attributes);
379
380 // add error
381 if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
382 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
383 return false;
384 }
385
386 if (xml_attrs["type"].value == "Container") {
387 push_state(inside_attribute_container);
388
389 DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
390 D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
391
392 D4Attributes *tos = top_attributes();
393 // add return
394 if (!tos) {
395 delete child;
396 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
397 return false;
398 }
399
400 tos->add_attribute_nocopy(child);
401 push_attributes(child->attributes());
402 } else if (xml_attrs["type"].value == "OtherXML") {
403 push_state(inside_other_xml_attribute);
404
405 dods_attr_name = xml_attrs["name"].value;
406 dods_attr_type = xml_attrs["type"].value;
407 } else {
408 push_state(inside_attribute);
409
410 dods_attr_name = xml_attrs["name"].value;
411 dods_attr_type = xml_attrs["type"].value;
412 }
413
414 return true;
415}
416
422inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes) {
423 if (is_not(name, "Enumeration"))
424 return false;
425
426 transfer_xml_attrs(attrs, nb_attributes);
427
428 if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
429 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
430 return false;
431 }
432
433 Type t = get_type(xml_attrs["basetype"].value.c_str());
434 if (!is_integer_type(t)) {
435 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
436 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
437 return false;
438 }
439
440 // This getter allocates a new object if needed.
441 string enum_def_path = xml_attrs["name"].value;
442#if 0
443 // Use FQNs when things are referenced, not when they are defined
444 if (xml_attrs["name"].value[0] != '/')
445 enum_def_path = top_group()->FQN() + enum_def_path;
446#endif
447 enum_def()->set_name(enum_def_path);
448 enum_def()->set_type(t);
449
450 return true;
451}
452
453inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes) {
454 if (is_not(name, "EnumConst"))
455 return false;
456
457 // These methods set the state to parser_error if a problem is found.
458 transfer_xml_attrs(attrs, nb_attributes);
459
460 if (!(check_required_attribute("name") && check_required_attribute("value"))) {
461 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
462 return false;
463 }
464
465 istringstream iss(xml_attrs["value"].value);
466 long long value = 0;
467 iss >> skipws >> value;
468 if (iss.fail() || iss.bad()) {
469 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
470 xml_attrs["value"].value.c_str());
471 } else if (!enum_def()->is_valid_enum_value(value)) {
472 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
473 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
474 } else {
475 // unfortunate choice of names... args are 'label' and 'value'
476 enum_def()->add_value(xml_attrs["name"].value, value);
477 }
478
479 return true;
480}
481
487inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes) {
488 Type t = get_type(name);
489 if (is_simple_type(t)) {
490 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
491 return true;
492 } else {
493 switch (t) {
494 case dods_structure_c:
495 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
496 return true;
497
498 case dods_sequence_c:
499 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
500 return true;
501
502 default:
503 return false;
504 }
505 }
506}
507
515void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes) {
516 transfer_xml_attrs(attrs, nb_attributes);
517
518 if (check_required_attribute("name")) {
519 BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
520 if (!btp) {
521 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
522 return;
523 }
524
525 if ((t == dods_enum_c) && check_required_attribute("enum")) {
526 D4EnumDef *enum_def = 0;
527 string enum_path = xml_attrs["enum"].value;
528 if (enum_path[0] == '/')
529 enum_def = dmr()->root()->find_enum_def(enum_path);
530 else
531 enum_def = top_group()->find_enum_def(enum_path);
532
533 if (!enum_def)
534 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
535
536 static_cast<D4Enum *>(btp)->set_enumeration(enum_def);
537 }
538
539 btp->set_is_dap4(true); // see comment above
540 push_basetype(btp);
541
542 push_attributes(btp->attributes());
543
544 push_state(s);
545 }
546}
547
554
560 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
561 parser->d_error_msg = "";
562 parser->char_data = "";
563
564 // Set this in intern_helper so that the loop test for the parser_end
565 // state works for the first iteration. It seems like XMLParseChunk calls this
566 // function on it's first run. jhrg 9/16/13
567 // parser->push_state(parser_start);
568
569 parser->push_attributes(parser->dmr()->root()->attributes());
570
571 if (parser->debug())
572 cerr << "Parser start state: " << states[parser->get_state()] << endl;
573}
574
578 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
579
580 if (parser->debug())
581 cerr << "Parser end state: " << states[parser->get_state()] << endl;
582
583 if (parser->get_state() != parser_end)
584 D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
585
586 // If we've found any sort of error, don't make the DMR; intern() will
587 // take care of the error.
588 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
589 return;
590
591 if (!parser->empty_basetype() || parser->empty_group())
593 "The document did not contain a valid root Group or contained unbalanced tags.");
594
595 parser->pop_group(); // leave the stack 'clean'
596 parser->pop_attributes();
597}
598
612void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
613 int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
614 int /*nb_defaulted*/, const xmlChar **attributes) {
615 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
616 const char *localname = (const char *)l;
617
618 if (parser->debug())
619 cerr << "Start element " << localname << " prefix: " << (prefix ? (char *)prefix : "null")
620 << " ns: " << (URI ? (char *)URI : "null") << " (state: " << states[parser->get_state()] << ")" << endl;
621
622 if (parser->get_state() != parser_error) {
623 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
624 if (parser->debug())
625 cerr << "dap4_ns_name: " << dap4_ns_name << endl;
626
627 string this_element_ns_name = (URI != 0) ? ((char *)URI) : "";
628 if (parser->debug())
629 cerr << "this_element_ns_name: " << this_element_ns_name << endl;
630
631 if (this_element_ns_name.compare(dap4_ns_name)) {
632 if (parser->debug())
633 cerr << "Start of non DAP4 element: " << localname << " detected." << endl;
634 parser->push_state(not_dap4_element);
635 // return;
636 }
637 }
638
639 switch (parser->get_state()) {
640 case parser_start:
641 if (is_not(localname, "Dataset"))
642 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.",
643 localname);
644
645 parser->root_ns = URI ? (const char *)URI : "";
646 parser->transfer_xml_attrs(attributes, nb_attributes);
647
648 if (parser->check_required_attribute(string("name")))
649 parser->dmr()->set_name(parser->xml_attrs["name"].value);
650
651 if (parser->check_attribute("dapVersion"))
652 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
653
654 if (parser->check_attribute("dmrVersion"))
655 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
656
657 if (parser->check_attribute("base"))
658 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
659
660 if (!parser->root_ns.empty())
661 parser->dmr()->set_namespace(parser->root_ns);
662
663 // Push the root Group on the stack
664 parser->push_group(parser->dmr()->root());
665
666 parser->push_state(inside_dataset);
667
668 break;
669
670 // Both inside dataset and inside group can have the same stuff.
671 // The difference is that the Dataset holds the root group, which
672 // must be present; other groups are optional
673 case inside_dataset:
674 case inside_group:
675 if (parser->process_enum_def(localname, attributes, nb_attributes))
676 parser->push_state(inside_enum_def);
677 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
678 parser->push_state(inside_dim_def);
679 else if (parser->process_group(localname, attributes, nb_attributes))
680 parser->push_state(inside_group);
681 else if (parser->process_variable(localname, attributes, nb_attributes))
682 // This will push either inside_simple_type or inside_structure
683 // onto the parser state stack.
684 break;
685 else if (parser->process_attribute(localname, attributes, nb_attributes))
686 // This will push either inside_attribute, inside_attribute_container
687 // or inside_otherxml_attribute onto the parser state stack
688 break;
689 else
691 parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
692 localname);
693 break;
694
695 case inside_attribute_container:
696 if (parser->process_attribute(localname, attributes, nb_attributes))
697 break;
698 else
699 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
700 break;
701
702 case inside_attribute:
703 if (parser->process_attribute(localname, attributes, nb_attributes))
704 break;
705 else if (strcmp(localname, "Value") == 0)
706 parser->push_state(inside_attribute_value);
707 else
708 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
709 break;
710
711 case inside_attribute_value:
712 // Attribute values are processed by the end element code.
713 break;
714
715 case inside_other_xml_attribute:
716 parser->other_xml_depth++;
717
718 // Accumulate the elements here
719 parser->other_xml.append("<");
720 if (prefix) {
721 parser->other_xml.append((const char *)prefix);
722 parser->other_xml.append(":");
723 }
724 parser->other_xml.append(localname);
725
726 if (nb_namespaces != 0) {
727 parser->transfer_xml_ns(namespaces, nb_namespaces);
728
729 for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
730 ++i) {
731 parser->other_xml.append(" xmlns");
732 if (!i->first.empty()) {
733 parser->other_xml.append(":");
734 parser->other_xml.append(i->first);
735 }
736 parser->other_xml.append("=\"");
737 parser->other_xml.append(i->second);
738 parser->other_xml.append("\"");
739 }
740 }
741
742 if (nb_attributes != 0) {
743 parser->transfer_xml_attrs(attributes, nb_attributes);
744 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
745 parser->other_xml.append(" ");
746 if (!i->second.prefix.empty()) {
747 parser->other_xml.append(i->second.prefix);
748 parser->other_xml.append(":");
749 }
750 parser->other_xml.append(i->first);
751 parser->other_xml.append("=\"");
752 parser->other_xml.append(i->second.value);
753 parser->other_xml.append("\"");
754 }
755 }
756
757 parser->other_xml.append(">");
758 break;
759
760 case inside_enum_def:
761 // process an EnumConst element
762 if (parser->process_enum_const(localname, attributes, nb_attributes))
763 parser->push_state(inside_enum_const);
764 else
765 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
766 break;
767
768 case inside_enum_const:
769 // No content; nothing to do
770 break;
771
772 case inside_dim_def:
773 // No content; nothing to do
774 break;
775#if 0
776 case inside_dimension:
777 // No content.
778 break;
779#endif
780 case inside_dim:
781 // No content.
782 break;
783
784 case inside_map:
785 // No content.
786 break;
787
788 case inside_simple_type:
789 if (parser->process_attribute(localname, attributes, nb_attributes))
790 break;
791 else if (parser->process_dimension(localname, attributes, nb_attributes))
792 parser->push_state(inside_dim);
793 else if (parser->process_map(localname, attributes, nb_attributes))
794 parser->push_state(inside_map);
795 else
796 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
797 break;
798
799 case inside_constructor:
800 if (parser->process_variable(localname, attributes, nb_attributes))
801 // This will push either inside_simple_type or inside_structure
802 // onto the parser state stack.
803 break;
804 else if (parser->process_attribute(localname, attributes, nb_attributes))
805 break;
806 else if (parser->process_dimension(localname, attributes, nb_attributes))
807 parser->push_state(inside_dim);
808 else if (parser->process_map(localname, attributes, nb_attributes))
809 parser->push_state(inside_map);
810 else
811 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.",
812 localname);
813 break;
814
815 case not_dap4_element:
816 if (parser->debug())
817 cerr << "Inside non DAP4 element. localname: " << localname << endl;
818 break;
819
820 case parser_unknown:
821 // FIXME?
822 // *** Never used? If so remove/error
823 parser->push_state(parser_unknown);
824 break;
825
826 case parser_error:
827 case parser_fatal_error:
828 break;
829
830 case parser_end:
831 // FIXME Error?
832 break;
833 }
834
835 if (parser->debug())
836 cerr << "Start element exit state: " << states[parser->get_state()] << endl;
837}
838
839void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI) {
840 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
841 const char *localname = (const char *)l;
842
843 if (parser->debug())
844 cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
845
846 switch (parser->get_state()) {
847 case parser_start:
848 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
849 break;
850
851 case inside_dataset:
852 if (is_not(localname, "Dataset"))
853 D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
854
855 parser->pop_state();
856 if (parser->get_state() != parser_start)
857 dmr_fatal_error(parser, "Unexpected state, expected start state.");
858 else {
859 parser->pop_state();
860 parser->push_state(parser_end);
861 }
862 break;
863
864 case inside_group: {
865 if (is_not(localname, "Group"))
866 D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
867
868 if (!parser->empty_basetype() || parser->empty_group())
870 "The document did not contain a valid root Group or contained unbalanced tags.");
871
872 parser->pop_group();
873 parser->pop_state();
874 break;
875 }
876
877 case inside_attribute_container:
878 if (is_not(localname, "Attribute"))
879 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
880
881 parser->pop_state();
882 parser->pop_attributes();
883 break;
884
885 case inside_attribute:
886 if (is_not(localname, "Attribute"))
887 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
888
889 parser->pop_state();
890 break;
891
892 case inside_attribute_value: {
893 if (is_not(localname, "Value"))
894 D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
895
896 parser->pop_state();
897
898 // The old code added more values using the name and type as
899 // indexes to find the correct attribute. Use get() for that
900 // now. Or fix this code to keep a pointer to the to attribute...
901 D4Attributes *attrs = parser->top_attributes();
902 D4Attribute *attr = attrs->get(parser->dods_attr_name);
903 if (!attr) {
904 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
905 attrs->add_attribute_nocopy(attr);
906 }
907 attr->add_value(parser->char_data);
908
909 parser->char_data = ""; // Null this after use.
910 break;
911 }
912
913 case inside_other_xml_attribute: {
914 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *)URI) {
915 parser->pop_state();
916
917 // The old code added more values using the name and type as
918 // indexes to find the correct attribute. Use get() for that
919 // now. Or fix this code to keep a pointer to the to attribute...
920 D4Attributes *attrs = parser->top_attributes();
921 D4Attribute *attr = attrs->get(parser->dods_attr_name);
922 if (!attr) {
923 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
924 attrs->add_attribute_nocopy(attr);
925 }
926 attr->add_value(parser->other_xml);
927
928 parser->other_xml = ""; // Null this after use.
929 } else {
930 if (parser->other_xml_depth == 0) {
931 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
932 localname);
933 break;
934 }
935 parser->other_xml_depth--;
936
937 parser->other_xml.append("</");
938 if (prefix) {
939 parser->other_xml.append((const char *)prefix);
940 parser->other_xml.append(":");
941 }
942 parser->other_xml.append(localname);
943 parser->other_xml.append(">");
944 }
945 break;
946 }
947
948 case inside_enum_def:
949 if (is_not(localname, "Enumeration"))
950 D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
951 if (!parser->top_group())
953 parser, "Expected a Group to be the current item, while finishing up an Enumeration.");
954 else {
955 // copy the pointer; not a deep copy
956 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
957 // Set the enum_def to null; next call to enum_def() will
958 // allocate a new object
959 parser->clear_enum_def();
960 parser->pop_state();
961 }
962 break;
963
964 case inside_enum_const:
965 if (is_not(localname, "EnumConst"))
966 D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
967
968 parser->pop_state();
969 break;
970
971 case inside_dim_def: {
972 if (is_not(localname, "Dimension"))
973 D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
974
975 if (!parser->top_group())
977 "Expected a Group to be the current item, while finishing up an Dimension.");
978
979 // FIXME Use the Group on the top of the group stack
980 // copy the pointer; not a deep copy
981 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
982 // parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
983 // Set the dim_def to null; next call to dim_def() will
984 // allocate a new object. Calling 'clear' is important because
985 // the cleanup method will free dim_def if it's not null and
986 // we just copied the pointer in the add_dim_nocopy() call
987 // above.
988 parser->clear_dim_def();
989 parser->pop_state();
990 break;
991 }
992
993 case inside_simple_type:
994 if (is_simple_type(get_type(localname))) {
995 BaseType *btp = parser->top_basetype();
996 parser->pop_basetype();
997 parser->pop_attributes();
998
999 BaseType *parent = 0;
1000 if (!parser->empty_basetype())
1001 parent = parser->top_basetype();
1002 else if (!parser->empty_group())
1003 parent = parser->top_group();
1004 else {
1005 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1006 localname);
1007 delete btp;
1008 parser->pop_state();
1009 break;
1010 }
1011
1012 if (parent->type() == dods_array_c)
1013 static_cast<Array *>(parent)->prototype()->add_var_nocopy(btp);
1014 else
1015 parent->add_var_nocopy(btp);
1016 } else
1017 D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1018
1019 parser->pop_state();
1020 break;
1021
1022 case inside_dim:
1023 if (is_not(localname, "Dim"))
1024 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1025
1026 parser->pop_state();
1027 break;
1028
1029 case inside_map:
1030 if (is_not(localname, "Map"))
1031 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1032
1033 parser->pop_state();
1034 break;
1035
1036 case inside_constructor: {
1037 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1038 D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1039 return;
1040 }
1041
1042 BaseType *btp = parser->top_basetype();
1043 parser->pop_basetype();
1044 parser->pop_attributes();
1045
1046 BaseType *parent = 0;
1047 if (!parser->empty_basetype())
1048 parent = parser->top_basetype();
1049 else if (!parser->empty_group())
1050 parent = parser->top_group();
1051 else {
1052 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1053 localname);
1054 delete btp;
1055 parser->pop_state();
1056 break;
1057 }
1058
1059 // TODO Why doesn't this code mirror the simple_var case and test
1060 // for the parent being an array? jhrg 10/13/13
1061 parent->add_var_nocopy(btp);
1062 parser->pop_state();
1063 break;
1064 }
1065
1066 case not_dap4_element:
1067 if (parser->debug())
1068 cerr << "End of non DAP4 element: " << localname << endl;
1069 parser->pop_state();
1070 break;
1071
1072 case parser_unknown:
1073 parser->pop_state();
1074 break;
1075
1076 case parser_error:
1077 case parser_fatal_error:
1078 break;
1079
1080 case parser_end:
1081 // FIXME Error?
1082 break;
1083 }
1084
1085 if (parser->debug())
1086 cerr << "End element exit state: " << states[parser->get_state()] << endl;
1087}
1088
1092void D4ParserSax2::dmr_get_characters(void *p, const xmlChar *ch, int len) {
1093 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
1094
1095 switch (parser->get_state()) {
1096 case inside_attribute_value:
1097 parser->char_data.append((const char *)(ch), len);
1098 DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1099 break;
1100
1101 case inside_other_xml_attribute:
1102 parser->other_xml.append((const char *)(ch), len);
1103 DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1104 break;
1105
1106 default:
1107 break;
1108 }
1109}
1110
1115void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len) {
1116 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
1117
1118 switch (parser->get_state()) {
1119 case inside_other_xml_attribute:
1120 parser->other_xml.append((const char *)(ch), len);
1121 break;
1122
1123 default:
1124 break;
1125 }
1126}
1127
1133void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len) {
1134 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
1135
1136 switch (parser->get_state()) {
1137 case inside_other_xml_attribute:
1138 parser->other_xml.append((const char *)(value), len);
1139 break;
1140
1141 case parser_unknown:
1142 break;
1143
1144 default:
1145 D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1146
1147 break;
1148 }
1149}
1150
1155xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar *name) { return xmlGetPredefinedEntity(name); }
1156
1167void D4ParserSax2::dmr_fatal_error(void *p, const char *msg, ...) {
1168 va_list args;
1169 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
1170
1171 parser->push_state(parser_fatal_error);
1172
1173 va_start(args, msg);
1174 char str[1024];
1175 vsnprintf(str, 1024, msg, args);
1176 va_end(args);
1177
1178 int line = xmlSAX2GetLineNumber(parser->d_context);
1179
1180 if (!parser->d_error_msg.empty())
1181 parser->d_error_msg += "\n";
1182 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1183}
1184
1185void D4ParserSax2::dmr_error(void *p, const char *msg, ...) {
1186 va_list args;
1187 D4ParserSax2 *parser = static_cast<D4ParserSax2 *>(p);
1188
1189 parser->push_state(parser_error);
1190
1191 va_start(args, msg);
1192 char str[1024];
1193 vsnprintf(str, 1024, msg, args);
1194 va_end(args);
1195
1196 int line = xmlSAX2GetLineNumber(parser->d_context);
1197
1198 if (!parser->d_error_msg.empty())
1199 parser->d_error_msg += "\n";
1200 parser->d_error_msg += "At line " + long_to_string(line) + ": " + string(str);
1201}
1202
1203
1207void D4ParserSax2::cleanup_parse() {
1208 bool wellFormed = d_context->wellFormed;
1209 bool valid = d_context->valid;
1210
1211 // d_context->sax = NULL;
1212 xmlFreeParserCtxt(d_context);
1213
1214 delete d_enum_def;
1215 d_enum_def = 0;
1216
1217 delete d_dim_def;
1218 d_dim_def = 0;
1219
1220 // If there's an error, there may still be items on the stack at the
1221 // end of the parse.
1222 while (!btp_stack.empty()) {
1223 delete top_basetype();
1224 pop_basetype();
1225 }
1226
1227 if (!wellFormed)
1228 throw Error("The DMR was not well formed. " + d_error_msg);
1229 else if (!valid)
1230 throw Error("The DMR was not valid." + d_error_msg);
1231 else if (get_state() == parser_error)
1232 throw Error(d_error_msg);
1233 else if (get_state() == parser_fatal_error)
1234 throw InternalErr(d_error_msg);
1235}
1236
1251void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug) {
1252 d_debug = debug;
1253
1254 // Code example from libxml2 docs re: read from a stream.
1255
1256 if (!f.good())
1257 throw Error("Input stream not open or read error");
1258 if (!dest_dmr)
1259 throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1260
1261 d_dmr = dest_dmr; // dump values here
1262#if 0
1263 int line_num = 1;
1264 string line;
1265
1266 // Get the <xml ... ?> line
1267 getline(f, line);
1268 if (line.length() == 0) throw Error("No input found while parsing the DMR.");
1269
1270 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1271
1272 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1273 d_context->validate = true;
1274 push_state(parser_start);
1275
1276 // Get the first line of stuff
1277 getline(f, line);
1278 ++line_num;
1279
1280 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1281
1282 while (!f.eof() && (get_state() != parser_end)) {
1283 xmlParseChunk(d_context, line.c_str(), line.length(), 0);
1284
1285 // Get the next line
1286 getline(f, line);
1287 ++line_num;
1288
1289 if (debug) cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1290 }
1291 // This call ends the parse.
1292 xmlParseChunk(d_context, line.c_str(), 0, 1/*terminate*/);
1293
1294#else
1295 int line_num = 1;
1296 string line;
1297
1298 // Get the XML prolog line (looks like: <?xml ... ?> )
1299 getline(f, line);
1300 if (line.length() == 0)
1301 throw Error("No input found while parsing the DMR.");
1302
1303 if (debug)
1304 cerr << "line: (" << line_num << "): " << endl << line << endl << endl;
1305
1306 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, line.c_str(), line.length(), "stream");
1307 d_context->validate = true;
1308 push_state(parser_start);
1309
1310 // Get the first chunk of the stuff
1311 long chunk_count = 0;
1312 long chunk_size = 0;
1313
1314 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1315 chunk_size = f.gcount();
1316 d_parse_buffer[chunk_size] =
1317 0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1318 if (debug)
1319 cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1320
1321 while (!f.eof() && (get_state() != parser_end)) {
1322
1323 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 0);
1324
1325 // There is more to read. Get the next chunk
1326 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1327 chunk_size = f.gcount();
1328 d_parse_buffer[chunk_size] = 0; // null terminate the string. We can do it this way because the buffer is +1
1329 // bigger than D4_PARSE_BUFF_SIZE
1330 if (debug)
1331 cerr << "chunk: (" << chunk_count++ << "): " << endl << d_parse_buffer << endl << endl;
1332 }
1333
1334 // This call ends the parse.
1335 xmlParseChunk(d_context, d_parse_buffer, chunk_size, 1 /*terminate*/);
1336#endif
1337
1338 // This checks that the state on the parser stack is parser_end and throws
1339 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1340 cleanup_parse();
1341}
1342
1357void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug) {
1358 intern(document.c_str(), document.length(), dest_dmr, debug);
1359}
1360
1371void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug) {
1372 if (!(size > 0))
1373 return;
1374
1375 d_debug = debug;
1376
1377 // Code example from libxml2 docs re: read from a stream.
1378
1379 if (!dest_dmr)
1380 throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1381 d_dmr = dest_dmr; // dump values in dest_dmr
1382
1383 push_state(parser_start);
1384 d_context = xmlCreatePushParserCtxt(&d_dmr_sax_parser, this, buffer, size, "stream");
1385 d_context->validate = true;
1386
1387 // This call ends the parse.
1388 xmlParseChunk(d_context, buffer, 0, 1 /*terminate*/);
1389
1390 // This checks that the state on the parser stack is parser_end and throws
1391 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1392 cleanup_parse();
1393}
1394
1395} // namespace libdap
@ attr_container_c
#define D4_PARSE_BUFF_SIZE
A multidimensional array of identical data types.
Definition Array.h:121
void add_var_nocopy(BaseType *v, Part p=nil) override
Definition Array.cc:452
The basic data type for the DODS DAP types.
Definition BaseType.h:118
virtual D4Attributes * attributes()
Definition BaseType.cc:507
virtual void add_var_nocopy(BaseType *bt, Part part=nil)
Definition BaseType.cc:699
virtual Type type() const
Returns the type of the class instance.
Definition BaseType.cc:329
void add_value(const string &value)
D4Attribute * get(const string &fqn)
void add_attribute_nocopy(D4Attribute *attr)
void add_dim_nocopy(D4Dimension *dim)
void add_enum_nocopy(D4EnumDef *enum_def)
Definition D4EnumDefs.h:151
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition D4Group.h:84
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition D4Group.h:100
void intern(istream &f, DMR *dest_dmr, bool debug=false)
void set_dap_version(const std::string &version_string)
Definition DMR.cc:239
void set_name(const std::string &n)
Definition DMR.h:126
D4Group * root()
Definition DMR.cc:228
void set_dmr_version(const std::string &v)
Definition DMR.h:152
void set_namespace(const std::string &ns)
Set the namespace for this DMR.
Definition DMR.h:164
void set_request_xml_base(const std::string &xb)
Definition DMR.h:158
static std::string getDapNamespaceString(DAPVersion version)
A class for error processing.
Definition Error.h:92
A class for software fault reporting.
Definition InternalErr.h:61
#define DBG(x)
Definition debug.h:58
static void dmr_start_document(void *parser)
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_error(void *parser, const char *msg,...)
static void dmr_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
top level DAP object to house generic methods
Definition AISConnect.cc:30
Type
Identifies the data type.
Definition Type.h:94
@ dods_group_c
Definition Type.h:122
@ dods_sequence_c
Definition Type.h:108
@ dods_enum_c
Definition Type.h:120
@ dods_structure_c
Definition Type.h:106
@ dods_array_c
Definition Type.h:107
string long_to_string(long val, int base)
Definition util.cc:946
D4AttributeType StringToD4AttributeType(string s)
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition util.cc:766
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition util.cc:688
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition util.cc:805
bool is_integer_type(Type t)
Definition util.cc:888
ObjectType get_type(const string &value)
Definition mime_util.cc:300