bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
DmrppParserSax2.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2012 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27#include <iostream>
28#include <sstream>
29
30#include <cstring>
31#include <cstdarg>
32#include <cassert>
33
34#include <libxml/parserInternals.h>
35
36#include <libdap/DMR.h>
37
38#include <libdap/BaseType.h>
39#include <libdap/Array.h>
40#include <libdap/D4Group.h>
41#include <libdap/D4Attributes.h>
42#include <libdap/D4Maps.h>
43#include <libdap/D4Enum.h>
44#include <libdap/D4BaseTypeFactory.h>
45
46#include <libdap/DapXmlNamespaces.h>
47#include <libdap/util.h>
48
49#include <BESInternalError.h>
50#include <BESDebug.h>
51#include <BESCatalog.h>
52#include <BESCatalogUtils.h>
53#include <BESCatalogList.h>
54#include <BESUtil.h>
55#include <TheBESKeys.h>
56#include <BESRegex.h>
57
58#include "DmrppRequestHandler.h"
59#include "DMRpp.h"
60#include "DmrppParserSax2.h"
61#include "DmrppCommon.h"
62#include "DmrppStr.h"
63#include "DmrppNames.h"
64#include "DmrppArray.h"
65
66#include "CurlUtils.h"
67#include "HttpNames.h"
68
69#include "Base64.h"
70
71#define FIVE_12K 524288
72#define ONE_MB 1048576
73#define MAX_INPUT_LINE_LENGTH ONE_MB
74#define INCLUDE_BESDEBUG_ISSET 0
75
76#define prolog std::string("DmrppParserSax2::").append(__func__).append("() - ")
77
78static const string dmrpp_namespace = "http://xml.opendap.org/dap/dmrpp/1.0.0#";
79
80
81using namespace libdap;
82using namespace std;
84
85namespace dmrpp {
86
// Printable names for the parser states. The table is indexed with the value
// returned by get_state() when composing debug messages.
// NOTE(review): the entries presumably must stay in the same order as the
// ParseState enumerators declared in the header - confirm before editing.
static const char *states[] = {
    "parser_start",
    "inside_dataset",
    "inside_group",                 // the state just after the start of a Group element
    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",
    "inside_enum_def",
    "inside_enum_const",
    "inside_dim_def",
    "inside_simple_type",           // covers Byte, ..., Url, Opaque
    "inside_dim",                   // (there is no separate "inside_array" state)
    "inside_map",
    "inside_constructor",
    "not_dap4_element",
    "inside_dmrpp_object",
    "inside_dmrpp_chunkDimensionSizes_element",
    "inside_dmrpp_compact_element",
    "parser_unknown",
    "parser_error",
    "parser_fatal_error",
    "parser_end"
};
115
/**
 * Test whether an element name differs from an expected tag.
 * @param name The element name to test (NUL-terminated)
 * @param tag The tag to compare against (NUL-terminated)
 * @return true when the two strings are different, false when they are equal
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
120
121
130D4EnumDef *
131DmrppParserSax2::enum_def()
132{
133 if (!d_enum_def) d_enum_def = new D4EnumDef;
134
135 return d_enum_def;
136}
137
144D4Dimension *
145DmrppParserSax2::dim_def()
146{
147 if (!d_dim_def) d_dim_def = new D4Dimension;
148
149 return d_dim_def;
150}
151
152/* Search through the attribute array for a given attribute name.
153 * If the name is found, return the string value for that attribute
154 * @param name: Search for this name
155 * @param attributes: Array that holds the attribute values to search
156 * @param num_attributes: Number of attributes
157 * @return string value of attribute; the empty string if the name was not found
158 */
159string DmrppParserSax2::get_attribute_val(const string &name, const xmlChar **attributes, int num_attributes)
160{
161 unsigned int index = 0;
162 for (int i = 0; i < num_attributes; ++i, index += 5) {
163 if (strncmp(name.c_str(), (const char *)attributes[index], name.size()) == 0) {
164 return string((const char *)attributes[index+3], (const char *)attributes[index+4]);
165 }
166 }
167 return "";
168}
169
170#if 0
176void DmrppParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
177{
178 if (!xml_attrs.empty()) xml_attrs.clear(); // erase old attributes
179
180 // Make a value using the attribute name and the prefix, namespace URI
181 // and the value. The prefix might be null.
182 unsigned int index = 0;
183 for (int i = 0; i < nb_attributes; ++i, index += 5) {
184 xml_attrs.insert(
185 map<string, XMLAttribute>::value_type(string((const char *) attributes[index]),
186 XMLAttribute(attributes + index + 1)));
187
188 BESDEBUG(PARSER, prolog <<
189 "XML Attribute '" << (const char *)attributes[index] << "': " << xml_attrs[(const char *)attributes[index]].value << endl);
190 }
191}
192#endif
193
200void DmrppParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
201{
202 // make a value with the prefix and namespace URI. The prefix might be null.
203 for (int i = 0; i < nb_namespaces; ++i) {
204 namespace_table.insert(
205 map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *) namespaces[i * 2] : "",
206 (const char *) namespaces[i * 2 + 1]));
207 }
208}
209
210#if 0
217bool DmrppParserSax2::check_required_attribute(const string & attr)
218{
219 if (xml_attrs.find(attr) == xml_attrs.end()) {
220 dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
221 return false;
222 }
223 else
224 return true;
225}
226#endif
227
228/*
229 * An improved version of the previous check_required_attribute.
230 * Searches for an attribute name within the attribute array.
231 * @param name: The attribute name to search for
232 * @param attributes: The attribute array
233 * @param num_attributes: The number of attributes
234 * @return success: true
235 * failure: false
236 */
237bool DmrppParserSax2::check_required_attribute(const string &name, const xmlChar **attributes, int num_attributes)
238{
239 unsigned int index = 0;
240 for (int i = 0; i < num_attributes; ++i, index += 5) {
241 if (strncmp(name.c_str(), (const char *)attributes[index], name.size()) == 0) {
242 return true;
243 }
244 }
245
246 dmr_error(this, "Required attribute '%s' not found.", name.c_str());
247 return false;
248}
249
250#if 0
257bool DmrppParserSax2::check_attribute(const string & attr)
258{
259 return (xml_attrs.find(attr) != xml_attrs.end());
260}
261#endif
262
273bool DmrppParserSax2::check_attribute(const string &name, const xmlChar **attributes, int num_attributes)
274{
275 unsigned int index = 0;
276 for (int i = 0; i < num_attributes; ++i, index += 5) {
277 if (strncmp(name.c_str(), (const char *)attributes[index], name.size()) == 0) {
278 return true;
279 }
280 }
281 return false;
282}
283
284bool DmrppParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
285{
286 if (is_not(name, "Dimension")) return false;
287
288#if 0
289 transfer_xml_attrs(attrs, nb_attributes);
290#endif
291
292#if 0
293 if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("size", attrs, nb_attributes))) {
294 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
295 return false;
296 }
297#endif
298
299 if (!check_required_attribute("name", attrs, nb_attributes)) {
300 dmr_error(this, "The required attribute 'name' was missing from a Dimension element.");
301 return false;
302 }
303
304 if (!check_required_attribute("size", attrs, nb_attributes)) {
305 dmr_error(this, "The required attribute 'size' was missing from a Dimension element.");
306 return false;
307 }
308
309 // This getter (dim_def) allocates a new object if needed.
310 dim_def()->set_name(get_attribute_val("name", attrs, nb_attributes));
311 try {
312 dim_def()->set_size(get_attribute_val("size", attrs, nb_attributes));
313 }
314 catch (Error &e) {
315 dmr_error(this, e.get_error_message().c_str());
316 return false;
317 }
318
319 return true;
320}
321
339bool DmrppParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
340{
341 if (is_not(name, "Dim")) return false;
342
343#if 0
344 transfer_xml_attrs(attrs, nb_attributes);
345#endif
346#if 0
347 if (check_attribute("size", attrs, nb_attributes) && check_attribute("name", attrs, nb_attributes)) {
348 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
349 return false;
350 }
351 if (!(check_attribute("size", attrs, nb_attributes) || check_attribute("name", attrs, nb_attributes))) {
352 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
353 return false;
354 }
355#endif
356 bool has_size = check_attribute("size", attrs, nb_attributes);
357 bool has_name = check_attribute("name", attrs, nb_attributes);
358 if (has_size && has_name) {
359 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
360 return false;
361 }
362 if (!has_size && !has_name) {
363 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
364 return false;
365 }
366
367
368 if (!top_basetype()->is_vector_type()) {
369 // Make the top BaseType* an array
370 BaseType *b = top_basetype();
371 pop_basetype();
372
373 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
374 a->set_is_dap4(true);
375 a->add_var_nocopy(b);
376 a->set_attributes_nocopy(b->attributes());
377 // trick: instead of popping b's attributes, copying them and then pushing
378 // a's copy, just move the pointer (but make sure there's only one object that
379 // references that pointer).
380 b->set_attributes_nocopy(0);
381
382 push_basetype(a);
383 }
384
385 assert(top_basetype()->is_vector_type());
386
387 Array *a = static_cast<Array*>(top_basetype());
388 if (has_size) {
389 size_t dim_size = stoll(get_attribute_val("size", attrs, nb_attributes));
390 BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << dim_size << endl);
391 a->append_dim_ll(dim_size); // low budget code for now. jhrg 8/20/13, modified to use new function. kln 9/7/19
392 return true;
393 }
394 else if (has_name) {
395 string name = get_attribute_val("name", attrs, nb_attributes);
396 BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name << endl);
397
398 D4Dimension *dim = 0;
399 if (name[0] == '/') // lookup the Dimension in the root group
400 dim = dmr()->root()->find_dim(name);
401 else
402 // get enclosing Group and lookup Dimension there
403 dim = top_group()->find_dim(name);
404
405 if (!dim)
406 throw BESInternalError("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
407 a->append_dim(dim);
408 return true;
409 }
410 return false;
411}
412
413
414bool DmrppParserSax2::process_dmrpp_compact_start(const char *name){
415 if ( strcmp(name, "compact") == 0) {
416 BESDEBUG(PARSER, prolog << "DMR++ compact element. localname: " << name << endl);
417 BaseType *bt = top_basetype();
418 if (!bt) throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
419 DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
420 if (!dc)
421 throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
422 dc->set_compact(true);
423 return true;
424 }
425 else {
426 return false;
427 }
428}
429
430
431void DmrppParserSax2::process_dmrpp_compact_end(const char *localname)
432{
433 BESDEBUG(PARSER, prolog << "BEGIN DMR++ compact element. localname: " << localname << endl);
434 if (is_not(localname, "compact"))
435 return;
436
437 BaseType *target = top_basetype();
438 if (!target)
439 throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
440 BESDEBUG(PARSER, prolog << "BaseType: " << target->type_name() << " " << target->name() << endl);
441
442 if (target->type() != dods_array_c)
443 throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
444
445 DmrppCommon *dc = dynamic_cast<DmrppCommon*>(target); // Get the Dmrpp common info
446 if (!dc)
447 throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
448
449 dc->set_compact(true);
450
451 // DmrppParserSax2::dmr_error(this, "Expected an end value tag; found '%s' instead.", localname);
452
453 std::string data(char_data);
454 BESDEBUG(PARSER, prolog << "Read compact element text. size: " << data.size() << " length: " << data.size() << " value: '" << data << "'" << endl);
455
456 std::vector <u_int8_t> decoded = base64::Base64::decode(data);
457
458 switch (target->var()->type()) {
459 case dods_array_c:
460 throw BESInternalError("Parser state has been corrupted. An Array may not be the template for an Array.", __FILE__, __LINE__);
461 break;
462
463 case dods_byte_c:
464 case dods_char_c:
465 case dods_int8_c:
466 case dods_uint8_c:
467 case dods_int16_c:
468 case dods_uint16_c:
469 case dods_int32_c:
470 case dods_uint32_c:
471 case dods_int64_c:
472 case dods_uint64_c:
473
474 case dods_enum_c:
475
476 case dods_float32_c:
477 case dods_float64_c:
478 target->val2buf(reinterpret_cast<void *>(decoded.data()));
479 target->set_read_p(true);
480 break;
481
482 case dods_str_c:
483 case dods_url_c:
484 {
485 std::string str(decoded.begin(), decoded.end());
486 DmrppArray *st = dynamic_cast<DmrppArray *>(target);
487 if(!st){
488 stringstream msg;
489 msg << prolog << "The target BaseType MUST be an array. and it's a " << target->type_name();
490 BESDEBUG(MODULE, msg.str() << endl);
491 throw BESInternalError(msg.str(),__FILE__,__LINE__);
492 }
493 st->val2buf(&str);
494 st->set_read_p(true);
495 }
496 break;
497
498 default:
499 throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
500 break;
501 }
502 char_data = ""; // Null this after use.
503
504 BESDEBUG(PARSER, prolog << "END" << endl);
505}
506
507bool DmrppParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
508{
509 if (is_not(name, "Map")) return false;
510
511#if 0
512 transfer_xml_attrs(attrs, nb_attributes);
513#endif
514
515 if (!check_attribute("name", attrs, nb_attributes)) {
516 dmr_error(this, "The 'name' attribute must be used in a Map element.");
517 return false;
518 }
519
520 if (!top_basetype()->is_vector_type()) {
521 // Make the top BaseType* an array
522 BaseType *b = top_basetype();
523 pop_basetype();
524
525 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
526 a->set_is_dap4(true);
527 a->add_var_nocopy(b);
528 a->set_attributes_nocopy(b->attributes());
529 // trick: instead of popping b's attributes, copying them and then pushing
530 // a's copy, just move the pointer (but make sure there's only one object that
531 // references that pointer).
532 b->set_attributes_nocopy(0);
533
534 push_basetype(a);
535 }
536
537 assert(top_basetype()->is_vector_type());
538
539 Array *a = static_cast<Array*>(top_basetype());
540
541 string map_name = get_attribute_val("name", attrs, nb_attributes);
542 if (get_attribute_val("name", attrs, nb_attributes).at(0) != '/') map_name = top_group()->FQN() + map_name;
543
544 Array *map_source = 0; // The array variable that holds the data for the Map
545
546 if (map_name[0] == '/') // lookup the Map in the root group
547 map_source = dmr()->root()->find_map_source(map_name);
548 else
549 // get enclosing Group and lookup Map there
550 map_source = top_group()->find_map_source(map_name);
551
552 // Change: If the parser is in 'strict' mode (the default) and the Array named by
553 // the Map cannot be found, it is an error. If 'strict' mode is false (permissive
554 // mode), then this is not an error. However, the Array referenced by the Map will
555 // be null. This is a change in the parser's behavior to accommodate requests for
556 // Arrays that include Maps that do not also include the Map(s) in the request.
557 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
558 if (!map_source && d_strict)
559 throw BESInternalError("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
560
561 a->maps()->add_map(new D4Map(map_name, map_source));
562
563 return true;
564}
565
566bool DmrppParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
567{
568 if (is_not(name, "Group")) return false;
569
570#if 0
571 transfer_xml_attrs(attrs, nb_attributes);
572#endif
573
574 if (!check_required_attribute("name", attrs, nb_attributes)) {
575 dmr_error(this, "The required attribute 'name' was missing from a Group element.");
576 return false;
577 }
578
579 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val("name", attrs, nb_attributes));
580 if (!btp) {
581 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", get_attribute_val("name", attrs, nb_attributes).c_str());
582 return false;
583 }
584
585 D4Group *grp = static_cast<D4Group*>(btp);
586
587 // Need to set this to get the D4Attribute behavior in the type classes
588 // shared between DAP2 and DAP4. jhrg 4/18/13
589 grp->set_is_dap4(true);
590
591 // link it up and change the current group
592 D4Group *parent = top_group();
593 if (!parent) {
594 dmr_fatal_error(this, "No Group on the Group stack.");
595 return false;
596 }
597
598 grp->set_parent(parent);
599 parent->add_group_nocopy(grp);
600
601 push_group(grp);
602 push_attributes(grp->attributes());
603 return true;
604}
605
612inline bool DmrppParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
613{
614 if (is_not(name, "Attribute")) return false;
615
616#if 0
617 // These methods set the state to parser_error if a problem is found.
618 transfer_xml_attrs(attrs, nb_attributes);
619#endif
620
621 // add error
622 if (!(check_required_attribute(string("name"), attrs, nb_attributes) && check_required_attribute(string("type"), attrs, nb_attributes))) {
623 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
624 return false;
625 }
626
627 if (get_attribute_val("type", attrs, nb_attributes) == "Container") {
628 push_state(inside_attribute_container);
629
630 BESDEBUG(PARSER, prolog << "Pushing attribute container " << get_attribute_val("name", attrs, nb_attributes) << endl);
631 D4Attribute *child = new D4Attribute(get_attribute_val("name", attrs, nb_attributes), attr_container_c);
632
633 D4Attributes *tos = top_attributes();
634 // add return
635 if (!tos) {
636 delete child;
637 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
638 return false;
639 }
640
641 tos->add_attribute_nocopy(child);
642 push_attributes(child->attributes());
643 }
644 else if (get_attribute_val("type", attrs, nb_attributes) == "OtherXML") {
645 push_state(inside_other_xml_attribute);
646
647 dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
648 dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
649 }
650 else {
651 push_state(inside_attribute);
652
653 dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
654 dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
655 }
656
657 return true;
658}
659
665inline bool DmrppParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
666{
667 if (is_not(name, "Enumeration")) return false;
668
669#if 0
670 transfer_xml_attrs(attrs, nb_attributes);
671#endif
672
673 if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("basetype", attrs, nb_attributes))) {
674 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
675 return false;
676 }
677
678 Type t = get_type(get_attribute_val("basetype", attrs, nb_attributes).c_str());
679 if (!is_integer_type(t)) {
680 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
681 get_attribute_val("name", attrs, nb_attributes).c_str(), get_attribute_val("basetype", attrs, nb_attributes).c_str());
682 return false;
683 }
684
685 // This getter allocates a new object if needed.
686 string enum_def_path = get_attribute_val("name", attrs, nb_attributes);
687#if 0
688 // Use FQNs when things are referenced, not when they are defined
689 if (xml_attrs["name"].value[0] != '/')
690 enum_def_path = top_group()->FQN() + enum_def_path;
691#endif
692 enum_def()->set_name(enum_def_path);
693 enum_def()->set_type(t);
694
695 return true;
696}
697
698inline bool DmrppParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
699{
700 if (is_not(name, "EnumConst")) return false;
701
702#if 0
703 // These methods set the state to parser_error if a problem is found.
704 transfer_xml_attrs(attrs, nb_attributes);
705#endif
706
707 if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("value", attrs, nb_attributes))) {
708 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
709 return false;
710 }
711
712 istringstream iss(get_attribute_val("value", attrs, nb_attributes));
713 long long value = 0;
714 iss >> skipws >> value;
715 if (iss.fail() || iss.bad()) {
716 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
717 get_attribute_val("value", attrs, nb_attributes).c_str());
718 }
719 else if (!enum_def()->is_valid_enum_value(value)) {
720 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
721 get_attribute_val("value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
722 }
723 else {
724 // unfortunate choice of names... args are 'label' and 'value'
725 enum_def()->add_value(get_attribute_val("name", attrs, nb_attributes), value);
726 }
727
728 return true;
729}
730
736inline bool DmrppParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
737{
738 Type t = get_type(name);
739 if (is_simple_type(t)) {
740 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
741 return true;
742 }
743 else {
744 switch (t) {
745 case dods_structure_c:
746 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
747 return true;
748
749 case dods_sequence_c:
750 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
751 return true;
752
753 default:
754 return false;
755 }
756 }
757}
758
766void DmrppParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
767{
768#if 0
769 transfer_xml_attrs(attrs, nb_attributes);
770#endif
771
772 if (check_required_attribute("name", attrs, nb_attributes)) {
773 BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val("name", attrs, nb_attributes));
774 if (!btp) {
775 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
776 return;
777 }
778
779 if ((t == dods_enum_c) && check_required_attribute("enum", attrs, nb_attributes)) {
780 D4EnumDef *enum_def = 0;
781 string enum_path = get_attribute_val("enum", attrs, nb_attributes);
782 if (enum_path[0] == '/')
783 enum_def = dmr()->root()->find_enum_def(enum_path);
784 else
785 enum_def = top_group()->find_enum_def(enum_path);
786
787 if (!enum_def) dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
788
789 static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
790 }
791
792 btp->set_is_dap4(true); // see comment above
793 push_basetype(btp);
794
795 push_attributes(btp->attributes());
796
797 push_state(s);
798 }
799}
800
807
812{
813 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
814 parser->error_msg = "";
815 parser->char_data = "";
816
817 // Set this in intern_helper so that the loop test for the parser_end
818 // state works for the first iteration. It seems like XMLParseChunk calls this
819 // function on it's first run. jhrg 9/16/13
820 // parser->push_state(parser_start);
821
822 parser->push_attributes(parser->dmr()->root()->attributes());
823
824 BESDEBUG(PARSER, prolog << "Parser start state: " << states[parser->get_state()] << endl);
825}
826
830{
831 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
832
833 BESDEBUG(PARSER, prolog << "Parser end state: " << states[parser->get_state()] << endl);
834
835 if (parser->get_state() != parser_end)
836 DmrppParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
837
838 // If we've found any sort of error, don't make the DMR; intern() will
839 // take care of the error.
840 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error) return;
841
842 if (!parser->empty_basetype() || parser->empty_group())
843 DmrppParserSax2::dmr_error(parser,
844 "The document did not contain a valid root Group or contained unbalanced tags.");
845
846#if INCLUDE_BESDEBUG_ISSET
847 if(BESDebug::IsSet(PARSER)){
848 ostream *os = BESDebug::GetStrm();
849 *os << prolog << "parser->top_group() BEGIN " << endl;
850 parser->top_group()->dump(*os);
851 *os << endl << prolog << "parser->top_group() END " << endl;
852 }
853#endif
854
855 parser->pop_group(); // leave the stack 'clean'
856 parser->pop_attributes();
857}
858
859void DmrppParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
860 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
861{
862 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
863 const char *localname = reinterpret_cast<const char *>(l);
864
865 string this_element_ns_name(URI ? (char *) URI : "null");
866
867 if (parser->get_state() != parser_error) {
868 string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
869 BESDEBUG(PARSER, prolog << "dap4_ns_name: " << dap4_ns_name << endl);
870
871 if (this_element_ns_name == dmrpp_namespace) {
872 if (strcmp(localname, "chunkDimensionSizes") == 0) {
873 BESDEBUG(PARSER, prolog << "Found dmrpp:chunkDimensionSizes element. Pushing state." << endl);
874 parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
875 }
876 else if (strcmp(localname, "compact") == 0) {
877 BESDEBUG(PARSER, prolog << "Found dmrpp:compact element. Pushing state." << endl);
878 parser->push_state(inside_dmrpp_compact_element);
879 }
880 else {
881 BESDEBUG(PARSER,
882 prolog << "Start of element in dmrpp namespace: " << localname << " detected." << endl);
883 parser->push_state(inside_dmrpp_object);
884 }
885 }
886 else if (this_element_ns_name != dap4_ns_name) {
887 BESDEBUG(PARSER, prolog << "Start of non DAP4 element: " << localname << " detected." << endl);
888 parser->push_state(not_dap4_element);
889 }
890 }
891
892 BESDEBUG(PARSER, prolog << "Start element " << localname << " prefix: " << (prefix ? (char *) prefix : "null") << " ns: "
893 << this_element_ns_name << " (state: " << states[parser->get_state()] << ")" << endl);
894
895 switch (parser->get_state()) {
896 case parser_start:
897 if (is_not(localname, "Dataset"))
898 DmrppParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.",
899 localname);
900
901 parser->root_ns = URI ? (const char *) URI : "";
902
903#if 0
904 parser->transfer_xml_attrs(attributes, nb_attributes);
905#endif
906
907 if (parser->check_required_attribute(string("name"), attributes, nb_attributes))
908 parser->dmr()->set_name(parser->get_attribute_val("name", attributes, nb_attributes));
909
910 // Record the DMR++ builder version number. For now, if this is present, we have a 'new'
911 // DMR++ and if it is not present, we have an old DMR++. One (the?) important difference
912 // between the two is that the new version has the order of the filters correct and the
913 // current version of the handler code _expects_ this. The old version of the DMR++ had
914 // the order reversed (at least for most - all? - data). So we have this kludge to enable
915 // those old DMR++ files to work. See DmrppCommon::set_filter() for the other half of the
916 // hack. Note that the attribute 'version' is in the dmrpp xml namespace. jhrg 11/9/21
917 if (parser->check_attribute("version", attributes, nb_attributes)) {
918 auto dmrpp = dynamic_cast<DMRpp*>(parser->dmr());
919 if (dmrpp)
920 dmrpp->set_version(parser->get_attribute_val("version", attributes, nb_attributes));
921 DmrppRequestHandler::d_emulate_original_filter_order_behavior = false;
922 }
923 else {
924 DmrppRequestHandler::d_emulate_original_filter_order_behavior = true;
925 }
926
927 if (parser->check_attribute("dapVersion", attributes, nb_attributes))
928 parser->dmr()->set_dap_version(parser->get_attribute_val("dapVersion", attributes, nb_attributes));
929
930 if (parser->check_attribute("dmrVersion", attributes, nb_attributes))
931 parser->dmr()->set_dmr_version(parser->get_attribute_val("dmrVersion", attributes, nb_attributes));
932
933 if (parser->check_attribute("base", attributes, nb_attributes)) {
934 parser->dmr()->set_request_xml_base(parser->get_attribute_val("base", attributes, nb_attributes));
935 }
936 BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << parser->dmr()->request_xml_base() << "'" << endl);
937
938 if (parser->check_attribute("href", attributes, nb_attributes)) {
939 bool trusted = false;
940 if (parser->check_attribute("trust", attributes, nb_attributes)) {
941 string value = parser->get_attribute_val("trust", attributes, nb_attributes);
942 trusted = value == "true";
943 }
944 string href = parser->get_attribute_val("href", attributes, nb_attributes);
945 parser->dmrpp_dataset_href = shared_ptr<http::url>(new http::url(href,trusted));
946 BESDEBUG(PARSER, prolog << "Processed 'href' value into data_url. href: " << parser->dmrpp_dataset_href->str() << (trusted?"(trusted)":"") << endl);
947
948 //######################################################################################################
949 // Stop parser EffectiveUrl resolution (ndp - 08/27/2021)
950 // I dropped this because:
951 // - The Chunk::get_data_url() method calls EffectiveUrlCache::TheCache()->get_effective_url(data_url)
952 // - EffectiveUrlCache::TheCache()->get_effective_url(data_url) method is thread safe
953 // - By dropping these calls from the parser, which is in a single threaded section of the code we can
954 // resolve the URL during a multithreaded operation (reading the chunks) and reduce the overall
955 // time cost of resolving all of the chunk URLs with concurrency.
956 // -----------------------------------------------------------------------------------------------------
957 //BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Dataset URL: " << parser->dmrpp_dataset_href->str() << endl);
958 //auto effective_url = EffectiveUrlCache::TheCache()->get_effective_url(parser->dmrpp_dataset_href);
959 //BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url->str() << endl);
960 //######################################################################################################
961
962 }
963 BESDEBUG(PARSER, prolog << "Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href->str() << "'" << endl);
964
965 if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
966
967 // Push the root Group on the stack
968 parser->push_group(parser->dmr()->root());
969
970 parser->push_state(inside_dataset);
971
972 break;
973
974 // Both inside dataset and inside group can have the same stuff.
975 // The difference is that the Dataset holds the root group, which
976 // must be present; other groups are optional
977 case inside_dataset:
978 case inside_group:
979 if (parser->process_enum_def(localname, attributes, nb_attributes))
980 parser->push_state(inside_enum_def);
981 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
982 parser->push_state(inside_dim_def);
983 else if (parser->process_group(localname, attributes, nb_attributes))
984 parser->push_state(inside_group);
985 else if (parser->process_variable(localname, attributes, nb_attributes))
986 // This will push either inside_simple_type or inside_structure
987 // onto the parser state stack.
988 break;
989 else if (parser->process_attribute(localname, attributes, nb_attributes))
990 // This will push either inside_attribute, inside_attribute_container
991 // or inside_otherxml_attribute onto the parser state stack
992 break;
993 else
994 DmrppParserSax2::dmr_error(parser,
995 "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
996 localname);
997 break;
998
999 case inside_attribute_container:
1000 if (parser->process_attribute(localname, attributes, nb_attributes))
1001 break;
1002 else
1003 DmrppParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
1004 break;
1005
1006 case inside_attribute:
1007 if (parser->process_attribute(localname, attributes, nb_attributes))
1008 break;
1009 else if (strcmp(localname, "Value") == 0)
1010 parser->push_state(inside_attribute_value);
1011 else
1012 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
1013 break;
1014
1015 case inside_attribute_value:
1016 // Attribute values are processed by the end element code.
1017 break;
1018
1019 case inside_other_xml_attribute:
1020 parser->other_xml_depth++;
1021
1022 // Accumulate the elements here
1023 parser->other_xml.append("<");
1024 if (prefix) {
1025 parser->other_xml.append((const char *) prefix);
1026 parser->other_xml.append(":");
1027 }
1028 parser->other_xml.append(localname);
1029
1030 if (nb_namespaces != 0) {
1031 parser->transfer_xml_ns(namespaces, nb_namespaces);
1032
1033 for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
1034 ++i) {
1035 parser->other_xml.append(" xmlns");
1036 if (!i->first.empty()) {
1037 parser->other_xml.append(":");
1038 parser->other_xml.append(i->first);
1039 }
1040 parser->other_xml.append("=\"");
1041 parser->other_xml.append(i->second);
1042 parser->other_xml.append("\"");
1043 }
1044 }
1045
1046 if (nb_attributes != 0) {
1047#if 0
1048 parser->transfer_xml_attrs(attributes, nb_attributes);
1049#endif
1050 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
1051 parser->other_xml.append(" ");
1052 if (!i->second.prefix.empty()) {
1053 parser->other_xml.append(i->second.prefix);
1054 parser->other_xml.append(":");
1055 }
1056 parser->other_xml.append(i->first);
1057 parser->other_xml.append("=\"");
1058 parser->other_xml.append(i->second.value);
1059 parser->other_xml.append("\"");
1060 }
1061 }
1062
1063 parser->other_xml.append(">");
1064 break;
1065
1066 case inside_enum_def:
1067 // process an EnumConst element
1068 if (parser->process_enum_const(localname, attributes, nb_attributes))
1069 parser->push_state(inside_enum_const);
1070 else
1071 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
1072 break;
1073
1074 case inside_enum_const:
1075 // No content; nothing to do
1076 break;
1077
1078 case inside_dim_def:
1079 // No content; nothing to do
1080 break;
1081
1082 case inside_dim:
1083 // No content.
1084 break;
1085
1086 case inside_map:
1087 // No content.
1088 break;
1089
1090 case inside_simple_type:
1091 if (parser->process_attribute(localname, attributes, nb_attributes))
1092 break;
1093 else if (parser->process_dimension(localname, attributes, nb_attributes))
1094 parser->push_state(inside_dim);
1095 else if (parser->process_map(localname, attributes, nb_attributes))
1096 parser->push_state(inside_map);
1097 else
1098 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
1099 break;
1100
1101 case inside_constructor:
1102 if (parser->process_variable(localname, attributes, nb_attributes))
1103 // This will push either inside_simple_type or inside_structure
1104 // onto the parser state stack.
1105 break;
1106 else if (parser->process_attribute(localname, attributes, nb_attributes))
1107 break;
1108 else if (parser->process_dimension(localname, attributes, nb_attributes))
1109 parser->push_state(inside_dim);
1110 else if (parser->process_map(localname, attributes, nb_attributes))
1111 parser->push_state(inside_map);
1112 else
1113 DmrppParserSax2::dmr_error(parser,
1114 "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
1115 break;
1116
1117 case not_dap4_element:
1118 BESDEBUG(PARSER, prolog << "SKIPPING unexpected element. localname: " << localname << "namespace: "
1119 << this_element_ns_name << endl);
1120 break;
1121
1122 case inside_dmrpp_compact_element:
1123 if (parser->process_dmrpp_compact_start(localname)) {
1124 BESDEBUG(PARSER, prolog << "Call to parser->process_dmrpp_compact_start() completed." << endl);
1125 }
1126 break;
1127
1128 case inside_dmrpp_object: {
1129 BESDEBUG(PARSER, prolog << "Inside dmrpp namespaced element. localname: " << localname << endl);
1130 assert(this_element_ns_name == dmrpp_namespace);
1131
1132#if 0
1133 parser->transfer_xml_attrs(attributes, nb_attributes); // load up xml_attrs
1134#endif
1135
1136 BaseType *bt = parser->top_basetype();
1137 if (!bt) throw BESInternalError("Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
1138
1139 DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
1140 if (!dc)
1141 throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1142
1143 // Ingest the dmrpp:chunks element and it attributes
1144 if (strcmp(localname, "chunks") == 0) {
1145 BESDEBUG(PARSER, prolog << "DMR++ chunks element. localname: " << localname << endl);
1146
1147 if (parser->check_attribute("compressionType", attributes, nb_attributes)) {
1148 string compression_type_string(parser->get_attribute_val("compressionType", attributes, nb_attributes));
1149 dc->ingest_compression_type(compression_type_string);
1150
1151 BESDEBUG(PARSER, prolog << "Processed attribute 'compressionType=\"" <<
1152 compression_type_string << "\"'" << endl);
1153 }
1154 else {
1155 BESDEBUG(PARSER, prolog << "There was no 'compressionType' attribute associated with the variable '"
1156 << bt->type_name() << " " << bt->name() << "'" << endl);
1157 }
1158
1159 if (parser->check_attribute("byteOrder", attributes, nb_attributes)) {
1160 string byte_order_string(parser->get_attribute_val("byteOrder", attributes, nb_attributes));
1161 dc->ingest_byte_order(byte_order_string);
1162
1163 BESDEBUG(PARSER, prolog << "Processed attribute 'byteOrder=\"" << byte_order_string << "\"'" << endl);
1164 }
1165 else {
1166 BESDEBUG(PARSER, prolog << "There was no 'byteOrder' attribute associated with the variable '" << bt->type_name()
1167 << " " << bt->name() << "'" << endl);
1168 }
1169 }
1170 // Ingest an dmrpp:chunk element and its attributes
1171 else if (strcmp(localname, "chunk") == 0) {
1172 string data_url_str = "unknown_data_location";
1173 shared_ptr<http::url> data_url;
1174
1175 if (parser->check_attribute("href", attributes, nb_attributes)) {
1176 bool trusted = false;
1177 if (parser->check_attribute("trust", attributes, nb_attributes)) {
1178 string value = parser->get_attribute_val("trust", attributes, nb_attributes);
1179 trusted = value == "true";
1180 }
1181
1182 // This is the chunk elements href that we check.
1183 data_url_str = parser->get_attribute_val("href", attributes, nb_attributes);
1184 data_url = shared_ptr<http::url> ( new http::url(data_url_str,trusted));
1185 BESDEBUG(PARSER, prolog << "Processed 'href' value into data_url. href: " << data_url->str() << (trusted?"":"(trusted)") << endl);
1186 //######################################################################################################
1187 // Stop parser EffectiveUrl resolution (ndp - 08/27/2021)
1188 // I dropped this because:
1189 // - The Chunk::get_data_url() method calls EffectiveUrlCache::TheCache()->get_effective_url(data_url)
1190 // - EffectiveUrlCache::TheCache()->get_effective_url(data_url) method is thread safe
1191 // - By dropping these calls from the parser, which is in a single threaded section of the code, we can
1192 // resolve the URL during a multi-threaded operation (reading the chunks) and reduce the overall
1193 // time cost of resolving all of the chunk URLs with concurrency.
1194 // -----------------------------------------------------------------------------------------------------
1195 // We may have to cache the last accessed/redirect URL for data_url here because this URL
1196 // may be unique to this chunk.
1197
1198 //BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Chunk URL: " << data_url->str() << endl);
1199 //auto effective_url = EffectiveUrlCache::TheCache()->get_effective_url(data_url);
1200 //BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url->str() << endl);
1201 //######################################################################################################
1202
1203 }
1204 else {
1205 BESDEBUG(PARSER, prolog << "No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl);
1206 // This bit of magic sets the URL used to get the data and it's
1207 // magic in part because it may be a file or an http URL
1208 data_url = parser->dmrpp_dataset_href;
1209 // We don't have to conditionally cache parser->dmrpp_dataset_href here because that was
1210 // done in the evaluation of the parser_start case.
1211 BESDEBUG(PARSER, prolog << "Processing dmrpp:href into data_url. dmrpp:href='" << data_url->str() << "'" << endl);
1212 }
1213
1214 if (data_url->protocol() != HTTP_PROTOCOL && data_url->protocol() != HTTPS_PROTOCOL && data_url->protocol() != FILE_PROTOCOL) {
1215 BESDEBUG(PARSER, prolog << "data_url does NOT start with 'http://', 'https://' or 'file://'. "
1216 "Retrieving default catalog root directory" << endl);
1217
1218 // Now we try to find the default catalog. If we can't find it we punt and leave it be.
1219 BESCatalog *defcat = BESCatalogList::TheCatalogList()->default_catalog();
1220 if (!defcat) {
1221 BESDEBUG(PARSER, prolog << "Not able to find the default catalog." << endl);
1222 }
1223 else {
1224 // Found the catalog so we get the root dir; make a file URL.
1225 BESCatalogUtils *utils = BESCatalogList::TheCatalogList()->default_catalog()->get_catalog_utils();
1226
1227 BESDEBUG(PARSER, prolog << "Found default catalog root_dir: '" << utils->get_root_dir() << "'" << endl);
1228
1229 data_url_str = BESUtil::assemblePath(utils->get_root_dir(), data_url_str, true);
1230 data_url_str = FILE_PROTOCOL + data_url_str;
1231 data_url = shared_ptr<http::url> ( new http::url(data_url_str));
1232 }
1233 }
1234
1235 BESDEBUG(PARSER, prolog << "Processed data_url: '" << data_url->str() << "'" << endl);
1236
1237 unsigned long long offset = 0;
1238 unsigned long long size = 0;
1239 string chunk_position_in_array("");
1240 std::string byte_order = dc->get_byte_order();
1241
1242 if (parser->check_required_attribute("offset", attributes, nb_attributes)) {
1243 istringstream offset_ss(parser->get_attribute_val("offset", attributes, nb_attributes));
1244 offset_ss >> offset;
1245 BESDEBUG(PARSER, prolog << "Processed attribute 'offset=\"" << offset << "\"'" << endl);
1246 }
1247 else {
1248 dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'offset'.");
1249 }
1250
1251 if (parser->check_required_attribute("nBytes", attributes, nb_attributes)) {
1252 istringstream size_ss(parser->get_attribute_val("nBytes", attributes, nb_attributes));
1253 size_ss >> size;
1254 BESDEBUG(PARSER, prolog << "Processed attribute 'nBytes=\"" << size << "\"'" << endl);
1255 }
1256 else {
1257 dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'size'.");
1258 }
1259
1260 if (parser->check_attribute("chunkPositionInArray", attributes, nb_attributes)) {
1261 istringstream chunk_position_ss(parser->get_attribute_val("chunkPositionInArray", attributes, nb_attributes));
1262 chunk_position_in_array = chunk_position_ss.str();
1263 BESDEBUG(PARSER, prolog << "Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl);
1264 }
1265 else {
1266 BESDEBUG(PARSER, prolog << "No attribute 'chunkPositionInArray' located" << endl);
1267 }
1268
1269 dc->add_chunk(data_url, byte_order, size, offset, chunk_position_in_array);
1270 }
1271 }
1272 break;
1273
1274 case inside_dmrpp_chunkDimensionSizes_element:
1275 // The dmrpp:chunkDimensionSizes value is processed by the end element code.
1276 break;
1277
1278 case parser_unknown:
1279 case parser_error:
1280 case parser_fatal_error:
1281 break;
1282
1283 case parser_end:
1284 // FIXME Error?
1285 break;
1286 }
1287
1288 BESDEBUG(PARSER, prolog << "Start element exit state: " << states[parser->get_state()] << endl);
1289}
1290
1291void DmrppParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
1292{
1293 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1294 const char *localname = (const char *) l;
1295
1296 BESDEBUG(PARSER, prolog << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl);
1297
1298 switch (parser->get_state()) {
1299 case parser_start:
1300 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
1301 break;
1302
1303 case inside_dataset:
1304 if (is_not(localname, "Dataset"))
1305 DmrppParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
1306
1307 parser->pop_state();
1308 if (parser->get_state() != parser_start)
1309 dmr_fatal_error(parser, "Unexpected state, expected start state.");
1310 else {
1311 parser->pop_state();
1312 parser->push_state(parser_end);
1313 }
1314 break;
1315
1316 case inside_group: {
1317 if (is_not(localname, "Group"))
1318 DmrppParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
1319
1320 if (!parser->empty_basetype() || parser->empty_group())
1321 DmrppParserSax2::dmr_error(parser,
1322 "The document did not contain a valid root Group or contained unbalanced tags.");
1323
1324 parser->pop_group();
1325 parser->pop_state();
1326 break;
1327 }
1328
1329 case inside_attribute_container:
1330 if (is_not(localname, "Attribute"))
1331 DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1332
1333 parser->pop_state();
1334 parser->pop_attributes();
1335 break;
1336
1337 case inside_attribute:
1338 if (is_not(localname, "Attribute"))
1339 DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1340
1341 parser->pop_state();
1342 break;
1343
1344 case inside_attribute_value: {
1345 if (is_not(localname, "Value"))
1346 DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1347
1348 parser->pop_state();
1349
1350 // The old code added more values using the name and type as
1351 // indexes to find the correct attribute. Use get() for that
1352 // now. Or fix this code to keep a pointer to the to attribute...
1353 D4Attributes *attrs = parser->top_attributes();
1354 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1355 if (!attr) {
1356 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1357 attrs->add_attribute_nocopy(attr);
1358 }
1359 attr->add_value(parser->char_data);
1360
1361 parser->char_data = ""; // Null this after use.
1362 break;
1363 }
1364
1365 case inside_other_xml_attribute: {
1366 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
1367 parser->pop_state();
1368
1369 // The old code added more values using the name and type as
1370 // indexes to find the correct attribute. Use get() for that
1371 // now. Or fix this code to keep a pointer to the to attribute...
1372 D4Attributes *attrs = parser->top_attributes();
1373 D4Attribute *attr = attrs->get(parser->dods_attr_name);
1374 if (!attr) {
1375 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1376 attrs->add_attribute_nocopy(attr);
1377 }
1378 attr->add_value(parser->other_xml);
1379
1380 parser->other_xml = ""; // Null this after use.
1381 }
1382 else {
1383 if (parser->other_xml_depth == 0) {
1384 DmrppParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
1385 localname);
1386 break;
1387 }
1388 parser->other_xml_depth--;
1389
1390 parser->other_xml.append("</");
1391 if (prefix) {
1392 parser->other_xml.append((const char *) prefix);
1393 parser->other_xml.append(":");
1394 }
1395 parser->other_xml.append(localname);
1396 parser->other_xml.append(">");
1397 }
1398 break;
1399 }
1400
1401 case inside_enum_def:
1402 if (is_not(localname, "Enumeration"))
1403 DmrppParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
1404 if (!parser->top_group())
1406 "Expected a Group to be the current item, while finishing up an Enumeration.");
1407 else {
1408 // copy the pointer; not a deep copy
1409 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1410 // Set the enum_def to null; next call to enum_def() will
1411 // allocate a new object
1412 parser->clear_enum_def();
1413 parser->pop_state();
1414 }
1415 break;
1416
1417 case inside_enum_const:
1418 if (is_not(localname, "EnumConst"))
1419 DmrppParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
1420
1421 parser->pop_state();
1422 break;
1423
1424 case inside_dim_def: {
1425 if (is_not(localname, "Dimension"))
1426 DmrppParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1427
1428 if (!parser->top_group())
1429 DmrppParserSax2::dmr_error(parser,
1430 "Expected a Group to be the current item, while finishing up an Dimension.");
1431
1432 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1433 // Set the dim_def to null; next call to dim_def() will
1434 // allocate a new object. Calling 'clear' is important because
1435 // the cleanup method will free dim_def if it's not null and
1436 // we just copied the pointer in the add_dim_nocopy() call
1437 // above.
1438 parser->clear_dim_def();
1439 parser->pop_state();
1440 break;
1441 }
1442
1443 case inside_simple_type:
1444 if (is_simple_type(get_type(localname))) {
1445 BaseType *btp = parser->top_basetype();
1446 parser->pop_basetype();
1447 parser->pop_attributes();
1448 BaseType *parent = 0;
1449 if (!parser->empty_basetype())
1450 parent = parser->top_basetype();
1451 else if (!parser->empty_group())
1452 parent = parser->top_group();
1453 else {
1454 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1455 localname);
1456 delete btp;
1457 parser->pop_state();
1458 break;
1459 }
1460 if (parent->type() == dods_array_c)
1461 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1462 else
1463 parent->add_var_nocopy(btp);
1464 }
1465 else
1466 DmrppParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1467
1468 parser->pop_state();
1469 break;
1470
1471 case inside_dim:
1472 if (is_not(localname, "Dim"))
1473 DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1474
1475 parser->pop_state();
1476 break;
1477
1478 case inside_map:
1479 if (is_not(localname, "Map"))
1480 DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1481
1482 parser->pop_state();
1483 break;
1484
1485 case inside_constructor: {
1486 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1487 DmrppParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1488 return;
1489 }
1490 BaseType *btp = parser->top_basetype();
1491 parser->pop_basetype();
1492 parser->pop_attributes();
1493 BaseType *parent = 0;
1494 if (!parser->empty_basetype())
1495 parent = parser->top_basetype();
1496 else if (!parser->empty_group())
1497 parent = parser->top_group();
1498 else {
1499 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1500 localname);
1501 delete btp;
1502 parser->pop_state();
1503 break;
1504 }
1505 // TODO Why doesn't this code mirror the simple_var case and test
1506 // for the parent being an array? jhrg 10/13/13
1507 parent->add_var_nocopy(btp);
1508 parser->pop_state();
1509 break;
1510 }
1511
1512 case not_dap4_element:
1513 BESDEBUG(PARSER, prolog << "End of non DAP4 element: " << localname << endl);
1514 parser->pop_state();
1515 break;
1516
1517#if 1
1518 case inside_dmrpp_compact_element: {
1519 parser->process_dmrpp_compact_end(localname);
1520 BESDEBUG(PARSER, prolog << "End of dmrpp compact element: " << localname << endl);
1521 parser->pop_state();
1522 break;
1523 }
1524#endif
1525
1526 case inside_dmrpp_object: {
1527 BESDEBUG(PARSER, prolog << "End of dmrpp namespace element: " << localname << endl);
1528 parser->pop_state();
1529 break;
1530 }
1531
1532 case inside_dmrpp_chunkDimensionSizes_element: {
1533 BESDEBUG(PARSER, prolog << "End of chunkDimensionSizes element. localname: " << localname << endl);
1534
1535 if (is_not(localname, "chunkDimensionSizes"))
1536 DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1537 DmrppCommon *dc = dynamic_cast<DmrppCommon*>(parser->top_basetype()); // Get the Dmrpp common info
1538 if (!dc)
1539 throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1540 string element_text(parser->char_data);
1541 BESDEBUG(PARSER, prolog << "chunkDimensionSizes element_text: '" << element_text << "'" << endl);
1542 dc->parse_chunk_dimension_sizes(element_text);
1543 parser->char_data = ""; // Null this after use.
1544 parser->pop_state();
1545 break;
1546 }
1547
1548 case parser_unknown:
1549 parser->pop_state();
1550 break;
1551
1552 case parser_error:
1553 case parser_fatal_error:
1554 break;
1555
1556 case parser_end:
1557 // FIXME Error?
1558 break;
1559 }
1560
1561
1562 BESDEBUG(PARSER, prolog << "End element exit state: " << states[parser->get_state()] <<
1563 " ("<<parser->get_state()<<")"<< endl);
1564}
1565
1569void DmrppParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1570{
1571 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1572
1573 switch (parser->get_state()) {
1574 case inside_attribute_value:
1575 case inside_dmrpp_chunkDimensionSizes_element:
1576 case inside_dmrpp_compact_element:
1577 parser->char_data.append((const char *) (ch), len);
1578 BESDEBUG(PARSER, prolog << "Characters[" << parser->char_data.size() << "]" << parser->char_data << "'" << endl);
1579 break;
1580
1581 case inside_other_xml_attribute:
1582 parser->other_xml.append((const char *) (ch), len);
1583 BESDEBUG(PARSER, prolog << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1584 break;
1585
1586 default:
1587 break;
1588 }
1589}
1590
1595void DmrppParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1596{
1597 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1598
1599 switch (parser->get_state()) {
1600 case inside_other_xml_attribute:
1601 parser->other_xml.append((const char *) (ch), len);
1602 break;
1603
1604 default:
1605 break;
1606 }
1607}
1608
1614void DmrppParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1615{
1616 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1617
1618 switch (parser->get_state()) {
1619 case inside_other_xml_attribute:
1620 parser->other_xml.append((const char *) (value), len);
1621 break;
1622
1623 case parser_unknown:
1624 break;
1625
1626 default:
1627 DmrppParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1628
1629 break;
1630 }
1631}
1632
1637xmlEntityPtr DmrppParserSax2::dmr_get_entity(void *, const xmlChar * name)
1638{
1639 return xmlGetPredefinedEntity(name);
1640}
1641
1652void DmrppParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1653{
1654 va_list args;
1655 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1656
1657 parser->push_state(parser_fatal_error);
1658
1659 va_start(args, msg);
1660 char str[1024];
1661 vsnprintf(str, 1024, msg, args);
1662 va_end(args);
1663
1664 int line = xmlSAX2GetLineNumber(parser->context);
1665
1666 if (!parser->error_msg.empty()) parser->error_msg += "\n";
1667 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1668}
1669
1670void DmrppParserSax2::dmr_error(void *p, const char *msg, ...)
1671{
1672 va_list args;
1673 DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1674
1675 parser->push_state(parser_error);
1676
1677 va_start(args, msg);
1678 char str[1024];
1679 vsnprintf(str, 1024, msg, args);
1680 va_end(args);
1681
1682 int line = xmlSAX2GetLineNumber(parser->context);
1683
1684 if (!parser->error_msg.empty()) parser->error_msg += "\n";
1685 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1686}
1688
1692void DmrppParserSax2::cleanup_parse()
1693{
1694 bool wellFormed = context->wellFormed;
1695 bool valid = context->valid;
1696
1697 // context->sax = NULL;
1698 // Leak. Removed the above. jhrg 6/19/19
1699 xmlFreeParserCtxt(context);
1700
1701 delete d_enum_def;
1702 d_enum_def = 0;
1703
1704 delete d_dim_def;
1705 d_dim_def = 0;
1706
1707 // If there's an error, there may still be items on the stack at the
1708 // end of the parse.
1709 while (!btp_stack.empty()) {
1710 delete top_basetype();
1711 pop_basetype();
1712 }
1713
1714 if (!wellFormed)
1715 throw BESInternalError("The DMR was not well formed. " + error_msg,__FILE__,__LINE__);
1716 else if (!valid)
1717 throw BESInternalError("The DMR was not valid." + error_msg,__FILE__,__LINE__);
1718 else if (get_state() == parser_error)
1719 throw BESInternalError(error_msg,__FILE__,__LINE__);
1720 else if (get_state() == parser_fatal_error) throw BESInternalError(error_msg,__FILE__,__LINE__);
1721}
1722
1736void DmrppParserSax2::intern(istream &f, DMR *dest_dmr)
1737{
1738 // Code example from libxml2 docs re: read from a stream.
1739
1740 if (!f.good()) throw BESInternalError(prolog + "ERROR - Supplied istream instance not open or read error",__FILE__,__LINE__);
1741 if (!dest_dmr) throw BESInternalError(prolog + "THe supplied DMR object pointer is null", __FILE__, __LINE__);
1742
1743 d_dmr = dest_dmr; // dump values here
1744
1745 int line_num = 1;
1746 string line;
1747
1748 // Get the XML prolog line (looks like: <?xml ... ?> )
1749 getline(f, line);
1750 if (line.size() == 0) throw BESInternalError(prolog + "ERROR - No input found when parsing the DMR++",__FILE__,__LINE__);
1751
1752 BESDEBUG(PARSER, prolog << "line: (" << line_num << "): " << endl << line << endl << endl);
1753
1754 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, line.c_str(), line.size(), "stream");
1755 context->validate = true;
1756 push_state(parser_start);
1757
1758 // Get the first chunk of the stuff
1759 long chunk_count = 0;
1760 long chunk_size = 0;
1761
1762 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1763 chunk_size=f.gcount();
1764 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1765 BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1766 BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1767
1768 while(!f.eof() && (get_state() != parser_end)){
1769
1770 xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1771
1772 // There is more to read. Get the next chunk
1773 f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1774 chunk_size=f.gcount();
1775 d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1776 BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1777 BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1778 }
1779
1780 // This call ends the parse.
1781 xmlParseChunk(context, d_parse_buffer, chunk_size, 1/*terminate*/); // libxml2 call
1782
1783 // This checks that the state on the parser stack is parser_end and throws
1784 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1785 cleanup_parse();
1786}
1787
1788
1789
1798void DmrppParserSax2::intern(const string &document, DMR *dest_dmr)
1799{
1800 intern(document.c_str(), document.size(), dest_dmr);
1801}
1802
1811void DmrppParserSax2::intern(const char *buffer, int size, DMR *dest_dmr)
1812{
1813 if (!(size > 0)) return;
1814
1815 // Code example from libxml2 docs re: read from a stream.
1816
1817 if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1818 d_dmr = dest_dmr; // dump values in dest_dmr
1819
1820 push_state(parser_start);
1821 context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, buffer, size, "stream");
1822 context->validate = true;
1823
1824 // This call ends the parse.
1825 xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1826
1827 // This checks that the state on the parser stack is parser_end and throws
1828 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1829 cleanup_parse();
1830}
1831
1832} // namespace dmrpp
virtual BESCatalog * default_catalog() const
Get the default catalog.
const std::string & get_root_dir() const
Get the root directory of the catalog.
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition BESDebug.h:145
static std::ostream * GetStrm()
return the debug stream
Definition BESDebug.h:165
exception thrown if internal error encountered
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:804
void intern(std::istream &f, libdap::DMR *dest_dmr)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_fatal_error(void *parser, const char *msg,...)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_start_document(void *parser)
Type
Type of JSON value.
Definition rapidjson.h:664