bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
NCMLParser.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29#include "config.h"
30#include "NCMLParser.h" // ncml_module
31
32#include "AggregationElement.h" // ncml_module
33#include "AggregationUtil.h" // agg_util
34#include <BESConstraintFuncs.h>
35#include <BESDataDDSResponse.h>
36#include <BESDDSResponse.h>
37#include <BESDebug.h>
38#include <BESStopWatch.h>
39#include "DDSLoader.h" // ncml_module
40#include "DimensionElement.h" // ncml_module
41#include <libdap/AttrTable.h> // libdap
42#include <libdap/BaseType.h> // libdap
43#include <libdap/DAS.h> // libdap
44#include <libdap/DDS.h> // libdap
45//#include <libdap/mime_util.h>
46#include <libdap/Structure.h> // libdap
47#include <map>
48#include <memory>
49#include "NCMLDebug.h" // ncml_module
50#include "NCMLElement.h" // ncml_module
51#include "NCMLUtil.h" // ncml_module
52#include "NetcdfElement.h" // ncml_module
53#include "OtherXMLParser.h" // ncml_module
54#include <libdap/parser.h> // libdap for the type checking...
55#include "SaxParserWrapper.h" // ncml_module
56#include <sstream>
57
58// For extra debug spew for now.
59#define DEBUG_NCML_PARSER_INTERNALS 1
60#define MODULE "ncml"
61#define prolog std::string("NCMLParser::").append(__func__).append("() - ")
62
63using namespace agg_util;
64
65namespace ncml_module {
66
67// From the DAP 2 guide....
68static const unsigned int MAX_DAP_STRING_SIZE = 32767;
69
70// Consider filling this with a compilation flag.
71/* static */bool NCMLParser::sThrowExceptionOnUnknownElements = true;
72
73// An attribute or variable with type "Structure" will match this string.
74const string NCMLParser::STRUCTURE_TYPE("Structure");
75
76// Just cuz I hate magic -1. Used in _currentParseLine
77static const int NO_CURRENT_PARSE_LINE_NUMBER = -1;
78
80// Helper class.
81AttrTableLazyPtr::AttrTableLazyPtr(const NCMLParser& parser, AttrTable* pAT/*=0*/) :
82 _parser(parser), _pAttrTable(pAT), _loaded(pAT)
83{
84}
85
86AttrTableLazyPtr::~AttrTableLazyPtr()
87{
88 _pAttrTable = 0;
89 _loaded = false;
90}
91
92AttrTable*
94{
95 if (!_loaded) {
96 const_cast<AttrTableLazyPtr*>(this)->loadAndSetAttrTable();
97 }
98 return _pAttrTable;
99}
100
101void AttrTableLazyPtr::set(AttrTable* pAT)
102{
103 _pAttrTable = pAT;
104 if (pAT) {
105 _loaded = true;
106 }
107 else {
108 _loaded = false;
109 }
110}
111
113{
114 // force it to load next get().
115 _pAttrTable = 0;
116 _loaded = false;
117}
118
119void AttrTableLazyPtr::loadAndSetAttrTable()
120{
121 set(0);
122 NetcdfElement* pDataset = _parser.getCurrentDataset();
123 if (pDataset) {
124 // The lazy load actually occurs in here
125 DDS* pDDS = pDataset->getDDS();
126 if (pDDS) {
127 set(&(pDDS->get_attr_table()));
128 _loaded = true;
129 }
130 }
131}
132
135
137 _filename(""), _loader(loader), _responseType(DDSLoader::eRT_RequestDDX), _response(0), _rootDataset(0), _currentDataset(
138 0), _pVar(0), _pCurrentTable(*this, 0), _elementStack(), _scope(), _namespaceStack(), _pOtherXMLParser(0), _currentParseLine(
139 NO_CURRENT_PARSE_LINE_NUMBER)
140{
141 BESDEBUG(MODULE, prolog << "Created NCMLParser." << endl);
142}
143
144NCMLParser::~NCMLParser()
145{
146 // clean other stuff up
147 cleanup();
148}
149
151{
152 // Parse into a newly created object.
154
155 // Parse into the response. We still got it in the unique_ptr in this scope, so we're safe
156 // on exception since the unique_ptr in this func will cleanup the memory.
157 parseInto(ncmlFilename, responseType, response.get());
158
159 // Relinquish it to the caller
160 return response;
161}
162
163void NCMLParser::parseInto(const string& ncmlFilename, DDSLoader::ResponseType responseType, BESDapResponse* response)
164{
165 BES_STOPWATCH_START(MODULE, prolog + "Timer");
166
167 VALID_PTR(response);
168 NCML_ASSERT_MSG(DDSLoader::checkResponseIsValidType(responseType, response),
169 "NCMLParser::parseInto: got wrong response object for given type.");
170
171 _responseType = responseType;
172 _response = response;
173
174 if (parsing()) {
175 THROW_NCML_INTERNAL_ERROR("Illegal Operation: NCMLParser::parse called while already parsing!");
176 }
177
178 BESDEBUG(MODULE, prolog << "Beginning NcML parse of file=" << ncmlFilename << endl);
179
180 // In case we care.
181 _filename = ncmlFilename;
182
183 // Invoke the libxml sax parser
184 SaxParserWrapper parser(*this);
185
186 parser.parse(ncmlFilename);
187
188 // Prepare for a new parse, making sure it's all cleaned up (with the exception of the _ddsResponse
189 // which where's about to send off)
190 resetParseState();
191
192 // we're done with it.
193 _response = 0;
194}
195
197{
198 return !_filename.empty();
199}
200
202{
203 return _currentParseLine;
204}
205
208{
209 return _namespaceStack;
210}
211
212void NCMLParser::onStartDocument()
213{
214 BESDEBUG(MODULE, prolog << "onStartDocument." << endl);
215}
216
217void NCMLParser::onEndDocument()
218{
219 BESDEBUG(MODULE, prolog << "onEndDocument." << endl);
220}
221
222void NCMLParser::onStartElement(const std::string& name, const XMLAttributeMap& attrs)
223{
224 // If we have a proxy set for OtherXML, pass calls there.
225 if (isParsingOtherXML()) {
226 VALID_PTR(_pOtherXMLParser);
227 _pOtherXMLParser->onStartElement(name, attrs);
228 }
229 else // Otherwise do the standard NCML parse
230 {
231 processStartNCMLElement(name, attrs);
232 }
233}
234
235// Local helper for below...
236// Sees whether we are closing the element on top
237// of the NCMLElement stack and that we're not parsing
238// OtherXML, or if we are that its depth is now zero.
239static bool shouldStopOtherXMLParse(NCMLElement* top, const string& closingElement, OtherXMLParser& rProxyParser)
240{
241 // If the stack top element name is the same as the element we are closing...
242 // and the parse depth is 0, then we're done.
243 // We MUST check the parse depth in case the other XML has an Attribute in it!
244 // We want to be sure we're closing the right one.
245 if (top->getTypeName() == closingElement && rProxyParser.getParseDepth() == 0) {
246 return true;
247 }
248 else // we're not done.
249 {
250 return false;
251 }
252}
253
254void NCMLParser::onEndElement(const std::string& name)
255{
256 NCMLElement* elt = getCurrentElement();
257 VALID_PTR(elt);
258
259 // First, handle the OtherXML proxy parsing case
260 if (isParsingOtherXML()) {
261 VALID_PTR(_pOtherXMLParser);
262 // If we're closing the element that caused the OtherXML parse...
263 if (shouldStopOtherXMLParse(elt, name, *_pOtherXMLParser)) {
264 // Then we want to clear the proxy from this and
265 // call the end on the top of the element stack.
266 // We assume it has access to the OtherXML parser
267 // and will use the data.
268 _pOtherXMLParser = 0;
269 processEndNCMLElement(name);
270 }
271 else {
272 // Pass through to proxy
273 _pOtherXMLParser->onEndElement(name);
274 }
275 }
276 else // Do the regular NCMLElement call.
277 {
278 // Call the regular NCMLElement end element.
279 processEndNCMLElement(name);
280 }
281}
282
283void NCMLParser::onStartElementWithNamespace(const std::string& localname, const std::string& prefix,
284 const std::string& uri, const XMLAttributeMap& attributes, const XMLNamespaceMap& namespaces)
285{
286 // If we have a proxy set for OtherXML, pass calls there.
287 if (isParsingOtherXML()) {
288 VALID_PTR(_pOtherXMLParser);
289 _pOtherXMLParser->onStartElementWithNamespace(localname, prefix, uri, attributes, namespaces);
290 }
291 else // Otherwise do the standard NCML parse
292 // but keep the namespaces on the stack. We don't do this for OtherXML.
293 {
294 _namespaceStack.push(namespaces);
295 processStartNCMLElement(localname, attributes);
296 }
297}
298
299void NCMLParser::onEndElementWithNamespace(const std::string& localname, const std::string& prefix,
300 const std::string& uri)
301{
302 NCMLElement* elt = getCurrentElement();
303 VALID_PTR(elt);
304
305 // First, handle the OtherXML proxy parsing case
306 if (isParsingOtherXML()) {
307 VALID_PTR(_pOtherXMLParser);
308 // If we're closing the element that caused the OtherXML parse...
309 if (shouldStopOtherXMLParse(elt, localname, *_pOtherXMLParser)) {
310 // Then we want to clear the proxy from this and
311 // call the end on the top of the element stack.
312 // We assume it has access to the OtherXML parser
313 // and will use the data.
314 _pOtherXMLParser = 0;
315 processEndNCMLElement(localname);
316 }
317 else {
318 // Pass through to proxy
319 _pOtherXMLParser->onEndElementWithNamespace(localname, prefix, uri);
320 }
321 }
322 else // Do the regular NCMLElement call.
323 {
324 // Call the regular NCMLElement end element.
325 processEndNCMLElement(localname);
326 _namespaceStack.pop();
327 }
328}
329
330void NCMLParser::onCharacters(const std::string& content)
331{
332 // If we're parsing OtherXML, send the call to the proxy.
333 if (isParsingOtherXML()) {
334 VALID_PTR(_pOtherXMLParser);
335 _pOtherXMLParser->onCharacters(content);
336 }
337 else // Standard NCML parse
338 {
339 // If we got an element on the stack, hand it off. Otherwise, do nothing.
340 NCMLElement* elt = getCurrentElement();
341 if (elt) {
342 elt->handleContent(content);
343 }
344 }
345}
346
347void NCMLParser::onParseWarning(std::string msg)
348{
349 // TODO We may want to make a flag for considering warnings errors as well.
350 BESDEBUG(MODULE, prolog << "PARSE WARNING: LibXML msg={" << msg << "}. Attempting to continue parse." << endl);
351}
352
353void NCMLParser::onParseError(std::string msg)
354{
355 // Pretty much have to give up on malformed XML.
356 THROW_NCML_PARSE_ERROR(getParseLineNumber(), "libxml SAX2 parser error! msg={" + msg + "} Terminating parse!");
357}
358
360{
361 _currentParseLine = line;
362 // BESDEBUG(MODULE, prolog << "******** Now parsing line: " << line << endl);
363}
364
366// Non-public Implementation
367
368bool NCMLParser::isScopeAtomicAttribute() const
369{
370 return (!_scope.empty()) && (_scope.topType() == ScopeStack::ATTRIBUTE_ATOMIC);
371}
372
373bool NCMLParser::isScopeAttributeContainer() const
374{
375 return (!_scope.empty()) && (_scope.topType() == ScopeStack::ATTRIBUTE_CONTAINER);
376}
377
378bool NCMLParser::isScopeSimpleVariable() const
379{
380 return (!_scope.empty()) && (_scope.topType() == ScopeStack::VARIABLE_ATOMIC);
381}
382
383bool NCMLParser::isScopeCompositeVariable() const
384{
385 return (!_scope.empty()) && (_scope.topType() == ScopeStack::VARIABLE_CONSTRUCTOR);
386}
387
388bool NCMLParser::isScopeVariable() const
389{
390 return (isScopeSimpleVariable() || isScopeCompositeVariable());
391}
392
393bool NCMLParser::isScopeGlobal() const
394{
395 return withinNetcdf() && _scope.empty();
396}
397
398// TODO Clean up these next two calls with a parser state or something....
399// Dynamic casting all the time isn't super fast or clean if not needed...
400bool NCMLParser::isScopeNetcdf() const
401{
402 // see if the last thing parsed was <netcdf>
403 return (!_elementStack.empty() && dynamic_cast<NetcdfElement*>(_elementStack.back()));
404}
405
406bool NCMLParser::isScopeAggregation() const
407{
408 // see if the last thing parsed was <netcdf>
409 return (!_elementStack.empty() && dynamic_cast<AggregationElement*>(_elementStack.back()));
410}
411
412bool NCMLParser::withinNetcdf() const
413{
414 return _currentDataset != 0;
415}
416
417bool NCMLParser::withinVariable() const
418{
419 return withinNetcdf() && _pVar;
420}
421
422agg_util::DDSLoader&
423NCMLParser::getDDSLoader() const
424{
425 return _loader;
426}
427
429NCMLParser::getCurrentDataset() const
430{
431 return _currentDataset;
432}
433
435NCMLParser::getRootDataset() const
436{
437 return _rootDataset;
438}
439
440DDS*
441NCMLParser::getDDSForCurrentDataset() const
442{
443 NetcdfElement* dataset = getCurrentDataset();
444 NCML_ASSERT_MSG(dataset, "getDDSForCurrentDataset() called when we're not processing a <netcdf> location!");
445 return dataset->getDDS();
446}
447
448void NCMLParser::pushCurrentDataset(NetcdfElement* dataset)
449{
450 VALID_PTR(dataset);
451 // The first one we get is the root It's special!
452 // We tell it to use the top level response object for the
453 // parser, since that's what ultimately is returned
454 // and we don't want the root making its own we need to copy.
455 bool thisIsRoot = !_rootDataset;
456 if (thisIsRoot) {
457 _rootDataset = dataset;
458 VALID_PTR(_response);
459 _rootDataset->borrowResponseObject(_response);
460 }
461 else {
462 addChildDatasetToCurrentDataset(dataset);
463 }
464
465 // Also invalidates the AttrTable so it gets cached again.
466 setCurrentDataset(dataset);
467
468 // TODO: What do we do with the scope stack for a nested dataset?!
469}
470
471void NCMLParser::popCurrentDataset(NetcdfElement* dataset)
472{
473 if (dataset && dataset != _currentDataset) {
474 THROW_NCML_INTERNAL_ERROR(
475 "NCMLParser::popCurrentDataset(): the dataset we expect on the top of the stack is not correct!");
476 }
477
478 dataset = getCurrentDataset();
479 VALID_PTR(dataset);
480
481 // If it's the root, we're done and need to clear up the state.
482 if (dataset == _rootDataset) {
483 _rootDataset->unborrowResponseObject(_response);
484 _rootDataset = 0;
485 setCurrentDataset(0);
486 }
487 else {
488 // If it's not the root, it should have a parent, so go get it and make that the new current.
489 NetcdfElement* parentDataset = dataset->getParentDataset();
490 NCML_ASSERT_MSG(parentDataset, "NCMLParser::popCurrentDataset() got non-root dataset, but it had no parent!!");
491 setCurrentDataset(parentDataset);
492 }
493}
494
495void NCMLParser::setCurrentDataset(NetcdfElement* dataset)
496{
497 if (dataset) {
498 // Make sure it's state is ready to go with operations before making it current
499 NCML_ASSERT(dataset->isValid());
500 _currentDataset = dataset;
501 // We don't set the current attr table, rather it is lazy eval
502 // from getCurrentAttrTable() only if called. This call tells it to do that.
503 _pCurrentTable.invalidate();
504
505 // UNLESS it's the root dataset, which we want to force to load
506 // since a passthrough file will generate an empty metadata set otherwise
507 // since the table is never requested.
508 if (_currentDataset == _rootDataset) {
509 // Force it to cache so we actually laod the metadata for the root set.
510 // Chidl sets are aggregations so we don't load those unless needed.
511 _pCurrentTable.set(_pCurrentTable.get());
512 }
513 }
514 else {
515 BESDEBUG(MODULE, prolog << "NCMLParser::setCurrentDataset(): setting to NULL..." << endl);
516 _currentDataset = 0;
517 _pCurrentTable.invalidate();
518 }
519}
520
521void NCMLParser::addChildDatasetToCurrentDataset(NetcdfElement* dataset)
522{
523 VALID_PTR(dataset);
524
525 AggregationElement* agg = _currentDataset->getChildAggregation();
526 if (!agg) {
527 THROW_NCML_INTERNAL_ERROR(
528 "NCMLParser::addChildDatasetToCurrentDataset(): current dataset has no aggregation element! We can't add it!");
529 }
530
531 // This will add as strong ref to dataset from agg (child) and a weak to agg from dataset (parent)
532 agg->addChildDataset(dataset);
533
534 // Force the dataset to create an internal response object for the request type we're processing
535 dataset->createResponseObject(_responseType);
536}
537
538bool NCMLParser::parsingDataRequest() const
539{
540 const BESDataDDSResponse* const pDataDDSResponse = dynamic_cast<const BESDataDDSResponse* const >(_response);
541 return (pDataDDSResponse);
542}
543
544void NCMLParser::loadLocation(const std::string& location, agg_util::DDSLoader::ResponseType responseType,
545 BESDapResponse* response)
546{
547 VALID_PTR(response);
548 _loader.loadInto(location, responseType, response);
549}
550
551void NCMLParser::resetParseState()
552{
553 _filename = "";
554 _pVar = 0;
555 _pCurrentTable.set(0);
556
557 _scope.clear();
558
559 // Not that this matters...
560 _responseType = DDSLoader::eRT_RequestDDX;
561
562 // We never own the memory in this, so just clear it.
563 _response = 0;
564
565 // We don't own these either.
566 _rootDataset = 0;
567 _currentDataset = 0;
568
569 // Cleanup any memory in the _elementStack
570 clearElementStack();
571
572 _namespaceStack.clear();
573
574 // just in case
575 _loader.cleanup();
576
577 // In case we had one, null it. The setter is in charge of the memory.
578 _pOtherXMLParser = 0;
579}
580
581bool NCMLParser::isNameAlreadyUsedAtCurrentScope(const std::string& name)
582{
583 return (getVariableInCurrentVariableContainer(name) || attributeExistsAtCurrentScope(name));
584}
585
586BaseType*
587NCMLParser::getVariableInCurrentVariableContainer(const string& name)
588{
589 return getVariableInContainer(name, _pVar);
590}
591
592BaseType*
593NCMLParser::getVariableInContainer(const string& varName, BaseType* pContainer)
594{
595 // BaseType::btp_stack varContext;
596 if (pContainer) {
597 // @@@ Old code... recurses and uses dots as field separators... Not good.
598 //return pContainer->var(varName, varContext);
599 // It has to be a Constructor!
600 Constructor* pCtor = dynamic_cast<Constructor*>(pContainer);
601 if (!pCtor) {
602 BESDEBUG(MODULE,
603 "WARNING: NCMLParser::getVariableInContainer: " "Expected a BaseType of subclass Constructor, but didn't get it!" << endl);
604 return 0;
605 }
606 else {
608 }
609 }
610 else {
611 return getVariableInDDS(varName);
612 }
613}
614
615// Note that this should take a fully qualified one too, but without a scoping operator (.) it will
616// just search the top level variables.
617BaseType*
618NCMLParser::getVariableInDDS(const string& varName)
619{
620 // BaseType::btp_stack varContext;
621 // return getDDSForCurrentDataset()->var(varName, varContext);
622 DDS* pDDS = getDDSForCurrentDataset();
623 if (pDDS) {
625 }
626 else {
627 return 0;
628 }
629}
630
631void NCMLParser::addCopyOfVariableAtCurrentScope(BaseType& varTemplate)
632{
633 // make sure the name is free
634 if (isNameAlreadyUsedAtCurrentScope(varTemplate.name())) {
635 THROW_NCML_PARSE_ERROR(getParseLineNumber(), "NCMLParser::addNewVariableAtCurrentScope:"
636 " Cannot add variable since a variable or attribute of the same name exists at current scope."
637 " Name= " + varTemplate.name());
638 }
639
640 // Also an internal error if the caller tries it.
641 if (!(isScopeCompositeVariable() || isScopeGlobal())) {
642 THROW_NCML_INTERNAL_ERROR(
643 "NCMLParser::addNewVariableAtCurrentScope: current scope not valid for adding variable. Scope="
644 + getTypedScopeString());
645 }
646
647 // OK, we know we can add it now. But to what?
648 if (_pVar) // Constructor variable
649 {
650 NCML_ASSERT_MSG(_pVar->is_constructor_type(), "Expected _pVar is a container type!");
651 _pVar->add_var(&varTemplate);
652 }
653 else // Top level DDS for current dataset
654 {
655 BESDEBUG(MODULE,
656 "Adding new variable to DDS top level. Variable name=" << varTemplate.name() << " and typename=" << varTemplate.type_name() << endl);
657 DDS* pDDS = getDDSForCurrentDataset();
658 pDDS->add_var(&varTemplate);
659 }
660}
661
662void NCMLParser::deleteVariableAtCurrentScope(const string& name)
663{
664 if (!(isScopeCompositeVariable() || isScopeGlobal())) {
665 THROW_NCML_INTERNAL_ERROR(
666 "NCMLParser::deleteVariableAtCurrentScope called when we do not have a variable container at current scope!");
667 }
668
669 if (_pVar) // In container?
670 {
671 // Given interfaces, unfortunately it needs to be a Structure or we can't do this operation.
672 Structure* pVarContainer = dynamic_cast<Structure*>(_pVar);
673 if (!pVarContainer) {
674 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
675 "NCMLParser::deleteVariableAtCurrentScope called with _pVar not a "
676 "Structure class variable! "
677 "We can only delete variables from top DDS or within a Structure now. scope="
678 + getTypedScopeString());
679 }
680 // First, make sure it exists so we can warn if not. The call fails silently.
681 BaseType* pToBeNuked = pVarContainer->var(name);
682 if (!pToBeNuked) {
683 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
684 "Tried to remove variable from a Structure, but couldn't find the variable with name=" + name
685 + "at scope=" + getScopeString());
686 }
687 // Silently fails, so assume it worked.
688 pVarContainer->del_var(name);
689 }
690 else // Global
691 {
692 // we better have a DDS if we get here!
693 DDS* pDDS = getDDSForCurrentDataset();
694 VALID_PTR(pDDS);
695 pDDS->del_var(name);
696 }
697}
698
699BaseType*
700NCMLParser::getCurrentVariable() const
701{
702 return _pVar;
703}
704
705void NCMLParser::setCurrentVariable(BaseType* pVar)
706{
707 _pVar = pVar;
708 if (pVar) // got a variable
709 {
710 setCurrentAttrTable(&(pVar->get_attr_table()));
711 }
712 else if (getDDSForCurrentDataset()) // null pvar but we have a dds, use global table
713 {
714 DDS* dds = getDDSForCurrentDataset();
715 setCurrentAttrTable(&(dds->get_attr_table()));
716 }
717 else // just clear it out, no context
718 {
719 setCurrentAttrTable(0);
720 }
721}
722
723bool NCMLParser::typeCheckDAPVariable(const BaseType& var, const string& expectedType)
724{
725 // Match all types.
726 if (expectedType.empty()) {
727 return true;
728 }
729 else {
730 // If the type specifies a Structure, it better be a Constructor type.
731 if (expectedType == STRUCTURE_TYPE) {
732 // Calls like is_constructor_type really should be const...
733 BaseType& varSemanticConst = const_cast<BaseType&>(var);
734 return varSemanticConst.is_constructor_type();
735 }
736 else {
737 return (var.type_name() == expectedType);
738 }
739 }
740}
741
742AttrTable*
743NCMLParser::getCurrentAttrTable() const
744{
745 // will load the DDS of current dataset if required.
746 // The end result of calling AttrTableLazyPtr::get() is that the NCMLParser
747 // field '_pAttrTable' points to the DDS' AttrTable.
748 return _pCurrentTable.get();
749}
750
751void NCMLParser::setCurrentAttrTable(AttrTable* pAT)
752{
753 _pCurrentTable.set(pAT);
754}
755
756AttrTable*
757NCMLParser::getGlobalAttrTable() const
758{
759 AttrTable* pAT = 0;
760 DDS* pDDS = getDDSForCurrentDataset();
761 if (pDDS) {
762 pAT = &(pDDS->get_attr_table());
763 }
764 return pAT;
765}
766
767bool NCMLParser::attributeExistsAtCurrentScope(const string& name) const
768{
769 // Lookup the given attribute in the current table.
770 AttrTable::Attr_iter attr;
771 bool foundIt = findAttribute(name, attr);
772 return foundIt;
773}
774
775bool NCMLParser::findAttribute(const string& name, AttrTable::Attr_iter& attr) const
776{
777 AttrTable* pAT = getCurrentAttrTable();
778 if (pAT) {
779 attr = pAT->simple_find(name);
780 return (attr != pAT->attr_end());
781 }
782 else {
783 return false;
784 }
785}
786
787int NCMLParser::tokenizeAttrValues(vector<string>& tokens, const string& values, const string& dapAttrTypeName,
788 const string& separator)
789{
790 // Convert the type string into a DAP AttrType to be sure
791 AttrType dapType = String_to_AttrType(dapAttrTypeName);
792 if (dapType == Attr_unknown) {
793 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
794 "Attempting to tokenize attribute value failed since"
795 " we found an unknown internal DAP type=" + dapAttrTypeName
796 + " for the current fully qualified attribute=" + _scope.getScopeString());
797 }
798
799 // If we're valid type, tokenize us according to type.
800 int numTokens = tokenizeValuesForDAPType(tokens, values, dapType, separator);
801 if (numTokens == 0 && ((dapType == Attr_string) || (dapType == Attr_url) || (dapType == Attr_other_xml))) {
802 tokens.push_back(""); // 0 tokens will cause a problem later, so push empty string!
803 }
804
805 // Now type check the tokens are valid strings for the type.
807
808#if DEBUG_NCML_PARSER_INTERNALS
809
810 if (separator != NCMLUtil::WHITESPACE) {
811 BESDEBUG(MODULE, prolog << "Got non-default separators for tokenize. separator=\"" << separator << "\"" << endl);
812 }
813
814 string msg;
815 for (unsigned int i = 0; i < tokens.size(); i++) {
816 if (i > 0) {
817 msg += ",";
818 }
819 msg += "\"";
820 msg += tokens[i];
821 msg += "\"";
822 }
823 BESDEBUG(MODULE, prolog << "Tokenize got " << numTokens << " tokens:\n" << msg << endl);
824
825#endif // DEBUG_NCML_PARSER_INTERNALS
826
827 return numTokens;
828}
829
830int NCMLParser::tokenizeValuesForDAPType(vector<string>& tokens, const string& values, AttrType dapType,
831 const string& separator)
832{
833 tokens.resize(0); // Start empty.
834 int numTokens = 0;
835
836 if (dapType == Attr_unknown) {
837 // Do out best to recover....
838 BESDEBUG(MODULE,
839 "Warning: tokenizeValuesForDAPType() got unknown DAP type! Attempting to continue..." << endl);
840 tokens.push_back(values);
841 numTokens = 1;
842 }
843 else if (dapType == Attr_container) {
844 // Not supposed to have values, just push empty string....
845 BESDEBUG(MODULE, prolog << "Warning: tokenizeValuesForDAPType() got container type, we should not have values!" << endl);
846 tokens.push_back("");
847 numTokens = 1;
848 }
849 else if (dapType == Attr_string) {
850 // Don't use whitespace as default separator for strings.
851 // If they explicitly set it, then fine.
852 // We don't trim strings either. All whitespace, trailing or leading, is left.
853 numTokens = NCMLUtil::tokenize(values, tokens, separator);
854 }
855 else // For all other atomic types, do a split on separator
856 {
857 // Use whitespace as default if sep not set
858 string sep = ((separator.empty()) ? (NCMLUtil::WHITESPACE) : (separator));
859 numTokens = NCMLUtil::tokenize(values, tokens, sep);
860 NCMLUtil::trimAll(tokens);
861 }
862 return numTokens;
863}
864
866
// Used below to convert NcML data type to a DAP data type.
typedef std::map<std::string, std::string> TypeConverter;

// If true, we allow the specification of a DAP scalar type
// in a location expecting an NcML type.
static const bool ALLOW_DAP_TYPES_AS_NCML_TYPES = true;

/*
 * Builds the table that maps NcML type names (and, optionally, DAP type names)
 * to canonical DAP type names.
 *
 * Ncml DataType:
 <xsd:enumeration value="char"/>
 <xsd:enumeration value="byte"/>
 <xsd:enumeration value="short"/>
 <xsd:enumeration value="int"/>
 <xsd:enumeration value="long"/>
 <xsd:enumeration value="float"/>
 <xsd:enumeration value="double"/>
 <xsd:enumeration value="String"/>
 <xsd:enumeration value="string"/>
 <xsd:enumeration value="Structure"/>
 *
 * Returned by value so that nothing is heap-allocated by hand.
 */
static TypeConverter makeTypeConverter()
{
    TypeConverter tc;
    // NcML to DAP conversions
    tc["char"] = "Byte"; // char is a C char, let's use a Byte and special parse it as a char not numeric
    tc["byte"] = "Int16"; // Since NcML byte's can be signed, we must promote them to not lose the sign bit.
    tc["short"] = "Int16";
    tc["int"] = "Int32";
    tc["long"] = "Int32"; // not sure of this one
    tc["float"] = "Float32";
    tc["double"] = "Float64";
    tc["string"] = "String"; // allow lower case.
    tc["String"] = "String";
    tc["Structure"] = "Structure";
    tc["structure"] = "Structure"; // allow lower case for this as well

    // If we allow DAP types to be specified directly,
    // then make them be passthroughs in the converter...
    if (ALLOW_DAP_TYPES_AS_NCML_TYPES) {
        tc["Byte"] = "Byte"; // DAP Byte can fit in Byte tho, unlike NcML "byte"!
        tc["Int16"] = "Int16";
        tc["UInt16"] = "UInt16";
        tc["Int32"] = "Int32";
        tc["UInt32"] = "UInt32";
        tc["Float32"] = "Float32";
        tc["Float64"] = "Float64";
        // allow both url cases due to old bug where "Url" is returned in dds rather then DAP2 spec "URL"
        tc["Url"] = "URL";
        tc["URL"] = "URL";
        tc["OtherXML"] = "OtherXML"; // Pass it through
    }

    return tc;
}

// Singleton accessor.
// The table is a function-local static: it is built on the first call, its
// initialisation is thread-safe under C++11, and it is destroyed at exit
// instead of being leaked from a raw `new`.
static const TypeConverter& getTypeConverter()
{
    static const TypeConverter singleton = makeTypeConverter();
    return singleton;
}
935
#if 0 // Unused right now... might be later, but I hate compiler warnings.
// Reports whether `type` converts to a known DAP attribute type.
static bool isDAPType(const string& type)
{
    const AttrType resolved = String_to_AttrType(type);
    return resolved != Attr_unknown;
}
#endif // 0
943
944/* static */
945string NCMLParser::convertNcmlTypeToCanonicalType(const string& ncmlType)
946{
947
948#if 0
949 // OLD WAY - Disallows attributes that do not specify type
950 NCML_ASSERT_MSG(!daType.empty(), "Logic error: convertNcmlTypeToCanonicalType disallows empty() input.");
951#endif
952
953 // NEW WAY - If the attribute does not specify a type them the type is defaulted to "string"
954 string daType = ncmlType;
955 if(daType.empty())
956 daType = "string";
957
958 const TypeConverter& tc = getTypeConverter();
959 TypeConverter::const_iterator it = tc.find(daType);
960
961 if (it == tc.end()) {
962 return ""; // error condition
963 }
964 else {
965 return it->second;
966 }
967}
968
969void NCMLParser::checkDataIsValidForCanonicalTypeOrThrow(const string& type, const vector<string>& tokens) const
970{
971 /* Byte
972 Int16
973 UInt16
974 Int32
975 UInt32
976 Float32
977 Float64
978 String
979 URL
980 OtherXML
981 */
982 bool valid = true;
984 vector<string>::const_iterator endIt = tokens.end();
985 for (it = tokens.begin(); it != endIt; ++it) {
986 if (type == "Byte") {
987 valid &= check_byte(it->c_str());
988 }
989 else if (type == "Int16") {
990 valid &= check_int16(it->c_str());
991 }
992 else if (type == "UInt16") {
993 valid &= check_uint16(it->c_str());
994 }
995 else if (type == "Int32") {
996 valid &= check_int32(it->c_str());
997 }
998 else if (type == "UInt32") {
999 valid &= check_uint32(it->c_str());
1000 }
1001 else if (type == "Float32") {
1002 valid &= check_float32(it->c_str());
1003 }
1004 else if (type == "Float64") {
1005 valid &= check_float64(it->c_str());
1006 }
1007 // Doh! The DAP2 specifies case as "URL" but internally libdap uses "Url" Allow both...
1008 else if (type == "URL" || type == "Url" || type == "String") {
1009 // TODO the DAP call check_url is currently a noop. do we want to check for well-formed URL?
1010 // This isn't an NcML type now, so straight up NcML users might enter URL as String anyway.
1011 valid &= (it->size() <= MAX_DAP_STRING_SIZE);
1012 if (!valid) {
1013 std::stringstream msg;
1014 msg << "Invalid Value: The " << type << " attribute value (not shown) exceeded max string length of "
1015 << MAX_DAP_STRING_SIZE << " at scope=" << _scope.getScopeString() << endl;
1016 THROW_NCML_PARSE_ERROR(getParseLineNumber(), msg.str());
1017 }
1018
1019 valid &= NCMLUtil::isAscii(*it);
1020 if (!valid) {
1021 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1022 "Invalid Value: The " + type + " attribute value (not shown) has an invalid non-ascii character.");
1023 }
1024 }
1025
1026 // For OtherXML, there's nothing to check so just say it's OK.
1027 // The SAX parser checks it for wellformedness already,
1028 // but ultimately it's just an arbitrary string...
1029 else if (type == "OtherXML") {
1030 valid &= true;
1031 }
1032
1033 else {
1034 // We probably shouldn't get here, but...
1035 THROW_NCML_INTERNAL_ERROR("checkDataIsValidForCanonicalType() got unknown data type=" + type);
1036 }
1037
1038 // Early throw so we know which token it was.
1039 if (!valid) {
1040 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1041 "Invalid Value given for type=" + type + " with value=" + (*it)
1042 + " was invalidly formed or out of range" + _scope.getScopeString());
1043 }
1044 }
1045 // All is good if we get here.
1046}
1047
1048void NCMLParser::clearAllAttrTables(DDS* dds)
1049{
1050 if (!dds) {
1051 return;
1052 }
1053
1054 // Blow away the global attribute table.
1055 dds->get_attr_table().erase();
1056
1057 // Hit all variables, recursing on containers.
1058 for (DDS::Vars_iter it = dds->var_begin(); it != dds->var_end(); ++it) {
1059 // this will clear not only *it's table, but it's children if it's composite.
1060 clearVariableMetadataRecursively(*it);
1061 }
1062}
1063
1064void NCMLParser::clearVariableMetadataRecursively(BaseType* var)
1065{
1066 VALID_PTR(var);
1067 // clear the table
1068 var->get_attr_table().erase();
1069
1070 if (var->is_constructor_type()) {
1071 Constructor *compositeVar = dynamic_cast<Constructor*>(var);
1072 if (!compositeVar) {
1073 THROW_NCML_INTERNAL_ERROR(
1074 "clearVariableMetadataRecursively: Unexpected cast error on dynamic_cast<Constructor*>");
1075 }
1076 for (Constructor::Vars_iter it = compositeVar->var_begin(); it != compositeVar->var_end(); ++it) {
1077 clearVariableMetadataRecursively(*it);
1078 }
1079 }
1080}
1081
1082void NCMLParser::enterScope(const string& name, ScopeStack::ScopeType type)
1083{
1084 _scope.push(name, type);
1085 BESDEBUG(MODULE, prolog << "Entering scope: " << _scope.top().getTypedName() << endl);
1086 BESDEBUG(MODULE, prolog << "New scope=\"" << _scope.getScopeString() << "\"" << endl);
1087}
1088
1089void NCMLParser::exitScope()
1090{
1091 NCML_ASSERT_MSG(!_scope.empty(), "Logic Error: Scope Stack Underflow!");
1092 BESDEBUG(MODULE, prolog << "Exiting scope " << _scope.top().getTypedName() << endl);
1093 _scope.pop();
1094 BESDEBUG(MODULE, prolog << "New scope=\"" << _scope.getScopeString() << "\"" << endl);
1095}
1096
1097void NCMLParser::printScope() const
1098{
1099 BESDEBUG(MODULE, prolog << "Scope=\"" << _scope.getScopeString() << "\"" << endl);
1100}
1101
1102string NCMLParser::getScopeString() const
1103{
1104 return _scope.getScopeString();
1105}
1106
1107string NCMLParser::getTypedScopeString() const
1108{
1109 return _scope.getTypedScopeString();
1110}
1111
1112int NCMLParser::getScopeDepth() const
1113{
1114 return _scope.size();
1115}
1116void NCMLParser::pushElement(NCMLElement* elt)
1117{
1118 VALID_PTR(elt);
1119 _elementStack.push_back(elt);
1120 elt->ref(); // up the count!
1121}
1122
1123void NCMLParser::popElement()
1124{
1125 NCMLElement* elt = _elementStack.back();
1126 _elementStack.pop_back();
1127
1128 // Keep the toString around if we plan to nuke him
1129 string infoOnDeletedDude = ((elt->getRefCount() == 1) ? (elt->toString()) : (string("")));
1130
1131 // Drop the ref count. If that forced a delete, print out the saved string.
1132 if (elt->unref() == 0) {
1133 BESDEBUG("ncml:memory",
1134 "NCMLParser::popElement: ref count hit 0 so we deleted element=" << infoOnDeletedDude << endl);
1135 }
1136}
1137
1139NCMLParser::getCurrentElement() const
1140{
1141 if (_elementStack.empty()) {
1142 return 0;
1143 }
1144 else {
1145 return _elementStack.back();
1146 }
1147}
1148
1149void NCMLParser::clearElementStack()
1150{
1151 while (!_elementStack.empty()) {
1152 NCMLElement* elt = _elementStack.back();
1153 _elementStack.pop_back();
1154 // unref() them... The Factory will take care of dangling memory...
1155 elt->unref();
1156 }
1157 _elementStack.resize(0);
1158}
1159
1160void NCMLParser::processStartNCMLElement(const std::string& name, const XMLAttributeMap& attrs)
1161{
1162 // Store it in a shared ptr in case this function exceptions before we store it in the element stack.
1163 RCPtr<NCMLElement> elt = _elementFactory.makeElement(name, attrs, *this);
1164
1165 // If we actually created an element of the given type name
1166 if (elt.get()) {
1167 elt->handleBegin();
1168 // tell the container to push the raw element, which will also ref() it on success
1169 // otherwise ~RCPtr will unref() to 0 and thus nuke it on exception.
1170 pushElement(elt.get());
1171 }
1172 else // Unknown element...
1173 {
1174 if (sThrowExceptionOnUnknownElements) {
1175 THROW_NCML_PARSE_ERROR(getParseLineNumber(),
1176 "Unknown element type=" + name + " found in NcML parse with scope=" + _scope.getScopeString());
1177 }
1178 else {
1179 BESDEBUG(MODULE, prolog << "Start of <" << name << "> element. Element unsupported, ignoring." << endl);
1180 }
1181 }
1182}
1183
1184void NCMLParser::processEndNCMLElement(const std::string& name)
1185{
1186 NCMLElement* elt = getCurrentElement();
1187 VALID_PTR(elt);
1188
1189 // If it matches the one on the top of the stack, then process and pop.
1190 if (elt->getTypeName() == name) {
1191 elt->handleEnd();
1192 popElement(); // handles delete
1193 }
1194 else // the names don't match, so just ignore it.
1195 {
1196 BESDEBUG(MODULE, prolog << "End of <" << name << "> element unsupported currently, ignoring." << endl);
1197 }
1198}
1199
1200const DimensionElement*
1201NCMLParser::getDimensionAtLexicalScope(const string& dimName) const
1202{
1203 const DimensionElement* ret = 0;
1204 if (getCurrentDataset()) {
1205 ret = getCurrentDataset()->getDimensionInFullScope(dimName);
1206 }
1207 return ret;
1208}
1209
1210string NCMLParser::printAllDimensionsAtLexicalScope() const
1211{
1212 string ret("");
1213 NetcdfElement* dataset = getCurrentDataset();
1214 while (dataset) {
1215 ret += dataset->printDimensions();
1216 dataset = dataset->getParentDataset();
1217 }
1218 return ret;
1219}
1220
1221void NCMLParser::enterOtherXMLParsingState(OtherXMLParser* pOtherXMLParser)
1222{
1223 BESDEBUG(MODULE, prolog << "Entering state for parsing OtherXML!" << endl);
1224 _pOtherXMLParser = pOtherXMLParser;
1225}
1226
1227bool NCMLParser::isParsingOtherXML() const
1228{
1229 return _pOtherXMLParser;
1230}
1231
1232void NCMLParser::cleanup()
1233{
1234 // The only memory we own is the _response, which is in an unique_ptr so will
1235 // either be returned to caller in parse() and cleared, or else
1236 // delete'd by our dtor via unique_ptr
1237
1238 // All other objects point into _response temporarily, so nothing to destroy there.
1239
1240 // Just for completeness.
1241 resetParseState();
1242}
1243
1244} // namespace ncml_module
1245
Represents an OPeNDAP DAP response object within the BES.
static libdap::BaseType * getVariableNoRecurse(const libdap::DDS &dds, const std::string &name)
static std::unique_ptr< BESDapResponse > makeResponseForType(ResponseType type)
Definition DDSLoader.cc:427
static bool checkResponseIsValidType(ResponseType type, BESDapResponse *pResponse)
Definition DDSLoader.cc:466
void set(AttrTable *pAT)
AttrTable * get() const
Definition NCMLParser.cc:93
Base class for NcML element concrete classes.
Definition NCMLElement.h:61
virtual const std::string & getTypeName() const =0
virtual void handleContent(const std::string &content)
virtual void onParseWarning(std::string msg)
virtual void onEndElement(const std::string &name)
const XMLNamespaceStack & getXMLNamespaceStack() const
virtual void onParseError(std::string msg)
int getParseLineNumber() const
virtual void onStartElement(const std::string &name, const XMLAttributeMap &attrs)
static string convertNcmlTypeToCanonicalType(const string &ncmlType)
virtual void onStartElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri, const XMLAttributeMap &attributes, const XMLNamespaceMap &namespaces)
void checkDataIsValidForCanonicalTypeOrThrow(const string &type, const vector< string > &tokens) const
Make sure the given tokens are valid for the listed type. For example, makes sure floats are well for...
void parseInto(const string &ncmlFilename, agg_util::DDSLoader::ResponseType responseType, BESDapResponse *response)
Same as parse, but the response object to parse into is passed down by the caller rather than created...
static const string STRUCTURE_TYPE
Definition NCMLParser.h:577
virtual void onEndElementWithNamespace(const std::string &localname, const std::string &prefix, const std::string &uri)
std::unique_ptr< BESDapResponse > parse(const std::string &ncmlFilename, agg_util::DDSLoader::ResponseType type)
Parse the NcML filename, returning a newly allocated DDS response containing the underlying dataset t...
virtual void onCharacters(const std::string &content)
NCMLParser(agg_util::DDSLoader &loader)
Create a structure that can parse an NCML filename and return a transformed response of requested t...
virtual void setParseLineNumber(int line)
static int tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters=" \t")
Definition NCMLUtil.cc:58
static const std::string WHITESPACE
Definition NCMLUtil.h:80
static void trimAll(std::vector< std::string > &tokens, const std::string &trimChars=WHITESPACE)
Definition NCMLUtil.cc:127
static bool isAscii(const std::string &str)
Definition NCMLUtil.cc:94
Concrete class for NcML <netcdf> element.
virtual const libdap::DDS * getDDS() const
Wrapper for libxml SAX parser C callbacks into C++.
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
STL class.
STL iterator class.
STL class.
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...