bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
SaxParserWrapper.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29
30#include "config.h"
31
32#include "SaxParserWrapper.h"
33
34#include <exception>
35#include <iostream>
36#include <libxml/parser.h>
37#include <libxml/xmlstring.h>
38#include <cstdio> // for vsnprintf
39#include <string>
40
41#include "BESDebug.h"
42#include "BESError.h"
43#include "BESInternalError.h"
44#include "BESInternalFatalError.h"
45#include "BESSyntaxUserError.h"
46#include "BESForbiddenError.h"
47#include "BESNotFoundError.h"
48#include "NCMLDebug.h"
49#include "SaxParser.h"
50#include "XMLHelpers.h"
51
52// Toggle to tell the parser to use the Sax2 start/end element
53// calls with namespace information.
54// [ TODO We probably want to remove the non-namespace pathways at some point,
55// but I will leave them here for now in case there are issues ]
56#define NCML_PARSER_USE_SAX2_NAMESPACES 1
57
58using namespace std;
59using namespace ncml_module;
60
62// Helpers
63
64#if NCML_PARSER_USE_SAX2_NAMESPACES
65static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
66static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
67{
68 attrMap.clear();
69 for (int i = 0; i < num_attributes; ++i) {
70 XMLAttribute attr;
71 attr.fromSAX2NamespaceAttributes(attributes);
72 attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
73 attrMap.addAttribute(attr);
74 }
75 return num_attributes;
76}
77#else
78// Assumes the non-namespace calls, so attrs is stride 2 {name,value}
79static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
80{
81 attrMap.clear();
82 int count=0;
83 while (attrs && *attrs != NULL)
84 {
85 XMLAttribute attr;
86 attr.localname = XMLUtil::xmlCharToString(*attrs);
87 attr.value = XMLUtil::xmlCharToString(*(attrs+1));
88 attrMap.addAttribute(attr);
89 attrs += 2;
90 count++;
91 }
92 return count;
93}
94#endif // NCML_PARSER_USE_SAX2_NAMESPACES
95
97// Callbacks we will register that just pass on to our C++ engine
98//
99// NOTE WELL: New C handlers need to follow the
100// other examples given in order to avoid memory leaks
101// in libxml during an exception!
102
103// To avoid cut & paste below, we use these macros to cast the void* into the wrapper and
104// set up a proper error handling structure around the main call.
105// The macro internally defines the symbol "parser" to the SaxParser contained in the wrapper.
106// So for example, a safe handler call to SaxParser would look like:
107// static void ncmlStartDocument(void* userData)
108//{
109// BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
110// parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
111// END_SAFE_PARSER_BLOCK // end the error handling wrapper
112//}
113
// Opens the guarded region of a SAX callback. argName is the void* user data,
// which is cast to SaxParserWrapper*. If the wrapper is already in the
// exception state the enclosing (void) callback returns immediately; otherwise
// a try block is opened with `parser` bound to the wrapped SaxParser and its
// parse line number refreshed from the wrapper.
// Must be paired with END_SAFE_PARSER_BLOCK, which closes the braces opened here.
#define BEGIN_SAFE_PARSER_BLOCK(argName) \
    { \
        SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
        if (_spw_->isExceptionState()) { \
            return; \
        } \
        try { \
            SaxParser& parser = _spw_->getParser(); \
            parser.setParseLineNumber(_spw_->getCurrentParseLine());

// Closes the region opened by BEGIN_SAFE_PARSER_BLOCK. Nothing thrown by the
// guarded statements escapes the callback: each handler converts what it caught
// into a BESInternalError and hands it to the wrapper via deferException().
#define END_SAFE_PARSER_BLOCK \
        } \
        catch (BESError& theErr) { \
            BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
            BESInternalError _badness_("ParseError: " + theErr.get_message() , theErr.get_file(), theErr.get_line()); \
            _spw_->deferException(_badness_); \
        } \
        catch (std::exception& ex) { \
            BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
            BESInternalError _badness_("ParseError: " + string(ex.what()), __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
        catch (...) { \
            BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
            BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
    }
149
151// Our C SAX callbacks, wrapped carefully.
152
153static void ncmlStartDocument(void* userData)
154{
155 BEGIN_SAFE_PARSER_BLOCK(userData)
156
157 parser.onStartDocument();
158
159 END_SAFE_PARSER_BLOCK
160}
161
162static void ncmlEndDocument(void* userData)
163{
164 BEGIN_SAFE_PARSER_BLOCK(userData)
165
166 parser.onEndDocument();
167
168 END_SAFE_PARSER_BLOCK
169}
170
171#if !NCML_PARSER_USE_SAX2_NAMESPACES
172
173static void ncmlStartElement(void * userData,
174 const xmlChar * name,
175 const xmlChar ** attrs)
176{
177 // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
178 BEGIN_SAFE_PARSER_BLOCK(1)
179
180 string nameS = XMLUtil::xmlCharToString(name);
181 XMLAttributeMap map;
182 toXMLAttributeMapNoNamespaces(map, attrs);
183
184 // These args will be valid for the scope of the call.
185 parser.onStartElement(nameS, map);
186
187 END_SAFE_PARSER_BLOCK
188}
189
190static void ncmlEndElement(void * userData,
191 const xmlChar * name)
192{
193 BEGIN_SAFE_PARSER_BLOCK(1)
194
195 string nameS = XMLUtil::xmlCharToString(name);
196 parser.onEndElement(nameS);
197
198 END_SAFE_PARSER_BLOCK
199}
200#endif // !NCML_PARSER_USE_SAX2_NAMESPACES
201
202#if NCML_PARSER_USE_SAX2_NAMESPACES
203static
204void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
205 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
206 const xmlChar **attributes)
207{
208 // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
209 BEGIN_SAFE_PARSER_BLOCK(userData)
210
211 BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
212
213 XMLAttributeMap attrMap;
214 toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
215
216 XMLNamespaceMap nsMap;
217 nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
218
219 // These args will be valid for the scope of the call.
220 string localnameString = XMLUtil::xmlCharToString(localname);
221 string prefixString = XMLUtil::xmlCharToString(prefix);
222 string uriString = XMLUtil::xmlCharToString(URI);
223
224 parser.onStartElementWithNamespace(
225 localnameString,
226 prefixString,
227 uriString,
228 attrMap,
229 nsMap);
230
231 END_SAFE_PARSER_BLOCK
232}
233
234static
235void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
236{
237 BEGIN_SAFE_PARSER_BLOCK(userData)
238
239 string localnameString = XMLUtil::xmlCharToString(localname);
240 string prefixString = XMLUtil::xmlCharToString(prefix);
241 string uriString = XMLUtil::xmlCharToString(URI);
242 parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
243
244 END_SAFE_PARSER_BLOCK
245}
246#endif // NCML_PARSER_USE_SAX2_NAMESPACES
247
248static void ncmlCharacters(void* userData, const xmlChar* content, int len)
249{
250 BEGIN_SAFE_PARSER_BLOCK(userData)
251
252 // len is since the content string might not be null terminated,
253 // so we have to build out own and pass it up special....
254 // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
255 string characters("");
256 characters.reserve(len);
257 const xmlChar* contentEnd = content+len;
258 while(content != contentEnd)
259 {
260 characters += (const char)(*content++);
261 }
262
263 parser.onCharacters(characters);
264
265 END_SAFE_PARSER_BLOCK
266}
267
268static void ncmlWarning(void* userData, const char* msg, ...)
269{
270 BEGIN_SAFE_PARSER_BLOCK(userData)
271
272 BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
273
274 char buffer[1024];
275 va_list(args);
276 va_start(args, msg);
277 unsigned int len = sizeof(buffer);
278 vsnprintf(buffer, len, msg, args);
279 va_end(args);
280 parser.onParseWarning(string(buffer));
281
282 END_SAFE_PARSER_BLOCK
283}
284
285static void ncmlFatalError(void* userData, const char* msg, ...)
286{
287 BEGIN_SAFE_PARSER_BLOCK(userData)
288
289 BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
290
291 char buffer[1024];
292 va_list(args);
293 va_start(args, msg);
294 unsigned int len = sizeof(buffer);
295 vsnprintf(buffer, len, msg, args);
296 va_end(args);
297 parser.onParseError(string(buffer));
298
299 END_SAFE_PARSER_BLOCK
300}
301
303// class SaxParserWrapper impl
304
305SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
306 _parser(parser), _handler(), _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
307{
308}
309
310SaxParserWrapper::~SaxParserWrapper()
311{
312 // Really not much to do... everything cleans itself up.
313 _state = NOT_PARSING;
314
315 // Leak fix. jhrg 6/21/19
316 cleanupParser();
317}
318
319bool SaxParserWrapper::parse(const string& ncmlFilename)
320{
321 // It's illegal to call this until it's done.
322 if (_state == PARSING) {
323 throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
324 }
325
326 // OK, now we're parsing
327 _state = PARSING;
328
329 setupParser();
330
331 bool success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
332
333 // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
334 if (isExceptionState()) {
336 }
337
338 // Otherwise, we're also done parsing.
339 _state = NOT_PARSING;
340 return success;
341}
342
344{
345 _state = EXCEPTION;
346 _errorType = theErr.get_bes_error_type();
347 _errorMsg = theErr.get_message();
348 _errorLine = theErr.get_line();
349 _errorFile = theErr.get_file();
350}
351
352// HACK admittedly a little gross, but it's weird to have to copy an exception
353// and this seemed the safest way rather than making dynamic storage, etc.
355{
356 // Clear our state out so we can parse again though.
357 _state = NOT_PARSING;
358
359 switch (_errorType) {
360 case BES_INTERNAL_ERROR:
361 throw BESInternalError(_errorMsg, _errorFile, _errorLine);
362
363 case BES_INTERNAL_FATAL_ERROR:
364 throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
365
366 case BES_SYNTAX_USER_ERROR:
367 throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
368
369 case BES_FORBIDDEN_ERROR:
370 throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
371
372 case BES_NOT_FOUND_ERROR:
373 throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
374
375 default:
376 throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
377 }
378}
379
381{
382#if 0
383 if (_context) {
384 return xmlSAX2GetLineNumber(_context);
385 }
386 else {
387 return -1;
388 }
389#endif
390 return -1; //FIXME part of leak fix. jhrg 6.21.19
391}
392
393static void setAllHandlerCBToNulls(xmlSAXHandler& h)
394{
395 h.internalSubset = 0;
396 h.isStandalone = 0;
397 h.hasInternalSubset = 0;
398 h.hasExternalSubset = 0;
399 h.resolveEntity = 0;
400 h.getEntity = 0;
401 h.entityDecl = 0;
402 h.notationDecl = 0;
403 h.attributeDecl = 0;
404 h.elementDecl = 0;
405 h.unparsedEntityDecl = 0;
406 h.setDocumentLocator = 0;
407 h.startDocument = 0;
408 h.endDocument = 0;
409 h.startElement = 0;
410 h.endElement = 0;
411 h.reference = 0;
412 h.characters = 0;
413 h.ignorableWhitespace = 0;
414 h.processingInstruction = 0;
415 h.comment = 0;
416 h.warning = 0;
417 h.error = 0;
418 h.fatalError = 0;
419 h.getParameterEntity = 0;
420 h.cdataBlock = 0;
421 h.externalSubset = 0;
422
423 // unsigned int initialized; magic number the init should fill in
424 /* The following fields are extensions available only on version 2 */
425 // void *_private; //i'd assume i don't set this either...
426 h.startElementNs = 0;
427 h.endElementNs = 0;
428 h.serror = 0;
429}
430
431void SaxParserWrapper::setupParser()
432{
433 // setup the handler for version 2,
434 // which sets an internal version magic number
435 // into _handler.initialized
436 // but which doesn't clear the handlers to 0.
437 xmlSAXVersion(&_handler, 2);
438
439 // Initialize all handlers to 0 by hand to start
440 // so we don't blow those internal magic numbers.
441 setAllHandlerCBToNulls(_handler);
442
443 // Put our static functions into the handler
444 _handler.startDocument = ncmlStartDocument;
445 _handler.endDocument = ncmlEndDocument;
446 _handler.warning = ncmlWarning;
447 _handler.error = ncmlFatalError;
448 _handler.fatalError = ncmlFatalError;
449 _handler.characters = ncmlCharacters;
450
451 // We'll use one or the other until we're sure it works.
452#if NCML_PARSER_USE_SAX2_NAMESPACES
453 _handler.startElement = 0;
454 _handler.endElement = 0;
455 _handler.startElementNs = ncmlSax2StartElementNs;
456 _handler.endElementNs = ncmlSax2EndElementNs;
457#else
458 _handler.startElement = ncmlStartElement;
459 _handler.endElement = ncmlEndElement;
460 _handler.startElementNs = 0;
461 _handler.endElementNs = 0;
462#endif // NCML_PARSER_USE_SAX2_NAMESPACES
463}
464
465// Leak fix. jhrg 6/21/19
466void SaxParserWrapper::cleanupParser() noexcept
467{
468}
Base exception class for the BES with basic string message.
Definition BESError.h:66
unsigned int get_line() const
get the line number where the exception was thrown
Definition BESError.h:148
unsigned int get_bes_error_type() const
Return the return code for this error class.
Definition BESError.h:174
std::string get_file() const
get the file name where the exception was thrown
Definition BESError.h:140
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition SaxParser.h:48
void addAttribute(const XMLAttribute &attribute)
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition XMLHelpers.cc:94