libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
parser-util.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 // (c) COPYRIGHT URI/MIT 1995-1999
27 // Please read the full copyright statement in the file COPYRIGHT_URI.
28 //
29 // Authors:
30 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu>
31 
32 // These functions are utility functions used by the various DAP parsers (the
33 // DAS, DDS and constraint expression parsers).
34 // jhrg 9/7/95
35 
36 #include "config.h"
37 
38 #include <cerrno>
39 #include <cassert>
40 #include <cstring>
41 #include <cmath>
42 #include <cstdlib>
43 
44 #include <iostream>
45 #include <sstream>
46 
47 // We wrap VC++ 6.x strtod() to account for a shortcoming
48 // in that function with regard to "NaN".
49 #ifdef WIN32
50 #include <limits>
51 double w32strtod(const char *, char **);
52 #endif
53 
54 #include "Error.h"
55 #include "debug.h"
56 #include "parser.h" // defines constants such as ID_MAX
57 #include "dods-limits.h"
58 #include "util.h" // Jose Garcia: for append_long_to_string.
59 
60 using std::cerr;
61 using std::endl;
62 
63 #ifdef WIN32
64 // VC++ 6.x strtod() doesn't recognize "NaN". Account for it
65 // by wrapping it around a check for the Nan string. Use of
66 // the product is obsolete as of 1/2007, but it is unknown if
67 // the issue is still there in later releases of that product.
68 // ROM - 01/2007
69 double w32strtod(const char *val, char **ptr)
70 {
71  // Convert the two char arrays to compare to strings.
72  string *sval = new string(val);
73  string *snan = new string("NaN");
74 
75  // If val doesn't contain "NaN|Nan|nan|etc", use strtod as
76  // provided.
77  if (stricmp(sval->c_str(), snan->c_str()) != 0)
78  return (strtod(val, ptr));
79 
80  // But if it does, return the bit pattern for Nan and point
81  // the parsing ptr arg at the trailing '\0'.
82  *ptr = (char *) val + strlen(val);
83  return (std::numeric_limits < double >::quiet_NaN());
84 }
85 #endif
86 
87 namespace libdap {
88 
89 // Deprecated, but still used by the HDF4 EOS server code.
90 void
91 parse_error(parser_arg * arg, const char *msg, const int line_num,
92  const char *context)
93 {
94  // Jose Garcia
95  // This assert(s) is (are) only for developing purposes
96  // For production servers remove it by compiling with NDEBUG
97  assert(arg);
98  assert(msg);
99 
100  arg->set_status(FALSE);
101 
102  string oss = "";
103 
104  if (line_num != 0) {
105  oss += "Error parsing the text on line ";
106  append_long_to_string(line_num, 10, oss);
107  }
108  else {
109  oss += "Parse error.";
110  }
111 
112  if (context)
113  oss += (string) " at or near: " + context + (string) "\n" + msg
114  + (string) "\n";
115  else
116  oss += (string) "\n" + msg + (string) "\n";
117 
118  arg->set_error(new Error(unknown_error, oss));
119 }
120 
121 void
122 parse_error(const char *msg, const int line_num, const char *context)
123 {
124  // Jose Garcia
125  // This assert(s) is (are) only for developing purposes
126  // For production servers remove it by compiling with NDEBUG
127  assert(msg);
128 
129  string oss = "";
130 
131  if (line_num != 0) {
132  oss += "Error parsing the text on line ";
133  append_long_to_string(line_num, 10, oss);
134  }
135  else {
136  oss += "Parse error.";
137  }
138 
139  if (context)
140  oss += (string) " at or near: " + context + (string) "\n" + msg
141  + (string) "\n";
142  else
143  oss += (string) "\n" + msg + (string) "\n";
144 
145  throw Error(malformed_expr, oss);
146 }
147 
148 // context comes from the parser and will always be a char * unless the
149 // parsers change dramatically.
150 void
151 parse_error(const string & msg, const int line_num, const char *context)
152 {
153  parse_error(msg.c_str(), line_num, context);
154 }
155 
156 void save_str(char *dst, const char *src, const int line_num)
157 {
158  if (strlen(src) >= ID_MAX)
159  parse_error(string("The word `") + string(src)
160  + string("' is too long (it should be no longer than ")
161  + long_to_string(ID_MAX) + string(")."), line_num);
162 
163  strncpy(dst, src, ID_MAX);
164  dst[ID_MAX - 1] = '\0'; /* in case ... */
165 }
166 
// Store src in dst. The line-number argument is accepted only so the
// signature parallels the char * overload; it is unused.
void save_str(string & dst, const char *src, const int)
{
    dst.assign(src);
}
171 
172 bool is_keyword(string id, const string & keyword)
173 {
174  downcase(id);
175  id = prune_spaces(id);
176  DBG(cerr << "is_keyword: " << keyword << " = " << id << endl);
177  return id == keyword;
178 }
179 
190 int check_byte(const char *val)
191 {
192  char *ptr;
193  long v = strtol(val, &ptr, 0);
194 
195  if ((v == 0 && val == ptr) || *ptr != '\0') {
196  return FALSE;
197  }
198 
199  DBG(cerr << "v: " << v << endl);
200 
201  // We're very liberal here with values. Anything that can fit into 8 bits
202  // is allowed through. Clients will have to deal with the fact that the
203  // ASCII representation for the value might need to be tweaked. This is
204  // especially the case for Java clients where Byte datatypes are
205  // signed. 3/20/2000 jhrg
206  if ((v < 0 && v < DODS_SCHAR_MIN)
207  || (v > 0 && static_cast < unsigned long >(v) > DODS_UCHAR_MAX))
208  return FALSE;
209 
210  return TRUE;
211 }
212 
213 // This version of check_int will pass base 8, 10 and 16 numbers when they
214 // use the ANSI standard for string representation of those number bases.
215 
216 int check_int16(const char *val)
217 {
218  char *ptr;
219  long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
220 
221  if ((v == 0 && val == ptr) || *ptr != '\0') {
222  return FALSE;
223  }
224  // Don't use the constant from limits.h, use the ones in dods-limits.h
225  if (v > DODS_SHRT_MAX || v < DODS_SHRT_MIN) {
226  return FALSE;
227  }
228 
229  return TRUE;
230 }
231 
232 int check_uint16(const char *val)
233 {
234  char *ptr;
235  unsigned long v = strtol(val, &ptr, 0);
236 
237  if ((v == 0 && val == ptr) || *ptr != '\0') {
238  return FALSE;
239  }
240 
241  if (v > DODS_USHRT_MAX) {
242  return FALSE;
243  }
244 
245  return TRUE;
246 }
247 
248 int check_int32(const char *val)
249 {
250  char *ptr;
251  errno = 0;
252  long v = strtol(val, &ptr, 0); // `0' --> use val to determine base
253 
254  if ((v == 0 && val == ptr) || *ptr != '\0') {
255  return FALSE;
256  }
257 
258  // We need to check errno since strtol return clamps on overflow so the
259  // check against the DODS values below will always pass, even for out of
260  // bounds values in the string. mjohnson 7/20/09
261  if (errno == ERANGE) {
262  return FALSE;
263  }
264  // This could be combined with the above, or course, but I'm making it
265  // separate to highlight the test. On 64-bit linux boxes 'long' may be
266  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
267  else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
268  return FALSE;
269  }
270  else {
271  return TRUE;
272  }
273 }
274 
275 int check_uint32(const char *val)
276 {
277  // Eat whitespace and check for an initial '-' sign...
278  // strtoul allows an initial minus. mjohnson
279  const char* c = val;
280  while (c && isspace(*c)) {
281  c++;
282  }
283  if (c && (*c == '-')) {
284  return FALSE;
285  }
286 
287  char *ptr;
288  errno = 0;
289  unsigned long v = strtoul(val, &ptr, 0);
290 
291  if ((v == 0 && val == ptr) || *ptr != '\0') {
292  return FALSE;
293  }
294 
295  // check overflow first, or the below check is invalid due to
296  // clamping to the maximum value by strtoul
297  // maybe consider using long long for these checks? mjohnson
298  if (errno == ERANGE) {
299  return FALSE;
300  }
301  // See above.
302  else if (v > DODS_UINT_MAX) {
303  return FALSE;
304  }
305  else {
306  return TRUE;
307  }
308 }
309 
310 int check_int32(const char *val, int &v)
311 {
312  char *ptr;
313  errno = 0;
314  long tmp = strtol(val, &ptr, 0); // `0' --> use val to determine base
315 
316  if ((tmp == 0 && val == ptr) || *ptr != '\0') {
317  return FALSE;
318  }
319 
320  // We need to check errno since strtol return clamps on overflow so the
321  // check against the DODS values below will always pass, even for out of
322  // bounds values in the string. mjohnson 7/20/09
323  if (errno == ERANGE) {
324  return FALSE;
325  }
326  // This could be combined with the above, or course, but I'm making it
327  // separate to highlight the test. On 64-bit linux boxes 'long' may be
328  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
329  else if (tmp > DODS_INT_MAX || tmp < DODS_INT_MIN) {
330  return FALSE;
331  }
332  else {
333  v = (int)tmp;
334  return TRUE;
335  }
336 }
337 
338 int check_uint32(const char *val, unsigned int &v)
339 {
340  // Eat whitespace and check for an initial '-' sign...
341  // strtoul allows an initial minus. mjohnson
342  const char* c = val;
343  while (c && isspace(*c)) {
344  c++;
345  }
346  if (c && (*c == '-')) {
347  return FALSE;
348  }
349 
350  char *ptr;
351  errno = 0;
352  unsigned long tmp = strtoul(val, &ptr, 0);
353 
354  if ((tmp == 0 && val == ptr) || *ptr != '\0') {
355  return FALSE;
356  }
357 
358  // check overflow first, or the below check is invalid due to
359  // clamping to the maximum value by strtoul
360  // maybe consider using long long for these checks? mjohnson
361  if (errno == ERANGE) {
362  return FALSE;
363  }
364  // See above.
365  else if (tmp > DODS_UINT_MAX) {
366  return FALSE;
367  }
368  else {
369  v = (unsigned int)tmp;
370  return TRUE;
371  }
372 }
373 
374 int check_int64(const char *val)
375 {
376  char *ptr;
377  errno = 0;
378  long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
379 
380  if ((v == 0 && val == ptr) || *ptr != '\0') {
381  return FALSE;
382  }
383 
384  // We need to check errno since strtol return clamps on overflow so the
385  // check against the DODS values below will always pass, even for out of
386  // bounds values in the string. mjohnson 7/20/09
387  if (errno == ERANGE) {
388  return FALSE;
389  }
390 #if 0
391  // This could be combined with the above, or course, but I'm making it
392  // separate to highlight the test. On 64-bit linux boxes 'long' may be
393  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
394  //
395  // Removed - Coverity says it can never be false. Makes sense. jhrg 5/10/16
396  else if (v <= DODS_LLONG_MAX && v >= DODS_LLONG_MIN) {
397  return FALSE;
398  }
399 #endif
400  else {
401  return TRUE;
402  }
403 }
404 
405 int check_uint64(const char *val)
406 {
407  // Eat whitespace and check for an initial '-' sign...
408  // strtoul allows an initial minus. mjohnson
409  const char* c = val;
410  while (c && isspace(*c)) {
411  c++;
412  }
413  if (c && (*c == '-')) {
414  return FALSE;
415  }
416 
417  char *ptr;
418  errno = 0;
419  unsigned long long v = strtoull(val, &ptr, 0);
420 
421  if ((v == 0 && val == ptr) || *ptr != '\0') {
422  return FALSE;
423  }
424 
425  if (errno == ERANGE) {
426  return FALSE;
427  }
428  else if (v > DODS_ULLONG_MAX) { // 2^61
429  return FALSE;
430  }
431  else {
432  return v;
433  }
434 }
435 
436 // Check first for system errors (like numbers so small they convert
437 // (erroneously) to zero). Then make sure that the value is within
438 // limits.
439 
440 int check_float32(const char *val)
441 {
442  char *ptr;
443  errno = 0; // Clear previous value. Fix for the 64bit
444  // IRIX from Rob Morris. 5/21/2001 jhrg
445 
446 #ifdef WIN32
447  double v = w32strtod(val, &ptr);
448 #else
449  double v = strtod(val, &ptr);
450 #endif
451 
452  DBG(cerr << "v: " << v << ", ptr: " << ptr
453  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
454 
455  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
456  return FALSE;
457 
458 #if 0
459  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
460  || *ptr != '\0') {
461  return FALSE;
462  }
463 #endif
464 
465  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
466  double abs_val = fabs(v);
467  if (abs_val > DODS_FLT_MAX
468  || (abs_val != 0.0 && abs_val < DODS_FLT_MIN))
469  return FALSE;
470 
471  return TRUE;
472 }
473 
474 int check_float64(const char *val)
475 {
476  DBG(cerr << "val: " << val << endl);
477  char *ptr;
478  errno = 0; // Clear previous value. 5/21/2001 jhrg
479 
480 #ifdef WIN32
481  double v = w32strtod(val, &ptr);
482 #else
483  double v = strtod(val, &ptr);
484 #endif
485 
486  DBG(cerr << "v: " << v << ", ptr: " << ptr
487  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
488 
489 
490  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
491  return FALSE;
492 #if 0
493  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
494  || *ptr != '\0') {
495  return FALSE;
496  }
497 #endif
498  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
499  double abs_val = fabs(v);
500  if (abs_val > DODS_DBL_MAX
501  || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
502  return FALSE;
503 
504  return TRUE;
505 }
506 
507 int check_float64(const char *val, double &v)
508 {
509  DBG(cerr << "val: " << val << endl);
510  char *ptr;
511  errno = 0; // Clear previous value. 5/21/2001 jhrg
512 
513 #ifdef WIN32
514  v = w32strtod(val, &ptr);
515 #else
516  v = strtod(val, &ptr);
517 #endif
518 
519  DBG(cerr << "v: " << v << ", ptr: " << ptr
520  << ", errno: " << errno << ", val==ptr: " << (val == ptr) << endl);
521 
522 
523  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
524  return FALSE;
525 #if 0
526  if ((v == 0.0 && (val == ptr || errno == HUGE_VAL || errno == ERANGE))
527  || *ptr != '\0') {
528  return FALSE;
529  }
530 #endif
531  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
532  double abs_val = fabs(v);
533  if (abs_val > DODS_DBL_MAX
534  || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
535  return FALSE;
536 
537  return TRUE;
538 }
539 
540 long long get_int64(const char *val)
541 {
542  char *ptr;
543  errno = 0;
544  long long v = strtoll(val, &ptr, 0); // `0' --> use val to determine base
545 
546  if ((v == 0 && val == ptr) || *ptr != '\0') {
547  throw Error("Expected a 64-bit integer, but found other characters.");
548  // The value '" + string(val) + "' contains extra characters.");
549  }
550 
551  // We need to check errno since strtol return clamps on overflow so the
552  // check against the DODS values below will always pass, even for out of
553  // bounds values in the string. mjohnson 7/20/09
554  if (errno == ERANGE) {
555  throw Error("The 64-bit integer value is out of range.");
556  }
557 
558 #if 0
559  // This could be combined with the above, or course, but I'm making it
560  // separate to highlight the test. On 64-bit linux boxes 'long' may be
561  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
562  //
563  // Removed because coverity flags it as useless, which it is until we
564  // have 128-bit ints... jhrg 5/9/16
565  else if (v > DODS_LLONG_MAX || v < DODS_LLONG_MIN) {
566  throw Error("The value '" + string(val) + "' is out of range.");
567  }
568 #endif
569 
570  else {
571  return v;
572  }
573 }
574 
575 unsigned long long get_uint64(const char *val)
576 {
577  // Eat whitespace and check for an initial '-' sign...
578  // strtoul allows an initial minus. mjohnson
579  const char* c = val;
580  while (c && isspace(*c)) {
581  c++;
582  }
583  if (c && (*c == '-')) {
584  throw Error("Expected a valid array index.");
585  }
586 
587  char *ptr;
588  errno = 0;
589  unsigned long long v = strtoull(val, &ptr, 0);
590 
591  if ((v == 0 && val == ptr) || *ptr != '\0') {
592  throw Error("Expected an unsigned 64-bit integer, but found other characters.");
593  }
594 
595  if (errno == ERANGE) {
596  throw Error("The 64-bit integer value is out of range.");
597  }
598 #if 0
599  // Coverity; see above. jhrg 5/9/16
600  else if (v > DODS_MAX_ARRAY_INDEX) { // 2^61
601  throw Error("The value '" + string(val) + "' is out of range.");
602  }
603 #endif
604  else {
605  return v;
606  }
607 }
608 
609 int get_int32(const char *val)
610 {
611  char *ptr;
612  errno = 0;
613  int v = strtol(val, &ptr, 0); // `0' --> use val to determine base
614 
615  if ((v == 0 && val == ptr) || *ptr != '\0') {
616  throw Error("Expected a 32-bit integer, but found other characters.");
617  }
618 
619  // We need to check errno since strtol return clamps on overflow so the
620  // check against the DODS values below will always pass, even for out of
621  // bounds values in the string. mjohnson 7/20/09
622  if (errno == ERANGE) {
623  throw Error("The 32-bit integer value is out of range.");
624  }
625  // This could be combined with the above, or course, but I'm making it
626  // separate to highlight the test. On 64-bit linux boxes 'long' may be
627  // 64-bits and so 'v' can hold more than a DODS_INT32. jhrg 3/23/10
628  else if (v > DODS_INT_MAX || v < DODS_INT_MIN) {
629  return FALSE;
630  }
631 
632  else {
633  return v;
634  }
635 }
636 
637 unsigned int get_uint32(const char *val)
638 {
639  // Eat whitespace and check for an initial '-' sign...
640  // strtoul allows an initial minus. mjohnson
641  const char* c = val;
642  while (c && isspace(*c)) {
643  c++;
644  }
645  if (c && (*c == '-')) {
646  throw Error("Expected an unsigned 32-bit integer, but found other characters.");
647  }
648 
649  char *ptr;
650  errno = 0;
651  unsigned int v = strtoul(val, &ptr, 0);
652 
653  if ((v == 0 && val == ptr) || *ptr != '\0') {
654  throw Error("Expected an unsigned 32-bit integer, but found other characters.");
655  }
656 
657  if (errno == ERANGE) {
658  throw Error("The 32-bit integer value is out of range.");
659  }
660  // See above.
661  else if (v > DODS_UINT_MAX) {
662  return FALSE;
663  }
664  else {
665  return v;
666  }
667 }
668 
669 double get_float64(const char *val)
670 {
671  DBG(cerr << "val: " << val << endl);
672  char *ptr;
673  errno = 0; // Clear previous value. 5/21/2001 jhrg
674 
675 #ifdef WIN32
676  double v = w32strtod(val, &ptr);
677 #else
678  double v = strtod(val, &ptr);
679 #endif
680 
681  if (errno == ERANGE || (v == 0.0 && val == ptr) || *ptr != '\0')
682  throw Error("The 64-bit floating point value is out of range.");;
683 
684  DBG(cerr << "fabs(" << val << ") = " << fabs(v) << endl);
685  double abs_val = fabs(v);
686  if (abs_val > DODS_DBL_MAX || (abs_val != 0.0 && abs_val < DODS_DBL_MIN))
687  throw Error("The 64-bit floating point value is out of range.");;
688 
689  return v;
690 }
691 
692 /*
693  Maybe someday we will really check the Urls to see if they are valid...
694 */
695 
696 int check_url(const char *)
697 {
698  return TRUE;
699 }
700 
701 } // namespace libdap
void downcase(string &s)
Definition: util.cc:563
string prune_spaces(const string &name)
Definition: util.cc:459
top level DAP object to house generic methods
Definition: AISConnect.cc:30
int check_url(const char *)
Is the value a valid URL?
Definition: parser-util.cc:696
A class for error processing.
Definition: Error.h:92
int check_byte(const char *val)
Is the value a valid byte?
Definition: parser-util.cc:190
void save_str(char *dst, const char *src, const int line_num)
Save a string to a temporary variable during the parse.
Definition: parser-util.cc:156