libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
GNURegex.cc
Go to the documentation of this file.
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2005 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26// #define DODS_DEBUG
27
28#include "config.h"
29
30#if 0
31#ifndef WIN32
32#include <alloca.h>
33#endif
34#include <stdlib.h>
35
36#include <regex.h>
37#include <sys/types.h>
38
39#include <new>
40#include <stdexcept>
41#include <string>
42#include <vector>
43#endif
44
45#include <vector>
46
47#include <regex.h>
48
49#include "Error.h"
50#include "GNURegex.h"
51
52#include "debug.h"
53#include "util.h"
54
55#if 0
56#include "debug.h"
57#include "util.h"
58#endif
59
60using namespace std;
61
62namespace libdap {
63
64void Regex::init(const char *t) {
65#if !USE_CPP_11_REGEX
66 DBG(cerr << "Regex::init() - BEGIN" << endl);
67
68 DBG(cerr << "Regex::init() - creating new regex..." << endl);
69 d_preg = static_cast<void *>(new regex_t);
70
71 DBG(cerr << "Regex::init() - Calling regcomp()..." << endl);
72 int result = regcomp(static_cast<regex_t *>(d_preg), t, REG_EXTENDED);
73
74 if (result != 0) {
75 DBG(cerr << "Regex::init() - Call to regcomp FAILED" << endl);
76 DBG(cerr << "Regex::init() - Calling regerror()..." << endl);
77 size_t msg_len =
78 regerror(result, static_cast<regex_t *>(d_preg), static_cast<char *>(NULL), static_cast<size_t>(0));
79
80 DBG(cerr << "Regex::init() - Creating message" << endl);
81 vector<char> msg(msg_len + 1);
82 // char *msg = new char[msg_len+1];
83 DBG(cerr << "Regex::init() - Calling regerror() again..." << endl);
84 regerror(result, static_cast<regex_t *>(d_preg), msg.data(), msg_len);
85 DBG(cerr << "Regex::init() - Throwing libdap::Error" << endl);
86 throw Error(string("Regex error: ") + string(msg.data()));
87 // delete[] msg;
88 // throw e;
89 }
90 DBG(cerr << "Regex::init() - Call to regcomp() SUCCEEDED" << endl);
91 DBG(cerr << "Regex::init() - END" << endl);
92#else
93 d_exp = regex(t);
94#endif
95}
96
97#if 0
// Disabled code (never compiled): a std::string overload of init() that
// assigns a std::regex built from t to d_exp.
98void
99Regex::init(const string &t)
100{
101 d_exp = regex(t);
102}
103#endif
104
105#if !USE_CPP_11_REGEX
107 regfree(static_cast<regex_t *>(d_preg));
108 delete static_cast<regex_t *>(d_preg);
109 d_preg = 0;
110}
111#endif
112
113#if 0
// Disabled code (never compiled): out-of-line constructors that forward the
// pattern text to init().
117Regex::Regex(const char* t)
118{
119 init(t);
120}
121
// Same as above; the second (unnamed int) argument is not used.
124Regex::Regex(const char* t, int)
125{
126 init(t);
127}
128#endif
129
136int Regex::match(const char *s, int len, int pos) const {
137#if !USE_CPP_11_REGEX
138 if (len > 32766) // Integer overflow protection
139 return -1;
140
141 regmatch_t *pmatch = new regmatch_t[len + 1];
142 string ss = s;
143
144 int result = regexec(static_cast<regex_t *>(d_preg), ss.substr(pos, len - pos).c_str(), len, pmatch, 0);
145 int matchnum;
146 if (result == REG_NOMATCH)
147 matchnum = -1;
148 else
149 matchnum = pmatch[0].rm_eo - pmatch[0].rm_so;
150
151 delete[] pmatch;
152 pmatch = 0;
153
154 return matchnum;
155#else
156 if (pos > len)
157 throw Error("Position exceed length in Regex::match()");
158
159 smatch match;
160 auto target = string(s + pos, len - pos);
161 bool found = regex_search(target, match, d_exp);
162 if (found)
163 return (int)match.length();
164 else
165 return -1;
166#endif
167}
168
174int Regex::match(const string &s) const {
175#if USE_CPP_11_REGEX
176 smatch match;
177 bool found = regex_search(s, match, d_exp);
178 if (found)
179 return (int)match.length();
180 else
181 return -1;
182#else
183 return match(s.c_str(), s.length(), 0);
184#endif
185}
186
197int Regex::search(const char *s, int len, int &matchlen, int pos) const {
198#if !USE_CPP_11_REGEX
199 // sanitize allocation
200 if (!size_ok(sizeof(regmatch_t), len + 1))
201 return -1;
202
203 // alloc space for len matches, which is theoretical max.
204 // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
205 // then len+1 is a an integer overflow and this might be exploited by
206 // an attacker. It's not likely there will be more than a handful of
207 // matches, so I am going to limit this value to 32766. jhrg 3/4/09
208 if (len > 32766)
209 return -1;
210
211 regmatch_t *pmatch = new regmatch_t[len + 1];
212 string ss = s;
213
214 int result = regexec(static_cast<regex_t *>(d_preg), ss.substr(pos, len - pos).c_str(), len, pmatch, 0);
215 if (result == REG_NOMATCH) {
216 delete[] pmatch;
217 pmatch = 0;
218 return -1;
219 }
220
221 // Match found, find the first one (pmatch lists the longest first)
222 int m = 0;
223 for (int i = 1; i < len; ++i)
224 if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
225 m = i;
226
227 matchlen = pmatch[m].rm_eo - pmatch[m].rm_so;
228 int matchpos = pmatch[m].rm_so;
229
230 delete[] pmatch;
231 pmatch = 0;
232 return matchpos;
233#else
234 smatch match;
235 // This is needed because in C++14, the first arg to regex_search() cannot be a
236 // temporary string. It seems the C++11 compilers on some linux dists are using
237 // regex headers that enforce c++14 rules. jhrg 12/2/21
238 auto target = string(s + pos, len - pos);
239 bool found = regex_search(target, match, d_exp);
240 matchlen = (int)match.length();
241 if (found)
242 return (int)match.position();
243 else
244 return -1;
245#endif
246}
247
254int Regex::search(const string &s, int &matchlen) const {
255#if USE_CPP_11_REGEX
256 smatch match;
257 bool found = regex_search(s, match, d_exp);
258 matchlen = (int)match.length();
259 if (found)
260 return (int)match.position();
261 else
262 return -1;
263#else
264 // search(const char *s, int len, int& matchlen, int pos) const
265 return search(s.c_str(), s.length(), matchlen, 0);
266#endif
267}
268
269} // namespace libdap
A class for error processing.
Definition Error.h:92
Regex(const char *s)
initialize a Regex with a C string
Definition GNURegex.h:76
int search(const char *s, int len, int &matchlen, int pos=0) const
Find where in the string the pattern matches; the length of the match is returned through matchlen.
Definition GNURegex.cc:197
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition GNURegex.cc:136
#define regfree
Definition config.h:1361
#define regerror
Definition config.h:1355
#define regcomp
Definition config.h:1352
#define regexec
Definition config.h:1358
#define DBG(x)
Definition debug.h:58
top level DAP object to house generic methods
Definition AISConnect.cc:30
bool size_ok(unsigned int sz, unsigned int nelem)
sanitize the size of an array. Test for integer overflow when dynamically allocating an array.
Definition util.cc:1138