bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
BESRegex.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2005 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26// These headers are used when HAVE_WORKING_REGEX is not true. jhrg 12/30/22
27#include "config.h"
28
29#include <string>
30#include <vector>
31#include <memory>
32
33#include <libdap/util.h>
34
35#include "BESInternalError.h"
36#include "BESRegex.h"
37
38using namespace std;
39
40#if !HAVE_WORKING_REGEX
41void
42BESRegex::init(const char *t)
43{
44 d_preg.reset(new regex_t);
45
46 int result = regcomp(d_preg.get(), t, REG_EXTENDED);
47 if (result != 0) {
48 size_t msg_len = regerror(result, d_preg.get(), nullptr, (size_t)0);
49
50 vector<char> msg(msg_len+1);
51 regerror(result, d_preg.get(), msg.data(), msg_len);
52 string err = string("BESRegex error: ") + string(msg.data(), msg_len);
53 throw BESError(err, BES_SYNTAX_USER_ERROR, __FILE__, __LINE__);
54 }
55}
56
57BESRegex::~BESRegex()
58{
59 regfree(d_preg.get());
60}
61#endif
62
69int
70BESRegex::match(const char *s, int len, int pos) const
71{
72#if HAVE_WORKING_REGEX
73 if (pos > len)
74 throw BESInternalError("Position exceed length in BESRegex::match()", __FILE__, __LINE__);
75
76 smatch match;
77 // This is needed because in C++14, the first arg to regex_search() cannot be a
78 // temporary string. It seems the C++11 compilers on some linux dists are using
79 // regex headers that enforce c++14 rules. jhrg 12/2/21
80 auto target = string(s+pos, len-pos);
81 bool found = regex_search(target, match, d_exp);
82 return (found && match.ready()) ? (int)match.length(): -1;
83#else
84 if (len > 32766) // Integer overflow protection
85 return -1;
86
87 vector<regmatch_t> pmatch(len+1);
88 int result = regexec(d_preg.get(), s + pos, len, pmatch.data(), 0);
89 int matchnum;
90 if (result == REG_NOMATCH)
91 matchnum = -1;
92 else
93 matchnum = (int)(pmatch[0].rm_eo - pmatch[0].rm_so);
94
95 return matchnum;
96#endif
97}
98
104int
105BESRegex::match(const string &s) const
106{
107#if HAVE_WORKING_REGEX
108 smatch match;
109 bool found = regex_search(s, match, d_exp);
110 return (found && match.ready()) ? (int)match.length(): -1;
111#else
112 return match(s.c_str(), (int)s.size(), 0);
113#endif
114}
115
126int
127BESRegex::search(const char *s, int len, int& matchlen, int pos) const
128{
129#if HAVE_WORKING_REGEX
130 smatch match;
131 auto target = string(s+pos, len-pos);
132 bool found = regex_search(target, match, d_exp);
133 if (found && match.ready()) {
134 matchlen = (int)match.length();
135 return (int)match.position();
136 }
137 else {
138 matchlen = -1;
139 return -1;
140 }
141#else
142 // sanitize allocation
143 if (!libdap::size_ok(sizeof(regmatch_t), len+1))
144 return -1;
145
146 // alloc space for len matches, which is theoretical max.
147 // Problem: If somehow 'len' is very large - say the size of a 32-bit int,
148 // then len+1 is a an integer overflow and this might be exploited by
149 // an attacker. It's not likely there will be more than a handful of
150 // matches, so I am going to limit this value to 32766. jhrg 3/4/09
151 if (len > 32766)
152 return -1;
153
154 vector<regmatch_t> pmatch(len+1);
155 int result = regexec(d_preg.get(), s + pos, len, pmatch.data(), 0);
156 if (result == REG_NOMATCH)
157 return -1;
158
159 // Match found, find the first one (pmatch lists the longest first)
160 int m = 0;
161 for (int i = 1; i < len; ++i)
162 if (pmatch[i].rm_so != -1 && pmatch[i].rm_so < pmatch[m].rm_so)
163 m = i;
164
165 matchlen = (int)(pmatch[m].rm_eo - pmatch[m].rm_so);
166 auto matchpos = (int)pmatch[m].rm_so;
167
168 return matchpos;
169#endif
170}
171
178int
179BESRegex::search(const string &s, int& matchlen) const
180{
181#if HAVE_WORKING_REGEX
182 smatch match;
183 bool found = regex_search(s, match, d_exp);
184 if (found && match.ready()) {
185 matchlen = (int)match.length();
186 return (int)match.position();
187 }
188 else {
189 matchlen = -1;
190 return -1;
191 }
192#else
193 return search(s.c_str(), (int)s.size(), matchlen, 0);
194#endif
195}
196
exception thrown if internal error encountered
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition BESRegex.cc:70
int search(const char *s, int len, int &matchlen, int pos=0) const
Where does the pattern match.
Definition BESRegex.cc:127