bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
HttpUtils.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2// This file is part of the BES http package, part of the Hyrax data server.
3
4// Copyright (c) 2020 OPeNDAP, Inc.
5// Author: Nathan Potter <ndp@opendap.org>
6//
7// This library is free software; you can redistribute it and/or
8// modify it under the terms of the GNU Lesser General Public
9// License as published by the Free Software Foundation; either
10// version 2.1 of the License, or (at your option) any later version.
11//
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15// Lesser General Public License for more details.
16//
17// You should have received a copy of the GNU Lesser General Public
18// License along with this library; if not, write to the Free Software
19// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20//
21// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22
23// Authors:
24// ndp Nathan Potter <ndp@opendap.org>
25
26#include "config.h"
27
28#ifdef HAVE_UNISTD_H
29#include <unistd.h>
30#endif
31
32#include <cstdlib>
33#include <cstring>
34#include <string>
35#include <map>
36#include <vector>
37#include <sstream>
38#include <time.h>
39
40#include <curl/curl.h>
41
42#include "BESUtil.h"
43#include "BESCatalogUtils.h"
44#include "BESCatalogList.h"
45#include "BESCatalog.h"
46#include "BESRegex.h"
47#include "TheBESKeys.h"
48#include "BESInternalError.h"
49#include "BESNotFoundError.h"
50#include "BESSyntaxUserError.h"
51#include "BESDebug.h"
52#include "BESUtil.h"
53
54#include "HttpNames.h"
55#include "HttpUtils.h"
56#include "ProxyConfig.h"
57
58#define MODULE "http"
59
60using namespace std;
61using namespace http;
62
63// These are static class members
64
65#define prolog string("HttpUtils::").append(__func__).append("() - ")
66
67namespace http {
72void load_mime_list_from_keys(map<string, string> &mime_list)
73{
74 // MimeTypes - translate from a mime type to a module name
75 bool found = false;
76 vector<string> vals;
77 TheBESKeys::TheKeys()->get_values(HTTP_MIMELIST_KEY, vals, found);
78 if (found && vals.size()) {
79 vector<string>::iterator i = vals.begin();
80 vector<string>::iterator e = vals.end();
81 for (; i != e; i++) {
82 size_t colon = (*i).find(":");
83 if (colon == string::npos) {
84 string err = (string) "Malformed " + HTTP_MIMELIST_KEY + " " + (*i) +
85 " specified in the gateway configuration";
86 throw BESSyntaxUserError(err, __FILE__, __LINE__);
87 }
88 string mod = (*i).substr(0, colon);
89 string mime = (*i).substr(colon + 1);
90 mime_list[mod] = mime;
91 }
92 }
93}
94
95
96
110void get_type_from_disposition(const string &disp, string &type)
111{
112 // If this function extracts a filename from disp and it matches a handler's
113 // regex using the Catalog Utils, this will be set to a non-empty value.
114 type = "";
115
116 size_t fnpos = disp.find("filename");
117 if (fnpos != string::npos) {
118 // Got the filename attribute, now get the
119 // filename, which is after the pound sign (#)
120 size_t pos = disp.find("#", fnpos);
121 if (pos == string::npos) pos = disp.find("=", fnpos);
122 if (pos != string::npos) {
123 // Got the filename to the end of the
124 // string, now get it to either the end of
125 // the string or the start of the next
126 // attribute
127 string filename;
128 size_t sp = disp.find(" ", pos);
129 if (pos != string::npos) {
130 // space before the next attribute
131 filename = disp.substr(pos + 1, sp - pos - 1);
132 } else {
133 // to the end of the string
134 filename = disp.substr(pos + 1);
135 }
136
138
139 // we have the filename now, run it through
140 // the type match to get the file type.
141
142 const BESCatalogUtils *utils = BESCatalogList::TheCatalogList()->default_catalog()->get_catalog_utils();
143 type = utils->get_handler_name(filename);
144 }
145 }
146}
147
148void get_type_from_content_type(const string &ctype, string &type)
149{
150 BESDEBUG(MODULE, prolog << "BEGIN content-type: " << ctype << endl);
151 map<string,string> mime_list;
152 load_mime_list_from_keys(mime_list);
153 map<string, string>::iterator i = mime_list.begin();
154 map<string, string>::iterator e = mime_list.end();
155 bool done = false;
156 for (; i != e && !done; i++) {
157 BESDEBUG(MODULE, prolog << "Comparing content type '" << ctype << "' against mime list element '" << (*i).second << "'" << endl);
158 BESDEBUG(MODULE, prolog << "first: " << (*i).first << " second: " << (*i).second << endl);
159 if ((*i).second == ctype) {
160 BESDEBUG(MODULE, prolog << "MATCH" << endl);
161 type = (*i).first;
162 done = true;
163 }
164 }
165 BESDEBUG(MODULE, prolog << "END" << endl);
166}
167
168void get_type_from_url(const string &url, string &type) {
169 const BESCatalogUtils *utils = BESCatalogList::TheCatalogList()->find_catalog("catalog")->get_catalog_utils();
170
171 type = utils->get_handler_name(url);
172}
173
179 size_t max_redirects=0;
180 bool found = false;
181 string value;
182 TheBESKeys::TheKeys()->get_value(HTTP_MAX_REDIRECTS_KEY, value, found);
183 if (found && !value.empty()) {
184 std::istringstream(value) >> max_redirects; // Returns 0 if the parse fails.
185 }
186 if(!max_redirects){
187 max_redirects = HTTP_MAX_REDIRECTS_DEFAULT;
188 }
189 return max_redirects;
190}
191
192// http://help.adobe.com/en_US/FlashPlatform/reference/actionscript/3/package.html#encodeURIComponent()
193
194
195
/**
 * @brief Convert a byte into its two lowercase hexadecimal digit characters.
 *
 * @param c The byte to convert.
 * @param hex1 Receives the digit for the upper four bits of @p c.
 * @param hex2 Receives the digit for the lower four bits of @p c.
 */
void hexchar(const unsigned char &c, unsigned char &hex1, unsigned char &hex2)
{
    static const char digits[] = "0123456789abcdef";

    // Split the byte into nibbles before writing any output.
    const unsigned int upper = c >> 4;
    const unsigned int lower = c & 0x0f;

    hex1 = static_cast<unsigned char>(digits[upper]);
    hex2 = static_cast<unsigned char>(digits[lower]);
}
209
/**
 * @brief Percent-encode a string for use in a URL.
 *
 * ASCII letters, digits and the characters `- _ . ! ~ * ' ( )` are copied
 * unchanged. A space becomes '+'. Every other byte is written as '%'
 * followed by its two lowercase hexadecimal digits. The input is processed
 * byte by byte over its full length, so embedded NUL bytes are encoded too.
 *
 * @param s The string to encode.
 * @return The encoded string.
 */
std::string url_encode(const std::string &s)
{
    static const char hex_digits[] = "0123456789abcdef";

    std::string encoded;
    // The result is never shorter than the input; reserving up front avoids
    // the early reallocations without pre-filling the buffer.
    encoded.reserve(s.size());

    for (const char c : s) {
        const bool keep_as_is =
            (c >= '0' && c <= '9') ||
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
            c == '-' || c == '_' || c == '.' || c == '!' || c == '~' ||
            c == '*' || c == '\'' || c == '(' || c == ')';

        if (keep_as_is) {
            encoded.push_back(c);
        }
        else if (c == ' ') {
            encoded.push_back('+');
        }
        else {
            // Go through unsigned char so bytes >= 0x80 index the table
            // correctly on platforms where char is signed.
            const unsigned char byte = static_cast<unsigned char>(c);
            encoded.push_back('%');
            encoded.push_back(hex_digits[byte >> 4]);
            encoded.push_back(hex_digits[byte & 0x0f]);
        }
    }

    return encoded;
}
247
#if 0

/**
 * @brief Break a URL into named pieces and store them in a map.
 *
 * NOTE: this code is compiled out.
 *
 * The following entries are inserted into @p url_info (existing keys are not
 * overwritten, since map::insert is used):
 *  - HTTP_TARGET_URL_KEY: the complete URL,
 *  - HTTP_URL_BASE_KEY: everything before the first '?',
 *  - HTTP_QUERY_STRING_KEY: everything after the first '?' (empty if none),
 *  - one entry per `key=value` record of the '&'-separated query string,
 *  - HTTP_INGEST_TIME_KEY: the current time() value as decimal text.
 *
 * @param target_url The URL to decompose.
 * @param url_info Map that receives the entries.
 */
void HttpUtils::decompose_url(const string target_url, map<string,string> &url_info)
{
    typedef map<string,string>::value_type entry_t;

    const size_t question_mark = target_url.find_first_of("?");
    BESDEBUG(MODULE, prolog << "query_index: " << question_mark << endl);

    const bool has_query = (question_mark != string::npos);
    const string base_part = has_query ? target_url.substr(0, question_mark) : target_url;
    const string query_part = has_query ? target_url.substr(question_mark + 1) : string();

    url_info.insert(entry_t(HTTP_TARGET_URL_KEY, target_url));
    BESDEBUG(MODULE, prolog << HTTP_TARGET_URL_KEY << ": " << target_url << endl);
    url_info.insert(entry_t(HTTP_URL_BASE_KEY, base_part));
    BESDEBUG(MODULE, prolog << HTTP_URL_BASE_KEY <<": " << base_part << endl);
    url_info.insert(entry_t(HTTP_QUERY_STRING_KEY, query_part));
    BESDEBUG(MODULE, prolog << HTTP_QUERY_STRING_KEY << ": " << query_part << endl);

    if (!query_part.empty()) {
        vector<string> pairs;
        const string separators = "&";
        BESUtil::tokenize(query_part, pairs, separators);

        for (const string &record : pairs) {
            const size_t equals = record.find('=');
            // Records without '=' carry no value and are skipped.
            if (equals == string::npos) continue;

            const string key = record.substr(0, equals);
            const string value = record.substr(equals + 1);
            BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
            url_info.insert(entry_t(key, value));
        }
    }

    // Record when the URL was processed, as the decimal text of time().
    const time_t now = time(nullptr);
    stringstream ingest_time;
    ingest_time << now;
    url_info.insert(entry_t(HTTP_INGEST_TIME_KEY, ingest_time.str()));
}

#endif
308
309}
310
std::string get_handler_name(const std::string &item) const
Find the name of the handler that will process the given item.
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
error thrown if there is a user syntax error in the request or any other user error
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition BESUtil.cc:995
static void trim_if_surrounding_quotes(std::string &value)
Remove double quotes around a string. This function will remove a leading and/or trailing double quote...
Definition BESUtil.cc:126
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Parse a URL into the protocol, host, path and query parts.
Definition url_impl.h:44
STL iterator class.
STL iterator class.
utility class for the HTTP catalog module
Definition TheBESKeys.h:51
size_t load_max_redirects_from_keys()
Definition HttpUtils.cc:178
void get_type_from_disposition(const string &disp, string &type)
Definition HttpUtils.cc:110
string url_encode(const string &s)
Definition HttpUtils.cc:215
void load_mime_list_from_keys(map< string, string > &mime_list)
Definition HttpUtils.cc:72
void hexchar(const unsigned char &c, unsigned char &hex1, unsigned char &hex2)
Definition HttpUtils.cc:202