bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
RemoteResource.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES http package, part of the Hyrax data server.
4
5// Copyright (c) 2020 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24// Authors:
25// ndp Nathan Potter <ndp@opendap.org>
26
27#include "config.h"
28
29#include <cstdio>
30#include <unistd.h>
31#include <cstring>
32
33#include <sstream>
34#include <string>
35#include <utility>
36#include <memory>
37#include <thread>
38
39#include "BESInternalError.h"
40
41#include "BESDebug.h"
42#include "BESUtil.h"
43
44#include "HttpUtils.h"
45#include "CurlUtils.h"
46#include "HttpError.h"
47#include "HttpNames.h"
48#include "RemoteResource.h"
49#include "TheBESKeys.h"
50#include "BESStopWatch.h"
51#include "BESLog.h"
52
53#define BES_CATALOG_ROOT_KEY "BES.Catalog.catalog.RootDirectory"
54// See HttpNames.h for the key definitions.
55
56#define MODULE HTTP_MODULE
57#define prolog string("RemoteResource::").append(__func__).append("() - ")
58
59using namespace std;
60
61namespace http {
62
63string RemoteResource::d_temp_file_dir;
64std::mutex RemoteResource::d_temp_file_dir_mutex;
65std::mutex RemoteResource::d_mkstemp_mutex;
66
80RemoteResource::RemoteResource(shared_ptr <http::url> target_url, string uid)
81 : d_url(std::move(target_url)), d_uid(std::move(uid)) {
82
83 if (d_url->protocol() == FILE_PROTOCOL) {
84 set_filename_for_file_url();
85 // d_delete_file is true by default; don't delete things referenced by file:// URLs
86 d_delete_file = false;
87 d_initialized = true;
88 } else if (d_url->protocol() == HTTPS_PROTOCOL || d_url->protocol() == HTTP_PROTOCOL) {
89 BESDEBUG(MODULE, prolog << "URL: " << d_url->str() << endl);
90
91 // d_initialized is false until the resource is retrieved (for http/s URLs)
92 set_delete_temp_file();
93 set_temp_file_dir();
94 } else {
95 string err = prolog + "Unsupported protocol: " + d_url->protocol();
96 throw BESInternalError(err, __FILE__, __LINE__);
97 }
98
99 // Now set d_basename using the URL path (this elides any query string).
100 // d_basename may stay empty (its init value) if the URL path is empty.
101 vector<string> path_elements;
102 BESUtil::tokenize(d_url->path(), path_elements);
103 if (!path_elements.empty()) {
104 d_basename = path_elements.back();
105 }
106
107 http::get_type_from_url(d_url->str(), d_type);
108 if (d_type.empty()) {
109 d_type = "unknown";
110 }
111}
112
118 if (!d_filename.empty() && d_delete_file)
119 unlink(d_filename.c_str());
120 if (d_fd != -1)
121 close(d_fd);
122}
123
126
136void RemoteResource::set_temp_file_dir() {
137 lock_guard<mutex> lock(d_temp_file_dir_mutex);
138
139 // d_temp_file_dir is static, so we only need to set it once.
140 if (!d_temp_file_dir.empty())
141 return;
142
143 d_temp_file_dir = TheBESKeys::TheKeys()->read_string_key(REMOTE_RESOURCE_TMP_DIR_KEY, "/tmp/bes_rr_tmp");
144
145 if (BESUtil::mkdir_p(d_temp_file_dir, 0775) != 0) {
146 throw BESInternalError("Temporary file directory '" + d_temp_file_dir + "' error: " + strerror(errno),
147 __FILE__, __LINE__);
148 }
149}
150
158void RemoteResource::set_delete_temp_file() {
159 d_delete_file = TheBESKeys::TheKeys()->read_bool_key(REMOTE_RESOURCE_DELETE_TMP_FILE, true);
160}
161
169void RemoteResource::set_filename_for_file_url() {
170 BESDEBUG(MODULE, prolog << "Found FILE protocol." << endl);
171 d_filename = d_url->path();
172 while (BESUtil::endsWith(d_filename, "/")) {
173 // Strip trailing slashes, because this about files, not directories
174 d_filename = d_filename.substr(0, d_filename.size() - 1);
175 }
176
177 // Now we check that the data is in the BES_CATALOG_ROOT
178 string catalog_root = TheBESKeys::TheKeys()->read_string_key(BES_CATALOG_ROOT_KEY, "");
179 if (catalog_root.empty()) {
180 throw BESInternalError(prolog + "ERROR - " + BES_CATALOG_ROOT_KEY + "is not set", __FILE__, __LINE__);
181 }
182
183 if (d_filename.find(catalog_root) != 0) {
184 d_filename = BESUtil::pathConcat(catalog_root, d_filename);
185 }
186 BESDEBUG(MODULE, "d_filename: " << d_filename << endl);
187}
188
190
199 if (d_initialized) {
200 return;
201 }
202
203 {
204 lock_guard<mutex> lock(d_mkstemp_mutex);
205 // Make a temporary file, get an open descriptor for it. The make_temp_file() function
206 // throws BESInternalError if it can't make the file.
207 d_fd = BESUtil::make_temp_file(d_temp_file_dir, d_filename);
208 }
209
210 // Get the contents of the URL and put them in the temp file
211 get_url(d_fd);
212
213 string new_name = d_filename + "_" + d_uid + "#" + d_basename;
214 if (rename(d_filename.c_str(), new_name.c_str()) != 0) {
215 throw BESInternalError("Could not rename " + d_filename + " to " + new_name + " ("
216 + ::strerror(errno) + ")", __FILE__, __LINE__);
217 }
218
219 d_filename = new_name;
220
221 d_initialized = true;
222}
223
236void RemoteResource::get_url(int fd) {
237
238 BESDEBUG(MODULE, prolog << "BEGIN" << endl);
239 BES_STOPWATCH_START(MODULE, prolog + "Timing retrieval. Target url: " + d_url->str());
240
241 try {
242 // Throws an HttpError if there is a curl error.
243 curl::http_get_and_write_resource(d_url, fd, &d_response_headers);
244 BESDEBUG(MODULE, prolog << "Resource " << d_url->str() << " saved to temporary file " << d_filename << endl);
245 }
246 catch (http::HttpError &http_error) {
247 string err_msg = prolog + "Hyrax encountered a Service Chaining Error while "
248 "attempting to retrieve a RemoteResource.\n" + http_error.get_message();
249 http_error.set_message(err_msg);
250 throw;
251 }
252
253
254 // Moved into curl::super_easy_perform(CURL*, int fd)
255#if 0
256 auto status = lseek(fd, 0, SEEK_SET);
257 if (-1 == status)
258 throw BESInternalError("Could not seek within the response file.", __FILE__, __LINE__);
259 BESDEBUG(MODULE, prolog << "Reset file descriptor to start of file." << endl);
260#endif
261
262 BESDEBUG(MODULE, prolog << "END" << endl);
263}
264
265} // namespace http
266
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
void set_message(const std::string &msg)
set the error message for this exception
Definition BESError.h:108
exception thrown if internal error encountered
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition BESUtil.cc:837
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition BESUtil.cc:995
static int mkdir_p(const std::string &path, mode_t mode)
Definition BESUtil.cc:1254
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:754
static int make_temp_file(const std::string &temp_file_dir, std::string &temp_file_name)
Make and open a temporary file. The file is opened such that we know it is unique and not in use by a...
Definition BESUtil.cc:1297
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
static bool read_bool_key(const std::string &key, bool default_value)
Read a boolean-valued key from the bes.conf file.
static std::string read_string_key(const std::string &key, const std::string &default_value)
Read a string-valued key from the bes.conf file.
RemoteResource()=default
The default constructor is here to ease testing. Remove if not needed. jhrg 3/8/23.
utility class for the HTTP catalog module
Definition TheBESKeys.h:51