bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
S3Container.cc
1// S3Container.cc
2
3// -*- mode: c++; c-basic-offset:4 -*-
4
5// This file is part of S3_module, a C++ module that can be loaded into
6// the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
7
8// Copyright (c) 2020 OPeNDAP, Inc.
9// Author: Nathan Potter <ndp@opendap.org>
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
26// Authors:
27// ndp Nathan Potter <ndp@opendap.org>
28
29#include "config.h"
30
31#include <map>
32#include <string>
33
34#include "BESStopWatch.h"
35#include "BESLog.h"
36#include "BESSyntaxUserError.h"
37#include "BESInternalError.h"
38#include "BESDebug.h"
39#include "BESContextManager.h"
40#include "BESUtil.h"
41#include "CurlUtils.h"
42#include "RemoteResource.h"
43
44#include "S3RequestHandler.h"
45#include "S3Container.h"
46#include "S3Names.h"
47
// Prefix for log/debug messages: "S3Container::<enclosing function>() - ".
// Parenthesised so the expansion is a single std::string expression wherever it is used.
#define prolog (std::string("S3Container::") + __func__ + "() - ")
49
50using namespace std;
51
52namespace s3 {
53
54void S3Container::_duplicate(S3Container &copy_to)
55{
56 if (d_dmrpp_rresource) {
57 throw BESInternalError("The Container has already been accessed, cannot create a copy of this container.",
58 __FILE__, __LINE__);
59 }
60
61 copy_to.d_dmrpp_rresource = d_dmrpp_rresource;
63}
64
65void S3Container::initialize()
66{
67 BESDEBUG(MODULE, prolog << "sym_name: " << get_symbolic_name() << endl);
68 BESDEBUG(MODULE, prolog << "real_name: " << get_real_name() << endl);
69 BESDEBUG(MODULE, prolog << "type: " << get_container_type() << endl);
70
71 if (get_container_type().empty())
72 set_container_type(S3_NAME);
73
74 bool found;
75 string uid = BESContextManager::TheManager()->get_context(EDL_UID_KEY, found);
76 BESDEBUG(MODULE, prolog << "EDL_UID_KEY(" << EDL_UID_KEY << "): " << uid << endl);
77
78 // Because we know the name is really a URL, then we know the "relative_name" is meaningless
79 // So we set it to be the same as "name"
81}
82
93S3Container::S3Container(const string &sym_name, const string &real_name, const string &type) :
94 BESContainer(sym_name, real_name, type)
95{
96 initialize();
97}
98
101{
102 auto container = new S3Container;
103 _duplicate(*container);
104 return container;
105}
106
// Disabled code: this destructor is compiled out by the enclosing '#if 0'.
// The release() call inside it is disabled a second time by its own '#if 0'.
#if 0

S3Container::~S3Container()
{
#if 0
    if (d_dmrpp_rresource) {
        release();
    }
#endif
}

#endif
119
131void S3Container::filter_response(const map<string, string, std::less<>> &content_filters) const {
132
133 string resource_content = BESUtil::file_to_string(d_dmrpp_rresource->get_filename());
134
135 for (const auto &apair: content_filters) {
136 unsigned int replace_count = BESUtil::replace_all(resource_content, apair.first, apair.second);
137 BESDEBUG(MODULE, prolog << "Replaced " << replace_count << " instance(s) of template(" <<
138 apair.first << ") with " << apair.second << " in cached RemoteResource" << endl);
139 }
140
141 // This call will invalidate the file descriptor of the RemoteResource. jhrg 3/9/23
142 BESUtil::string_to_file(d_dmrpp_rresource->get_filename(), resource_content);
143}
144
151{
152 if (!d_dmrpp_rresource) {
153 BESDEBUG(MODULE, prolog << "Building new RemoteResource (dmr++)." << endl);
154
155 // Since this is S3 we know that the real_name is a URL.
156 const string data_access_url_str = get_real_name();
157
158 // And we know that the dmr++ file should be "right next to it" (side-car)
159 const string dmrpp_url_str = data_access_url_str + ".dmrpp";
160
161 // And if there's a missing data file (side-car) it should be "right there" too.
162 const string missing_data_url_str = data_access_url_str+ ".missing";
163
164 BESDEBUG(MODULE, prolog << " data_access_url: " << data_access_url_str << endl);
165 BESDEBUG(MODULE, prolog << " dmrpp_url: " << dmrpp_url_str << endl);
166 BESDEBUG(MODULE, prolog << "missing_data_url: " << missing_data_url_str << endl);
167
168 const string href = R"(href=")";
169
170 const string data_access_url_key = href + DATA_ACCESS_URL_KEY + R"(")";
171 const string missing_data_access_url_key = href + MISSING_DATA_ACCESS_URL_KEY + R"(")";
172
173 const string trusted_url_hack = R"(" dmrpp:trust="true")";
174
175 const string data_access_url_with_trusted_attr_str = href + data_access_url_str + trusted_url_hack;
176 const string missing_data_url_with_trusted_attr_str = href + missing_data_url_str + trusted_url_hack;
177
178 BESDEBUG(MODULE, prolog << " data_access_url_key: " << data_access_url_key << endl);
179 BESDEBUG(MODULE, prolog << " data_access_url_trusted: " << data_access_url_with_trusted_attr_str << endl);
180 BESDEBUG(MODULE, prolog << "missing_data_access_url_key: " << missing_data_access_url_key << endl);
181 BESDEBUG(MODULE, prolog << " missing_data_url_trusted: " << missing_data_url_with_trusted_attr_str << endl);
182
183 auto dmrpp_url = std::make_shared<http::url>(dmrpp_url_str, true);
184
185 {
186 // This scope is here because of the BESStopWatch. jhrg 10/18/22
187 //d_dmrpp_rresource = new http::RemoteResource(dmrpp_url);
188 d_dmrpp_rresource = std::make_shared<http::RemoteResource>(dmrpp_url);
189
190 BES_STOPWATCH_START(MODULE, prolog + "Timing DMR++ retrieval. Target url: " + dmrpp_url->str());
191 d_dmrpp_rresource->retrieve_resource();
192
193 // Substitute the data_access_url and missing_data_access_url in the dmr++ file.
194 map<string, string, std::less<>> content_filters;
195 if (S3RequestHandler::d_inject_data_url) {
196 content_filters.insert(pair<string, string>(data_access_url_key, data_access_url_with_trusted_attr_str));
197 content_filters.insert(pair<string, string>(missing_data_access_url_key, missing_data_url_with_trusted_attr_str));
198 }
199
200 filter_response(content_filters);
201 }
202
203 BESDEBUG(MODULE, prolog << "Done retrieving: " << dmrpp_url->str() << " returning cached file "
204 << d_dmrpp_rresource->get_filename() << endl);
205 }
206
207 const auto type = d_dmrpp_rresource->get_type();
208 set_container_type(type);
209
210 BESDEBUG(MODULE, prolog << "Type: " << type << endl);
211 BESDEBUG(MODULE, prolog << "END (obj_addr: " << (void *) this << ")" << endl);
212
213 return d_dmrpp_rresource->get_filename(); // this should return the dmr++ file name for the temporary file
214}
215
224{
225 if (d_dmrpp_rresource) {
226 // delete d_dmrpp_rresource;
227 d_dmrpp_rresource = nullptr;
228 }
229
230 return true;
231}
232
240void S3Container::dump(ostream &strm) const
241{
242 strm << BESIndent::LMarg << "S3Container::dump - (" << (void *) this << ")" << endl;
243 BESIndent::Indent();
244 BESContainer::dump(strm);
245 if (d_dmrpp_rresource) {
246 strm << BESIndent::LMarg << "RemoteResource.getCacheFileName(): " << d_dmrpp_rresource->get_filename()
247 << endl;
248 strm << BESIndent::LMarg << "response headers: ";
249 }
250 else {
251 strm << BESIndent::LMarg << "response not yet obtained" << endl;
252 }
253 BESIndent::UnIndent();
254}
255
256} // s3 namespace
A container is something that holds data, e.g., a netcdf file or a database entry.
void set_container_type(const std::string &type)
set the type of data that this container represents, such as cedar or netcdf.
std::string get_symbolic_name() const
retrieve the symbolic name for this container
void dump(std::ostream &strm) const override
dumps information about this object
std::string get_container_type() const
retrieve the type of data this container holds, such as cedar or netcdf.
void set_relative_name(const std::string &relative)
Set the relative name of the object in this container.
void _duplicate(BESContainer &copy_to)
duplicate this instance into the passed container
std::string get_real_name() const
retrieve the real name for this container, such as a file name.
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with the value of the string 'replace_with_this'.
Definition BESUtil.cc:924
static void string_to_file(const std::string &filename, const std::string &content)
Write a string to a file.
Definition BESUtil.cc:1321
Container representing a remote request.
Definition S3Container.h:48
std::string access() override
access the remote target response by making the remote request
bool release() override
release the resources
BESContainer * ptr_duplicate() override
Pure abstract method to duplicate this instance of BESContainer.
void dump(std::ostream &strm) const override
dumps information about this object
void filter_response(const std::map< std::string, std::string, std::less<> > &content_filters) const
Filter the cached resource. Each key in content_filters is replaced with its associated map value.