39#include "BESStopWatch.h"
42#include "BESContextManager.h"
45#include "TheBESKeys.h"
46#include "BESSyntaxUserError.h"
49#include "NgapRequestHandler.h"
50#include "NgapOwnedContainer.h"
// Prefix for log messages: NgapOwnedContainer::<current function>() - , built from __func__.
54#define prolog std::string("NgapOwnedContainer::").append(__func__).append("() - ")
// Cache-related messages are routed through INFO_LOG.
56#define CACHE_LOG(x) INFO_LOG(x)
// Initial values of the class-wide (static) settings.
// Base URL of the default data source location.
65std::string NgapOwnedContainer::d_data_source_location =
"https://cloudydap.s3.us-east-1.amazonaws.com";
// When true, get_dmrpp_from_cache_or_remote_source() calls dmrpp_read_from_opendap_bucket().
66bool NgapOwnedContainer::d_use_opendap_bucket =
true;
// When true, get_daac_content_filters() and get_opendap_content_filters() build their filter maps.
67bool NgapOwnedContainer::d_inject_data_url =
true;
// Construct a container for an NGAP path.
//
// sym_name and real_name are forwarded to the BESContainer base together with the
// fixed string owned-ngap (presumably the container type - TODO confirm against
// BESContainer). real_name is also stored in d_ngap_path. The third, unnamed
// parameter is not used.
//
// NOTE(review): the three body statements name the static settings
// d_data_source_location, d_use_opendap_bucket and d_inject_data_url, but the rest of
// each statement is not visible in this listing. Presumably they are (re)loaded from
// configuration here - confirm against the full source.
80NgapOwnedContainer::NgapOwnedContainer(
const string &sym_name,
const string &real_name,
const string &)
81 : BESContainer(sym_name, real_name,
"owned-ngap"), d_ngap_path(real_name) {
82 NgapOwnedContainer::d_data_source_location
84 NgapOwnedContainer::d_use_opendap_bucket
86 NgapOwnedContainer::d_inject_data_url
// Read the bytes available from an open file descriptor and append them to content.
//
// fd       open file descriptor to read from.
// content  string that receives the bytes; the visible code appends to it and does
//          not clear it first.
//
// Two failures are logged with ERROR_LOG: fstat() failing, and the number of bytes in
// content differing from the size reported by fstat().
// NOTE(review): the return statements are not visible in this listing. Presumably the
// function returns false after either logged failure and true otherwise - confirm.
96bool NgapOwnedContainer::file_to_string(
int fd,
string &content) {
// Zero-initialized so st_size is defined even before fstat() fills it in.
98 struct stat statbuf = {};
99 if (fstat(fd, &statbuf) < 0) {
100 ERROR_LOG(
"NgapOwnedContainer::file_to_string() - failed to get file descriptor status\n");
// Read in 4096-byte chunks; bytes_read is declared on a line not shown in this listing.
105 vector<char> buffer(4096);
107 while ((bytes_read = read(fd, buffer.data(), buffer.size())) > 0) {
108 content.append(buffer.data(), bytes_read);
// NOTE(review): st_size is a signed off_t while content.size() is unsigned, so this is a
// mixed-sign comparison. The check also only matches when content was empty on entry.
112 if (statbuf.st_size != content.size()) {
113 ERROR_LOG(
"NgapOwnedContainer::file_to_string() - failed to read all bytes from file cache\n");
// Translate an NGAP REST path into a data URL, consulting the CMR memory cache when
// NgapRequestHandler::d_use_cmr_cache is set.
//
// rest_path  the NGAP REST path to translate.
//
// The cache is checked first and, when enabled, the translated URL is stored in it
// afterwards. Hits, misses and puts are reported through CACHE_LOG.
// NOTE(review): the declaration of data_url, the step that computes it on a cache miss
// and the return statement are not visible in this listing - presumably the function
// returns data_url; confirm against the full source.
133string NgapOwnedContainer::build_data_url_to_daac_bucket(
const string &rest_path) {
134 BES_MODULE_TIMING(prolog + rest_path);
// Fetch the EDL user id from the BES context; found is declared on a line not shown here.
137 string uid = BESContextManager::TheManager()->
get_context(EDL_UID_KEY, found);
138 BESDEBUG(MODULE, prolog <<
"EDL_UID_KEY(" << EDL_UID_KEY <<
"): " << uid << endl);
// The cache key combines the REST path and the user id, so entries are kept per user.
141 string url_key = rest_path +
':' + uid;
143 if (NgapRequestHandler::d_use_cmr_cache) {
144 if (NgapRequestHandler::d_cmr_mem_cache.get(url_key, data_url)) {
145 CACHE_LOG(prolog +
"CMR Cache hit, translated URL: " + data_url +
'\n');
148 CACHE_LOG(prolog +
"CMR Cache miss, REST path: " + url_key +
'\n');
// Store the translated URL under the same key for later requests.
156 if (NgapRequestHandler::d_use_cmr_cache) {
157 NgapRequestHandler::d_cmr_mem_cache.put(url_key, data_url);
158 CACHE_LOG(prolog +
"CMR Cache put, translated URL: " + data_url +
'\n');
// Build the URL of a DMR++ document under data_source from an NGAP REST path.
//
// rest_path    expected to yield exactly four parts: collections, a collection name,
//              granules, a granule name.
// data_source  base URL that the DMR++ name is appended to.
//
// Returns data_source + / + <collection name> + / + <granule name> + .dmrpp
// Throws BESSyntaxUserError when rest_path does not have the expected four-part form.
// NOTE(review): the declaration of parts is not visible in this listing; presumably it
// is rest_path split on the slash character - confirm.
170string NgapOwnedContainer::build_dmrpp_url_to_owned_bucket(
const string &rest_path,
const string &data_source) {
174 BES_MODULE_TIMING(prolog + rest_path);
// Reject anything that is not collections/<name>/granules/<name>.
177 if (parts.size() != 4 || parts[0] !=
"collections" || parts[2] !=
"granules") {
178 throw BESSyntaxUserError(
"Invalid NGAP path: " + rest_path, __FILE__, __LINE__);
// Object name inside the bucket: <collection name>/<granule name>.dmrpp
181 string dmrpp_name = parts[1] +
'/' + parts[3] +
".dmrpp";
187 string dmrpp_url_str = data_source +
'/' + dmrpp_name;
189 return dmrpp_url_str;
// Look for the DMR++ of this container in the caches, keyed by get_real_name().
//
// dmrpp_string  receives the DMR++ text when it is found.
//
// The memory cache is consulted first. The visible code then works with a
// FileCache::Item: on a file cache hit the item is read into dmrpp_string with
// file_to_string() and the text is copied into the memory cache. Hits, misses and puts
// go through CACHE_LOG; a failed read of the cached file goes through ERROR_LOG.
// NOTE(review): the file cache lookup call and all return statements are not visible in
// this listing. Presumably the function returns true on either hit and false otherwise.
192bool NgapOwnedContainer::get_item_from_dmrpp_cache(
string &dmrpp_string)
const {
// First level: in-memory cache.
195 if (NgapRequestHandler::d_dmrpp_mem_cache.get(
get_real_name(), dmrpp_string)) {
196 CACHE_LOG(prolog +
"Memory Cache hit, DMR++: " +
get_real_name() +
'\n');
200 CACHE_LOG(prolog +
"Memory Cache miss, DMR++: " +
get_real_name() +
'\n');
// Second level: file cache. The item exposes the file descriptor of the cached file.
206 FileCache::Item item;
209 CACHE_LOG(prolog +
"File Cache hit, DMR++: " +
get_real_name() +
'\n');
210 if (file_to_string(item.get_fd(), dmrpp_string)) {
// Copy the file cache hit into the memory cache.
212 NgapRequestHandler::d_dmrpp_mem_cache.put(
get_real_name(), dmrpp_string);
213 CACHE_LOG(prolog +
"Memory Cache put, DMR++: " +
get_real_name() +
'\n');
217 ERROR_LOG(prolog +
"Failed to read DMR++ from file cache\n");
222 CACHE_LOG(prolog +
"File Cache miss, DMR++: " +
get_real_name() +
'\n');
// Store a DMR++ document in the caches under the key get_real_name().
//
// dmrpp_string  the DMR++ text to cache.
//
// The visible code logs the file cache put (or its failure), calls purge() on the file
// cache and logs an error if purge() reports failure, then puts the text in the memory
// cache.
// NOTE(review): the opening brace, the file cache put call itself and the return
// statements are not visible in this listing, so the exact conditions for a true or
// false result cannot be stated from here.
228bool NgapOwnedContainer::put_item_in_dmrpp_cache(
const std::string &dmrpp_string)
const
231 CACHE_LOG(prolog +
"File Cache put, DMR++: " +
get_real_name() +
'\n');
235 CACHE_LOG(prolog +
"Failed to put DMR++ in file cache\n");
// A failed purge is logged; what follows that log call is not visible here.
239 if (!NgapRequestHandler::d_dmrpp_file_cache.purge()) {
240 ERROR_LOG(prolog +
"Call to FileCache::purge() failed\n");
243 NgapRequestHandler::d_dmrpp_mem_cache.put(
get_real_name(), dmrpp_string);
244 CACHE_LOG(prolog +
"Memory Cache put, DMR++: " +
get_real_name() +
'\n');
// Apply each entry of content_filters to content.
//
// content_filters  map whose entries are visited in turn; the debug message reports
//                  filter.first as the template and filter.second as its replacement.
// content          the text being filtered; passed by non-const reference.
//
// NOTE(review): the statement that performs the replacement and defines replace_count
// is not visible in this listing. Presumably it replaces every occurrence of
// filter.first in content with filter.second and yields the count - confirm.
261void NgapOwnedContainer::filter_response(
const map <
string,
string, std::less<>> &content_filters,
string &content) {
262 for (
const auto &filter: content_filters) {
264 BESDEBUG(MODULE, prolog <<
"Replaced " << replace_count <<
" instance(s) of template(" << filter.first
265 <<
") with " << filter.second <<
" in cached RemoteResource" << endl);
// Build the filter map used on a DMR++ read from the DAAC bucket.
//
// data_url         URL of the data granule.
// content_filters  cleared and then filled with two entries, but only when
//                  d_inject_data_url is true.
//
// Entry 1: an href attribute whose value is DATA_ACCESS_URL_KEY maps to an href
//          attribute whose value is data_url, followed by the attribute dmrpp:trust
//          set to true.
// Entry 2: an href attribute whose value is MISSING_DATA_ACCESS_URL_KEY maps to an href
//          attribute whose value is data_url with _mvs.h5 appended, followed by the same
//          dmrpp:trust attribute.
//
// NOTE(review): the return statements are not visible in this listing. Presumably true
// is returned when the filters were built and false when d_inject_data_url is off.
// NOTE(review): the raw string literals appear split across two lines in this listing
// (the R prefix on one line, the literal on the next).
275bool NgapOwnedContainer::get_daac_content_filters(
const string &data_url, map<
string,
string, std::less<>> &content_filters) {
276 if (NgapOwnedContainer::d_inject_data_url) {
278 const string missing_data_url_str = data_url +
"_mvs.h5";
279 const string href = R
"(href=")";
280 const string trusted_url_hack = R
"(" dmrpp:trust="true")";
281 const string data_access_url_key = href + DATA_ACCESS_URL_KEY +
"\"";
282 const string data_access_url_with_trusted_attr_str = href + data_url + trusted_url_hack;
283 const string missing_data_access_url_key = href + MISSING_DATA_ACCESS_URL_KEY +
"\"";
284 const string missing_data_url_with_trusted_attr_str = href + missing_data_url_str + trusted_url_hack;
286 content_filters.clear();
287 content_filters.insert(pair<string, string>(data_access_url_key, data_access_url_with_trusted_attr_str));
288 content_filters.insert(pair<string, string>(missing_data_access_url_key, missing_data_url_with_trusted_attr_str));
// Build the filter map used on a DMR++ read from the OPeNDAP bucket.
//
// content_filters  cleared and then given a single entry, but only when
//                  d_inject_data_url is true.
//
// The entry maps the text dmrpp:version to the dmrpp:trust attribute (set to true, with
// a trailing space) followed by dmrpp:version, so the trust attribute ends up directly
// in front of the version attribute.
//
// NOTE(review): the return statements are not visible in this listing. Presumably true
// is returned when the filter was built and false when d_inject_data_url is off.
303bool NgapOwnedContainer::get_opendap_content_filters(map<
string,
string, std::less<>> &content_filters) {
304 if (NgapOwnedContainer::d_inject_data_url) {
306 const string version_attribute =
"dmrpp:version";
307 const string trusted_attribute = R
"(dmrpp:trust="true" )";
309 const string trusted_and_version = trusted_attribute + version_attribute;
311 content_filters.clear();
312 content_filters.insert(pair<string, string>(version_attribute, trusted_and_version));
// Try to fetch the DMR++ for this container from the OPeNDAP-owned bucket.
//
// dmrpp_string  receives the DMR++ text; the visible error paths clear it.
//
// The URL comes from build_dmrpp_url_to_owned_bucket() using get_real_name() and
// get_data_source_location(). After the HTTP GET, the filters from
// get_opendap_content_filters() are applied with filter_response().
// Throws BESInternalError when the content filters cannot be built.
// http::HttpError is caught and handled by switching on the HTTP status.
// NOTE(review): the try keyword, the case labels of the switch, the assignment to
// dmrpp_read and the return statement are not visible in this listing. Presumably the
// function returns dmrpp_read, true only after a successful fetch - confirm.
326bool NgapOwnedContainer::dmrpp_read_from_opendap_bucket(
string &dmrpp_string)
const {
328 bool dmrpp_read =
false;
330 string dmrpp_url_str = build_dmrpp_url_to_owned_bucket(
get_real_name(), get_data_source_location());
331 INFO_LOG(prolog +
"Look in the OPeNDAP-bucket for the DMRpp for: " + dmrpp_url_str);
332 curl::http_get(dmrpp_url_str, dmrpp_string);
333 map <string, string, std::less<>> content_filters;
334 if (!get_opendap_content_filters(content_filters)) {
335 throw BESInternalError(
"Could not build opendap content filters for DMR++", __FILE__, __LINE__);
337 filter_response(content_filters, dmrpp_string);
338 INFO_LOG(prolog +
"Found the DMRpp in the OPeNDAP-bucket for: " + dmrpp_url_str);
341 catch (http::HttpError &http_error) {
// Which status codes reach which branch cannot be seen here; the case labels are elided.
348 switch (http_error.http_status()) {
352 ERROR_LOG(prolog +
"Looked in the OPeNDAP bucket for the DMRpp for: " +
get_real_name()
353 +
" but got HTTP Status: " + std::to_string(http_error.http_status()));
354 dmrpp_string.clear();
359 dmrpp_string.clear();
365 +
". This error for a OPeNDAP-owned DMR++ could be from Hyrax or S3.");
// Fetch the DMR++ for this container from the DAAC bucket.
//
// dmrpp_string  receives the DMR++ text after filtering.
//
// The data URL comes from build_data_url_to_daac_bucket(get_real_name()); the DMR++ URL
// is that URL with .dmrpp appended. After the HTTP GET, the filters from
// get_daac_content_filters() are applied with filter_response().
// Throws BESInternalError when the content filters cannot be built.
// When http::HttpError is caught, a note naming this function is appended to its
// message.
// NOTE(review): the try keyword and whatever follows set_message() in the handler are
// not visible in this listing; presumably the exception is rethrown - confirm.
378void NgapOwnedContainer::dmrpp_read_from_daac_bucket(
string &dmrpp_string)
const {
381 string data_url = build_data_url_to_daac_bucket(
get_real_name());
382 string dmrpp_url_str = data_url +
".dmrpp";
383 INFO_LOG(prolog +
"Look in the DAAC-bucket for the DMRpp for: " + dmrpp_url_str);
386 curl::http_get(dmrpp_url_str, dmrpp_string);
388 map <string, string, std::less<>> content_filters;
389 if (!get_daac_content_filters(data_url, content_filters)) {
390 throw BESInternalError(
"Could not build content filters for DMR++", __FILE__, __LINE__);
392 filter_response(content_filters, dmrpp_string);
393 INFO_LOG(prolog +
"Found the DMRpp in the DAAC-bucket for: " + dmrpp_url_str);
// Caught by non-const reference because the handler modifies the message.
395 catch (http::HttpError &http_error) {
396 http_error.
set_message(http_error.
get_message() +
"NgapOwnedContainer::dmrpp_read_from_daac_bucket() failed to read the DMR++ from S3.");
// Obtain the DMR++ for this container from the caches or, failing that, from a bucket.
//
// dmrpp_string  receives the DMR++ text.
//
// Visible steps, in order:
//   1. If NgapRequestHandler::d_use_dmrpp_cache is set, try get_item_from_dmrpp_cache().
//   2. If d_use_opendap_bucket is set, try dmrpp_read_from_opendap_bucket().
//   3. Call dmrpp_read_from_daac_bucket().
//   4. If d_use_dmrpp_cache is set, store the result with put_item_in_dmrpp_cache().
// NOTE(review): the bodies of the if statements, the condition guarding step 3 and all
// return statements are not visible in this listing. Presumably step 3 runs only when
// step 2 did not produce a DMR++ - confirm against the full source.
416bool NgapOwnedContainer::get_dmrpp_from_cache_or_remote_source(
string &dmrpp_string)
const {
420 if (NgapRequestHandler::d_use_dmrpp_cache && get_item_from_dmrpp_cache(dmrpp_string)) {
426 bool dmrpp_read =
false;
429 if (NgapOwnedContainer::d_use_opendap_bucket) {
432 dmrpp_read = dmrpp_read_from_opendap_bucket(dmrpp_string);
437 dmrpp_read_from_daac_bucket(dmrpp_string);
// The cache put is attempted only when caching is enabled; its failure path is not shown.
443 if (NgapRequestHandler::d_use_dmrpp_cache && !put_item_in_dmrpp_cache(dmrpp_string)) {
467 get_dmrpp_from_cache_or_remote_source(dmrpp_string);
485 strm << BESIndent::LMarg <<
"NgapOwnedContainer::dump - (" << (
void *)
this <<
")\n";
488 BESIndent::UnIndent();
void set_container_type(const std::string &type)
set the type of data that this container represents, such as cedar or netcdf.
void set_attributes(const std::string &attrs)
set desired attributes for this container
void dump(std::ostream &strm) const override
dumps information about this object
std::string get_real_name() const
retrieve the real name for this container, such as a file name.
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
std::string get_message() const
get the error message for this exception
void set_message(const std::string &msg)
set the error message for this exception
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with t...
static std::string hash_key(const std::string &key, bool log_it=false)
Return a SHA256 hash of the given key.
static bool read_bool_key(const std::string &key, bool default_value)
Read a boolean-valued key from the bes.conf file.
static std::string read_string_key(const std::string &key, const std::string &default_value)
Read a string-valued key from the bes.conf file.
static std::string convert_ngap_resty_path_to_data_access_url(const std::string &restified_path)
Converts an NGAP restified granule path into a CMR metadata query for the granule.
void dump(std::ostream &strm) const override
dumps information about this object
std::string access() override
Get the DMR++ from a remote source or a local cache.