33#include "BESNotFoundError.h"
34#include "BESSyntaxUserError.h"
35#include "BESInternalError.h"
38#include "TheBESKeys.h"
// Message prefix of the form "NgapApi::<function>() - ". Because this is a macro, __func__ names
// the function in which prolog is used, not a fixed location.
47#define prolog string("NgapApi::").append(__func__).append("() - ")
// Threshold compared against the time left before a signed URL expires (see the
// `remaining < REFRESH_THRESHOLD` test later in this file). Presumably seconds, i.e. one hour -
// TODO confirm the unit against the source of `now`.
51const unsigned int REFRESH_THRESHOLD = 3600;
// Returns the CMR search endpoint URL.
//
// The value lives in a function-local static string and is only (re)computed while that string
// is still empty, so later calls return the stored value.
//
// NOTE(review): this listing is missing the statements between the `if` and the line ending in
// DEFAULT_CMR_SEARCH_ENDPOINT_PATH, as well as the closing braces. Presumably the URL is assembled
// from configuration keys with DEFAULT_CMR_SEARCH_ENDPOINT_PATH as a fallback - confirm against
// the complete source before changing anything here.
59std::string NgapApi::get_cmr_search_endpoint_url() {
60 static string cmr_search_endpoint_url;
61 if (cmr_search_endpoint_url.empty()) {
64 DEFAULT_CMR_SEARCH_ENDPOINT_PATH);
68 return cmr_search_endpoint_url;
// Builds a CMR query URL from a restified path that uses the older layout beginning with
// NGAP_PROVIDERS_KEY.
//
// Visible logic:
//   1. The path is normalised so that it begins with '/'.
//   2. NGAP_PROVIDERS_KEY must be found at index 0.
//   3. Either NGAP_COLLECTIONS_KEY or, failing that, NGAP_CONCEPTS_KEY must follow.
//   4. NGAP_GRANULES_KEY must follow the collection part.
//   5. The text between the keys becomes the provider, collection and granule values. Each is
//      URL-escaped with curl_easy_escape() and appended to the CMR search endpoint as the
//      CMR_PROVIDER, CMR_ENTRY_TITLE / CMR_COLLECTION_CONCEPT_ID and CMR_GRANULE_UR parameters.
//
// @param restified_path The request path to convert.
// @throws BESSyntaxUserError when a required key is missing or is found in the wrong position.
//
// NOTE(review): this listing omits several original lines (the declarations of `msg`, closing
// braces, the `else` between the two collection appends, and the final return). The return value
// is presumably `cmr_url` - confirm against the complete source.
78std::string NgapApi::build_cmr_query_url_old_rpath_format(
const std::string &restified_path) {
// Prepend '/' unless the path already starts with one.
81 string r_path = (restified_path[0] !=
'/' ?
"/" :
"") + restified_path;
83 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
84 if (provider_index == string::npos) {
86 msg << prolog <<
"The specified path '" << r_path <<
"'";
87 msg <<
" does not contain the required path element '" << NGAP_PROVIDERS_KEY <<
"'";
88 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// The providers key is only accepted at the very start of the normalised path.
90 if (provider_index != 0) {
92 msg << prolog <<
"The specified path '" << r_path <<
"'";
93 msg <<
" has the path element '" << NGAP_PROVIDERS_KEY <<
"' located in the incorrect position (";
94 msg << provider_index <<
") expected 0.";
95 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// Step past the key: provider_index now marks the first character of the provider value.
97 provider_index += string(NGAP_PROVIDERS_KEY).size();
99 bool use_collection_concept_id =
false;
100 size_t collection_index = r_path.find(NGAP_COLLECTIONS_KEY);
// No collections key: fall back to the concepts key, and remember which one was used.
101 if (collection_index == string::npos) {
102 size_t concepts_index = r_path.find(NGAP_CONCEPTS_KEY);
103 if (concepts_index == string::npos) {
105 msg << prolog <<
"The specified path '" << r_path <<
"'";
106 msg <<
" contains neither the '" << NGAP_COLLECTIONS_KEY <<
"'";
107 msg <<
" nor the '" << NGAP_CONCEPTS_KEY <<
"'";
108 msg <<
" key, one must be provided.";
109 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
111 collection_index = concepts_index;
112 use_collection_concept_id =
true;
// Reject the path when fewer than two characters lie between the end of the providers key and
// the collection/concepts key (this also covers a key found before the provider value).
114 if (collection_index <= provider_index + 1) {
116 msg << prolog <<
"The specified path '" << r_path <<
"'";
117 msg <<
" has the path element '" << (use_collection_concept_id ? NGAP_CONCEPTS_KEY : NGAP_COLLECTIONS_KEY)
118 <<
"' located in the incorrect position (";
119 msg << collection_index <<
") expected at least " << provider_index + 1;
120 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// Provider value: everything from the end of the providers key up to the next key.
122 string provider = r_path.substr(provider_index, collection_index - provider_index);
// Step past whichever key was matched so collection_index marks the start of the collection value.
123 collection_index += use_collection_concept_id ? string(NGAP_CONCEPTS_KEY).size() : string(
124 NGAP_COLLECTIONS_KEY).size();
126 size_t granule_index = r_path.find(NGAP_GRANULES_KEY);
127 if (granule_index == string::npos) {
129 msg << prolog <<
"The specified path '" << r_path <<
"'";
130 msg <<
" does not contain the required path element '" << NGAP_GRANULES_KEY <<
"'";
131 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// Same minimum-length rule as above, applied to the collection value.
133 if (granule_index <= collection_index + 1) {
135 msg << prolog <<
"The specified path '" << r_path <<
"'";
136 msg <<
" has the path element '" << NGAP_GRANULES_KEY <<
"' located in the incorrect position (";
137 msg << granule_index <<
") expected at least " << collection_index + 1;
138 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
140 string collection = r_path.substr(collection_index, granule_index - collection_index);
141 granule_index += string(NGAP_GRANULES_KEY).size();
// Granule value: everything after the granules key, to the end of the path.
144 string granule = r_path.substr(granule_index);
147 string cmr_url = get_cmr_search_endpoint_url() +
"?";
// NOTE(review): curl_easy_init() can return NULL and curl_easy_escape() returns NULL on failure;
// neither result is checked in the visible code, and std::string::append(NULL) is undefined.
// An exception thrown between init and cleanup would also leave the handle unreleased - consider
// guarding these.
150 CURL *ceh = curl_easy_init();
151 char *esc_url_content;
154 esc_url_content = curl_easy_escape(ceh, provider.c_str(), provider.size());
155 cmr_url += string(CMR_PROVIDER).append(
"=").append(esc_url_content).append(
"&");
156 curl_free(esc_url_content);
158 esc_url_content = curl_easy_escape(ceh, collection.c_str(), collection.size());
// The collection value is sent as a concept id when the concepts key matched. The second append
// below is presumably the `else` branch (entry title); the `else` line itself is not shown in
// this listing - confirm.
159 if (use_collection_concept_id) {
161 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append(
"=").append(esc_url_content).append(
"&");
164 cmr_url += string(CMR_ENTRY_TITLE).append(
"=").append(esc_url_content).append(
"&");
167 curl_free(esc_url_content);
// Last parameter: no trailing '&'.
169 esc_url_content = curl_easy_escape(ceh, granule.c_str(), granule.size());
170 cmr_url += string(CMR_GRANULE_UR).append(
"=").append(esc_url_content);
171 curl_free(esc_url_content);
173 curl_easy_cleanup(ceh);
// Builds a CMR query URL from a restified path.
//
// Visible logic:
//   - If NGAP_PROVIDERS_KEY occurs anywhere in the normalised path, the work is delegated to
//     build_cmr_query_url_old_rpath_format().
//   - Otherwise NGAP_COLLECTIONS_KEY must be at index 0 and NGAP_GRANULES_KEY must follow it.
//   - The text between the two keys is the collection name; anything from its first '/' onward is
//     split off as an "optional part", which the visible code only logs.
//   - The collection name and the granule name are URL-escaped and appended to the CMR search
//     endpoint as the CMR_COLLECTION_CONCEPT_ID and CMR_GRANULE_UR parameters.
//
// @param restified_path The request path to convert.
// @throws BESSyntaxUserError when a required key is missing or is found in the wrong position.
//
// NOTE(review): this listing omits several original lines (the declarations of `msg`, closing
// braces and the final return). The return value is presumably `cmr_url` - confirm against the
// complete source.
195std::string NgapApi::build_cmr_query_url(
const std::string &restified_path) {
// Prepend '/' unless the path already starts with one.
198 string r_path = (restified_path[0] !=
'/' ?
"/" :
"") + restified_path;
// A providers key anywhere in the path selects the older path layout.
200 size_t provider_index = r_path.find(NGAP_PROVIDERS_KEY);
201 if (provider_index != string::npos) {
202 return build_cmr_query_url_old_rpath_format(restified_path);
205 size_t collections_key_index = r_path.find(NGAP_COLLECTIONS_KEY);
// NOTE(review): the message below names NGAP_CONCEPTS_KEY as an alternative, but only
// NGAP_COLLECTIONS_KEY is searched for in the visible code of this function.
206 if (collections_key_index == string::npos) {
208 msg << prolog <<
"The specified path '" << r_path <<
"'";
209 msg <<
" contains neither the '" << NGAP_COLLECTIONS_KEY <<
"'";
210 msg <<
" nor the '" << NGAP_CONCEPTS_KEY <<
"'";
211 msg <<
" one must be provided.";
212 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// NOTE(review): provider_index is string::npos whenever this point is reached (the other case
// returned above), so `provider_index + 1` wraps around to 0 and the message reads
// "expected at least 0". The test itself requires the key at exactly index 0.
214 if (collections_key_index != 0) {
216 msg << prolog <<
"The specified path '" << r_path <<
"'";
217 msg <<
" has the path element '" << NGAP_COLLECTIONS_KEY <<
"' located in the incorrect position (";
218 msg << collections_key_index <<
") expected at least " << provider_index + 1;
219 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// First character after the collections key.
222 size_t collections_index = collections_key_index + string(NGAP_COLLECTIONS_KEY).size();
224 size_t granules_key_index = r_path.find(NGAP_GRANULES_KEY);
225 if (granules_key_index == string::npos) {
227 msg << prolog <<
"The specified path '" << r_path <<
"'";
228 msg <<
" does not contain the required path element '" << NGAP_GRANULES_KEY <<
"'";
229 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
// Reject the path when fewer than two characters lie between the end of the collections key and
// the granules key.
234 if (granules_key_index <= collections_index + 1) {
236 msg << prolog <<
"The specified path '" << r_path <<
"'";
237 msg <<
" has the path element '" << NGAP_GRANULES_KEY <<
"' located in the incorrect position (";
238 msg << granules_key_index <<
") expected at least " << collections_index + 1;
239 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
241 size_t granules_index = granules_key_index + string(NGAP_GRANULES_KEY).size();
// Granule name: everything after the granules key, to the end of the path.
243 string granule_name = r_path.substr(granules_index);
// Collection name: the text between the two keys.
247 string collection_name = r_path.substr(collections_index, granules_key_index - collections_index);
// Anything from the first '/' inside the collection name onward is split off; only the leading
// part is used in the query.
253 string optional_part;
254 size_t slash_pos = collection_name.find(
'/');
255 if (slash_pos != string::npos) {
256 optional_part = collection_name.substr(slash_pos);
257 BESDEBUG(MODULE, prolog <<
"Found optional collections name component: " << optional_part << endl);
258 collection_name = collection_name.substr(0, slash_pos);
260 BESDEBUG(MODULE, prolog <<
"Found collection_name (aka collection_concept_id): " << collection_name << endl);
263 string cmr_url = get_cmr_search_endpoint_url() +
"?";
// NOTE(review): curl_easy_init() can return NULL and curl_easy_escape() returns NULL on failure;
// neither result is checked in the visible code, and std::string::append(NULL) is undefined.
266 CURL *ceh = curl_easy_init();
267 char *esc_url_content;
269 esc_url_content = curl_easy_escape(ceh, collection_name.c_str(), collection_name.size());
270 cmr_url += string(CMR_COLLECTION_CONCEPT_ID).append(
"=").append(esc_url_content).append(
"&");
271 curl_free(esc_url_content);
// Last parameter: no trailing '&'.
273 esc_url_content = curl_easy_escape(ceh, granule_name.c_str(), granule_name.size());
274 cmr_url += string(CMR_GRANULE_UR).append(
"=").append(esc_url_content);
275 curl_free(esc_url_content);
277 curl_easy_cleanup(ceh);
// Extracts a data access URL from a parsed CMR granule response.
//
// Visible logic: read "hits" and "items"; take the first item's "umm" member and its "RelatedUrls"
// array; walk that array until a URL is accepted. An entry is accepted when
//   - its "Type" equals CMR_URL_TYPE_GET_DATA,
//   - it has no "Subtype" member,
//   - its "URL" begins with "https://" or "http://", and
//   - no ".xml" is found in the URL when searching from five characters before its end.
//
// @param rest_path            The request path; used only in error messages here.
// @param cmr_granule_response The parsed CMR response document.
// @return The first accepted URL.
// @throws BESNotFoundError when the path does not identify a granule, or when RelatedUrls is not
//         an array.
// @throws BESInternalError when "items" is not an array, when RelatedUrls is missing, when an
//         entry lacks "URL" or "Type", or when no entry is accepted.
//
// NOTE(review): this listing omits several original lines, including the condition guarding the
// first throw (presumably a test for zero hits - confirm), the declarations of `ss` and `err`, and
// closing braces.
298std::string NgapApi::find_get_data_url_in_granules_umm_json_v1_4(
const std::string &rest_path,
299 rapidjson::Document &cmr_granule_response) {
// NOTE(review): "hits" is read with operator[] and GetInt() without a HasMember()/IsInt() check in
// the visible code; rapidjson asserts on a missing member or wrong type.
300 const rapidjson::Value &val = cmr_granule_response[
"hits"];
301 int hits = val.GetInt();
303 throw BESNotFoundError(
string(
"The specified path '") + rest_path
304 +
"' does not identify a granule in CMR.", __FILE__, __LINE__);
// NOTE(review): same concern as "hits" - operator[] on a member that may be absent.
307 rapidjson::Value &items = cmr_granule_response[
"items"];
308 if (!items.IsArray()) {
309 throw BESInternalError(
string(
"ERROR! The CMR response did not contain the data URL information: ")
310 + rest_path, __FILE__, __LINE__);
// Debug-only dump of the JSON type of every element of "items". RJ_TYPE_NAMES is indexed by the
// value returned from GetType().
313 if (BESISDEBUG(MODULE)) {
315 const string RJ_TYPE_NAMES[] = {string(
"kNullType"), string(
"kFalseType"), string(
"kTrueType"),
316 string(
"kObjectType"), string(
"kArrayType"), string(
"kStringType"),
317 string(
"kNumberType")};
318 for (rapidjson::SizeType i = 0; i < items.Size(); i++)
319 ss <<
"items[" << i <<
"]: " << RJ_TYPE_NAMES[items[i].GetType()] << endl;
320 BESDEBUG(MODULE, prolog <<
"items size: " << items.Size() << endl << ss.str() << endl);
// Only the first item is examined.
323 rapidjson::Value &items_obj = items[0];
// NOTE(review): the iterator returned for "umm" is dereferenced with no visible comparison
// against MemberEnd(), unlike the "RelatedUrls" lookup below. The check may simply be missing
// from this listing - confirm, and add one if it is absent in the real source.
324 auto mitr = items_obj.FindMember(
"umm");
326 rapidjson::Value &umm = mitr->value;
327 mitr = umm.FindMember(
"RelatedUrls");
328 if (mitr == umm.MemberEnd()) {
329 throw BESInternalError(
"Error! The umm/RelatedUrls object was not located!", __FILE__, __LINE__);
332 rapidjson::Value &related_urls = mitr->value;
334 if (!related_urls.IsArray()) {
335 throw BESNotFoundError(
"Error! The RelatedUrls object in the CMR response is not an array!", __FILE__,
339 BESDEBUG(MODULE, prolog <<
" Found RelatedUrls array in CMR response." << endl);
341 string data_access_url;
// The loop condition stops the scan as soon as data_access_url has been set.
342 for (rapidjson::SizeType i = 0; i < related_urls.Size() && data_access_url.empty(); i++) {
343 rapidjson::Value &obj = related_urls[i];
344 mitr = obj.FindMember(
"URL");
345 if (mitr == obj.MemberEnd()) {
347 err <<
"Error! The umm/RelatedUrls[" << i <<
"] does not contain the URL object";
348 throw BESInternalError(err.str(), __FILE__, __LINE__);
351 const rapidjson::Value &r_url = mitr->value;
353 mitr = obj.FindMember(
"Type");
354 if (mitr == obj.MemberEnd()) {
356 err <<
"Error! The umm/RelatedUrls[" << i <<
"] does not contain the Type object";
357 throw BESInternalError(err.str(), __FILE__, __LINE__);
360 const rapidjson::Value &r_type = mitr->value;
// Only the presence of "Subtype" matters; its value is never read.
362 bool noSubtype = obj.FindMember(
"Subtype") == obj.MemberEnd();
364 BESDEBUG(MODULE, prolog <<
"RelatedUrl Object:" <<
365 " URL: '" << r_url.GetString() <<
"'" <<
366 " Type: '" << r_type.GetString() <<
"'" <<
367 " SubType: '" << (noSubtype ?
"Absent" :
"Present") <<
"'" << endl);
369 if ((r_type.GetString() ==
string(CMR_URL_TYPE_GET_DATA)) && noSubtype) {
374 string candidate_url = r_url.GetString();
// rfind(prefix, 0) == 0 is a starts-with test. Because && short-circuits, the ".xml" search only
// runs on URLs of at least 7 characters, so size()-5 cannot wrap around.
// NOTE(review): searching from size()-5 rejects ".xml" in the last four characters but also one
// position earlier (e.g. a URL ending in ".xmlz") - confirm whether that is intended.
376 if ((candidate_url.rfind(
"https://", 0) == 0 || candidate_url.rfind(
"http://", 0) == 0)
377 && candidate_url.find(
".xml", candidate_url.size()-5) == string::npos) {
378 data_access_url = candidate_url;
383 if (data_access_url.empty()) {
384 throw BESInternalError(
string(
"ERROR! Failed to locate a data access URL for the path: ") + rest_path,
388 return data_access_url;
// Function body whose signature line is not part of this listing; it reads a `restified_path`
// value and returns a string.
//
// Visible steps: build the CMR query URL for restified_path, fetch it with curl::http_get() into
// cmr_json_string, parse that text with rapidjson, and pass the document to
// find_get_data_url_in_granules_umm_json_v1_4() to obtain the data access URL that is returned.
//
// NOTE(review): the lines around the http_get() call are missing here. The `http_error` name used
// when building err_msg suggests a try/catch that re-reports the failure - confirm against the
// complete source.
415 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
416 string data_access_url;
418 string cmr_query_url = build_cmr_query_url(restified_path);
420 BESDEBUG(MODULE, prolog <<
"CMR Request URL: " << cmr_query_url << endl);
422 string cmr_json_string;
424 curl::http_get(cmr_query_url, cmr_json_string);
427 string err_msg = prolog +
"Hyrax encountered a Service Chaining Error while "
428 "attempting to retrieve a CMR record. " + http_error.
get_message();
// NOTE(review): the result of Parse() is not checked (e.g. with HasParseError()) in the visible
// code before the document is handed on; a malformed response would reach the member lookups in
// the callee unvalidated.
433 rapidjson::Document cmr_response;
434 cmr_response.Parse(cmr_json_string.c_str());
435 data_access_url = find_get_data_url_in_granules_umm_json_v1_4(restified_path, cmr_response);
437 BESDEBUG(MODULE, prolog <<
"END (data_access_url: " << data_access_url <<
")" << endl);
439 return data_access_url;
// Function body whose signature line is not part of this listing; it reads a `signed_url` object
// and computes `is_expired`.
//
// Visible logic:
//   - `expires` starts out equal to `now`, so if neither branch below runs, `remaining` is 0 and
//     the URL is treated as expired.
//   - If cf_expires is non-empty it is parsed with stoll() and used as the expiry time directly.
//   - Otherwise, if aws_expires is non-empty, the expiry is start_time + stoll(aws_expires), where
//     start_time is the URL's ingest time unless aws_date is non-empty, in which case start_time
//     is rebuilt from aws_date.
//   - The URL counts as expired when less than REFRESH_THRESHOLD remains.
//
// NOTE(review): the declarations of `now`, `cf_expires`, `aws_expires`, `aws_date`, `ti` and
// `is_expired`, several closing braces and the return statement are missing from this listing.
456 BESDEBUG(MODULE, prolog <<
"now: " << now << endl);
458 time_t expires = now;
461 time_t ingest_time = signed_url.ingest_time();
// NOTE(review): stoll()/stoi() throw std::invalid_argument or std::out_of_range on bad input and
// substr() throws std::out_of_range when the start offset is past the end; none of these are
// caught in the visible code.
464 if (!cf_expires.empty()) {
465 expires = stoll(cf_expires);
466 BESDEBUG(MODULE, prolog <<
"Using " << CLOUDFRONT_EXPIRES_HEADER_KEY <<
": " << expires << endl);
467 }
else if (!aws_expires.empty()) {
471 time_t start_time = ingest_time;
// aws_date is cut at fixed offsets: 4-digit year, 2-digit month, 2-digit day, one skipped
// character at index 8, then 2-digit hour, minute and second.
474 if (!aws_date.empty()) {
475 string year = aws_date.substr(0, 4);
476 string month = aws_date.substr(4, 2);
477 string day = aws_date.substr(6, 2);
478 string hour = aws_date.substr(9, 2);
479 string minute = aws_date.substr(11, 2);
480 string second = aws_date.substr(13, 2);
482 BESDEBUG(MODULE, prolog <<
"date: " << aws_date <<
483 " year: " << year <<
" month: " << month <<
" day: " << day <<
484 " hour: " << hour <<
" minute: " << minute <<
" second: " << second << endl);
// ti is seeded from the current UTC time so that the fields not assigned below start out valid.
487 if (gmtime_r(&now, &ti) ==
nullptr)
488 throw BESInternalError(
"Could not get the current time, gmtime_r() failed!", __FILE__, __LINE__);
// struct tm counts years from 1900 and months from 0.
489 ti.tm_year = stoi(year) - 1900;
490 ti.tm_mon = stoi(month) - 1;
491 ti.tm_mday = stoi(day);
492 ti.tm_hour = stoi(hour);
493 ti.tm_min = stoi(minute);
494 ti.tm_sec = stoi(second);
496 BESDEBUG(MODULE, prolog <<
"ti.tm_year: " << ti.tm_year <<
497 " ti.tm_mon: " << ti.tm_mon <<
498 " ti.tm_mday: " << ti.tm_mday <<
499 " ti.tm_hour: " << ti.tm_hour <<
500 " ti.tm_min: " << ti.tm_min <<
501 " ti.tm_sec: " << ti.tm_sec << endl);
// NOTE(review): mktime() interprets ti as local time, while ti was seeded with gmtime_r(). If
// aws_date is a UTC timestamp (presumably - confirm), start_time is off by the local UTC offset
// on any host whose timezone is not UTC; timegm() is the UTC counterpart where available.
503 start_time = mktime(&ti);
504 BESDEBUG(MODULE, prolog <<
"AWS (computed) start_time: " << start_time << endl);
507 expires = start_time + stoll(aws_expires);
508 BESDEBUG(MODULE, prolog <<
"Using " << AMS_EXPIRES_HEADER_KEY <<
": " << aws_expires <<
509 " (expires: " << expires <<
")" << endl);
514 time_t remaining = expires - now;
515 BESDEBUG(MODULE, prolog <<
"expires_time: " << expires <<
516 " remaining_time: " << remaining <<
517 " refresh_threshold: " << REFRESH_THRESHOLD << endl);
// NOTE(review): this compares a time_t with an unsigned int. Where time_t is wider than unsigned
// int the threshold is converted to time_t and a negative `remaining` compares as expired; on a
// platform with a 32-bit time_t the comparison would be unsigned and a negative `remaining` would
// compare as NOT expired - confirm the target platforms or cast explicitly.
519 is_expired = remaining < REFRESH_THRESHOLD;
520 BESDEBUG(MODULE, prolog <<
"is_expired: " << (is_expired ?
"true" :
"false") << endl);
std::string get_message() const
get the error message for this exception
void set_message(const std::string &msg)
set the error message for this exception
exception thrown if internal error encountered
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
static TheBESKeys * TheKeys()
Access to the singleton.
static std::string read_string_key(const std::string &key, const std::string &default_value)
Read a string-valued key from the bes.conf file.
Parse a URL into the protocol, host, path and query parts.
virtual std::string query_parameter_value(const std::string &key) const
Get the value of a query string key.
static bool signed_url_is_expired(const http::url &signed_url)
Has the signed S3 URL expired? If neither the CloudFront Expires header nor the AWS Expires header ar...
static std::string convert_ngap_resty_path_to_data_access_url(const std::string &restified_path)
Converts an NGAP restified granule path into a CMR metadata query for the granule.