38#include "BESContextManager.h"
39#include "BESSyntaxUserError.h"
40#include "BESInternalError.h"
44#include "TheBESKeys.h"
46#include "BESStopWatch.h"
51#include "ProxyConfig.h"
52#include "AllowedHosts.h"
54#include "CredentialsManager.h"
55#include "AccessCredentials.h"
56#include "RequestServiceTimer.h"
// Name of a debug context; no use of it is visible in this listing.
62#define CURL_TIMING "curl:timing"
// Log-message prefix: expands to "CurlUtils::<enclosing function>() - " using __func__ at the point of use.
68#define prolog std::string("CurlUtils::").append(__func__).append("() - ")
// Forward declaration of the file-local two-argument super_easy_perform() that is defined later in this file.
72static void super_easy_perform(CURL *c_handle,
int fd);
// Attempt count at which super_easy_perform() stops retrying and throws.
74const unsigned int retry_limit = 3;
// Starting value of retry_time in super_easy_perform(). useconds_t counts microseconds, so this is a quarter of a second.
75const useconds_t url_retry_time = 250'000;
// Trace switch, 0 here. NOTE(review): presumably gates the CURLOPT_VERBOSE / CURLOPT_DEBUGFUNCTION setup in init();
// the guarding line is not visible in this listing -- confirm.
78const int curl_trace = 0;
// Inclusive range of HTTP client error codes that http_code_to_string() looks up in http_client_errors.
80const int CLIENT_ERR_MIN = 400;
81const int CLIENT_ERR_MAX = 417;
// Messages for HTTP client errors. http_code_to_string() indexes this table with (code - CLIENT_ERR_MIN),
// so it needs one entry per code in CLIENT_ERR_MIN..CLIENT_ERR_MAX (18 entries).
// NOTE(review): only 11 entries and no closing brace are visible in this listing (the embedded line numbers
// skip 83, 85, 89, 91-94 and 101) -- confirm the real table is complete and in code order.
82const vector <string> http_client_errors = {
84 "Unauthorized: Contact the server administrator.",
86 "Forbidden: Contact the server administrator.",
87 "Not Found: The underlying data source or server could not be found.",
88 "Method Not Allowed.",
90 "Proxy Authentication Required.",
95 "Precondition Failed.",
96 "Request Entity Too Large.",
97 "Request URI Too Large.",
98 "Unsupported Media Type.",
99 "Requested Range Not Satisfiable.",
100 "Expectation Failed."
// Inclusive range of HTTP server error codes that http_code_to_string() looks up in http_server_errors.
103const int SERVER_ERR_MIN = 500;
104const int SERVER_ERR_MAX = 505;
// Messages for HTTP server errors, indexed with (code - SERVER_ERR_MIN); the range above needs 6 entries.
// NOTE(review): only 3 entries and no closing brace are visible in this listing -- confirm the real table is complete.
105const vector <string> http_server_errors = {
106 "Internal Server Error.",
109 "Service Unavailable.",
111 "HTTP Version Not Supported."
122static string http_code_to_string(
long code) {
123 if (code >= CLIENT_ERR_MIN && code <= CLIENT_ERR_MAX)
124 return {http_client_errors[code - CLIENT_ERR_MIN]};
125 else if (code >= SERVER_ERR_MIN && code <= SERVER_ERR_MAX)
126 return {http_server_errors[code - SERVER_ERR_MIN]};
128 return {
"Unknown HTTP Error: " + to_string(code)};
// Builds a space separated list of CURLAUTH_* names for the bits set in auth_type.
// The bits tested are BASIC, DIGEST, DIGEST_IE, GSSNEGOTIATE and NTLM, in that order.
// NOTE(review): the declaration of match and the tests on it are not visible in this listing
// (embedded numbering skips them); presumably each append is guarded by a check of match -- confirm.
137static string getCurlAuthTypeName(
unsigned long auth_type) {
139 string authTypeString;
142 match = auth_type & CURLAUTH_BASIC;
144 authTypeString +=
"CURLAUTH_BASIC";
147 match = auth_type & CURLAUTH_DIGEST;
// A separator is added only when an earlier name is already in the string.
149 if (!authTypeString.empty())
150 authTypeString +=
" ";
151 authTypeString +=
"CURLAUTH_DIGEST";
154 match = auth_type & CURLAUTH_DIGEST_IE;
156 if (!authTypeString.empty())
157 authTypeString +=
" ";
158 authTypeString +=
"CURLAUTH_DIGEST_IE";
161 match = auth_type & CURLAUTH_GSSNEGOTIATE;
163 if (!authTypeString.empty())
164 authTypeString +=
" ";
165 authTypeString +=
"CURLAUTH_GSSNEGOTIATE";
168 match = auth_type & CURLAUTH_NTLM;
170 if (!authTypeString.empty())
171 authTypeString +=
" ";
172 authTypeString +=
"CURLAUTH_NTLM";
175 return authTypeString;
// Timeout message text. NOTE(review): no use of this macro is visible in this listing; presumably it is used
// in the lines of writeToOpenFileDescriptor() that are missing here (embedded numbers 191-194) -- confirm.
182#define CURL_WRITE_TO_FILE_TIMEOUT_MSG "The function curl::writeToOpenFileDescriptor() was unable to complete the download process because it ran out of time."
// Write callback with the libcurl write-function parameter shape: writes the received bytes to an open file descriptor.
// data: the received bytes. The size parameter is unnamed and unused; nmemb alone is used as the byte count.
// userdata: treated as a pointer to the int file descriptor to write to.
// Returns the value produced by write().
184static size_t writeToOpenFileDescriptor(
const char *data,
size_t ,
size_t nmemb,
const void *userdata) {
186 const auto fd =
static_cast<const int *
>(userdata);
188 BESDEBUG(MODULE, prolog <<
"Bytes received: " << nmemb << endl);
// NOTE(review): write() returns a signed count and -1 on failure; storing it in size_t turns -1 into a very
// large value. A short write is not retried here. Either way the return value then differs from nmemb.
189 size_t bytes_written = write(*fd, data, nmemb);
190 BESDEBUG(MODULE, prolog <<
" Bytes written: " << bytes_written << endl);
195 return bytes_written;
// Header callback: stores one response header line in a vector<string>.
// ptr/size/nmemb: the header bytes as handed to the callback.
// resp_hdrs: treated as a pointer to the vector<string> that collects the header lines.
// NOTE(review): the return statement and an else between the two assign() calls are not visible in this listing.
221static size_t save_http_response_headers(
void *ptr,
size_t size,
size_t nmemb,
void *resp_hdrs) {
222 BESDEBUG(MODULE, prolog <<
"Inside the header parser." << endl);
223 auto hdrs =
static_cast<vector<string> *
>(resp_hdrs);
226 string complete_line;
// When the second to last element is a carriage return, the last two elements are dropped ...
227 if (nmemb > 1 && *(
static_cast<char *
>(ptr) + size * (nmemb - 2)) ==
'\r')
228 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 2));
// ... otherwise only the last element is dropped. NOTE(review): nmemb - 1 wraps if nmemb is 0.
230 complete_line.assign(
static_cast<char *
>(ptr), size * (nmemb - 1));
// Empty lines are skipped, and so is every line that contains the text HTTP anywhere in it.
233 if (!complete_line.empty() && complete_line.find(
"HTTP") == string::npos) {
234 BESDEBUG(MODULE, prolog <<
"Header line: " << complete_line << endl);
235 hdrs->push_back(complete_line);
// Debug callback (installed with CURLOPT_DEBUGFUNCTION in init()): sends each libcurl trace record to BESDEBUG
// with a label that depends on the record type.
// info: the record type; msg/size: the record bytes, which need not be NUL terminated, hence string(msg, size).
// The CURL handle and the user pointer are unnamed and unused.
// NOTE(review): the switch head, several case labels, the break statements, the #endif lines and the return
// are not visible in this listing.
248static int curl_debug(
const CURL *, curl_infotype info,
const char *msg,
size_t size,
const void *) {
249 string message(msg, size);
253 BESDEBUG(MODULE, prolog <<
"Text: " << message << endl);
255 case CURLINFO_HEADER_IN:
256 BESDEBUG(MODULE, prolog <<
"Header in: " << message << endl);
258 case CURLINFO_HEADER_OUT:
259 BESDEBUG(MODULE, prolog <<
"Header out: " << endl << message << endl);
261 case CURLINFO_DATA_IN:
262 BESDEBUG(MODULE, prolog <<
"Data in: " << message << endl);
264 case CURLINFO_DATA_OUT:
265 BESDEBUG(MODULE, prolog <<
"Data out: " << message << endl);
268 BESDEBUG(MODULE, prolog <<
"End: " << message << endl);
// The two SSL cases are compiled only when the matching name is defined as a preprocessor macro.
270#ifdef CURLINFO_SSL_DATA_IN
271 case CURLINFO_SSL_DATA_IN:
272 BESDEBUG(MODULE, prolog <<
"SSL Data in: " << message << endl );
break;
274#ifdef CURLINFO_SSL_DATA_OUT
275 case CURLINFO_SSL_DATA_OUT:
276 BESDEBUG(MODULE, prolog <<
"SSL Data out: " << message << endl );
break;
279 BESDEBUG(MODULE, prolog <<
"Curl info: " << message << endl);
// Detaches the error buffer from the easy handle by calling set_error_buffer() with a null pointer.
// Callers in this file do this before a function-local error buffer goes out of scope.
290static void unset_error_buffer(CURL *ceh) {
291 set_error_buffer(ceh,
nullptr);
// Applies the proxy settings held by http::ProxyConfig to an easy handle.
// ceh: the handle to configure. target_url: matched against the configured no-proxy regex.
// NOTE(review): the branch heads, the declaration of res, and the return statement are not visible in this
// listing; the END log line suggests the function returns using_proxy -- confirm.
316static bool configure_curl_handle_for_proxy(CURL *ceh,
const string &target_url) {
317 BESDEBUG(MODULE, prolog <<
"BEGIN." << endl);
319 bool using_proxy = http::ProxyConfig::theOne()->is_configured();
322 BESDEBUG(MODULE, prolog <<
"Proxy has been configured..." << endl);
324 http::ProxyConfig *proxy = http::ProxyConfig::theOne();
// Local copies of the proxy settings. Note the naming: proxyPassword comes from proxy_password(),
// while proxyUserPW comes from password() and is later passed to CURLOPT_PROXYUSERPWD.
327 string proxyHost = proxy->host();
328 int proxyPort = proxy->port();
329 string proxyPassword = proxy->proxy_password();
330 string proxyUser = proxy->user();
331 string proxyUserPW = proxy->password();
332 int proxyAuthType = proxy->auth_type();
333 string no_proxy_regex = proxy->no_proxy_regex();
// A non-empty no-proxy regex is matched against the target URL; -1 from match() means no match.
// NOTE(review): what happens on a match is not visible here beyond the log text that follows.
340 if (!no_proxy_regex.empty()) {
341 BESDEBUG(MODULE, prolog <<
"Found NoProxyRegex." << endl);
342 BESRegex r(no_proxy_regex.c_str());
343 if (r.match(target_url.c_str(),
static_cast<int>(target_url.size())) != -1) {
345 prolog <<
"Found NoProxy match. BESRegex: " << no_proxy_regex <<
"; Url: " << target_url
// Function-local buffer that receives libcurl error text while the options below are being set.
353 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
355 BESDEBUG(MODULE, prolog <<
"Setting up a proxy server." << endl);
356 BESDEBUG(MODULE, prolog <<
"Proxy host: " << proxyHost << endl);
357 BESDEBUG(MODULE, prolog <<
"Proxy port: " << proxyPort << endl);
359 set_error_buffer(ceh, error_buffer.data());
// Each setopt result is handed to eval_curl_easy_setopt_result(), which throws when it is not CURLE_OK.
361 res = curl_easy_setopt(ceh, CURLOPT_PROXY, proxyHost.data());
362 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXY", error_buffer.data(), __FILE__, __LINE__);
364 res = curl_easy_setopt(ceh, CURLOPT_PROXYPORT, proxyPort);
365 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPORT", error_buffer.data(), __FILE__, __LINE__);
374 res = curl_easy_setopt(ceh, CURLOPT_PROXYAUTH, proxyAuthType);
375 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYAUTH", error_buffer.data(), __FILE__, __LINE__);
376 BESDEBUG(MODULE, prolog <<
"Using CURLOPT_PROXYAUTH = " << getCurlAuthTypeName(proxyAuthType) << endl);
// Credentials: a separate user name (and optional password) is used when present, otherwise the combined value.
378 if (!proxyUser.empty()) {
379 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERNAME, proxyUser.data());
380 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERNAME", error_buffer.data(), __FILE__,
382 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERNAME : " << proxyUser << endl);
384 if (!proxyPassword.empty()) {
385 res = curl_easy_setopt(ceh, CURLOPT_PROXYPASSWORD, proxyPassword.data());
386 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYPASSWORD", error_buffer.data(), __FILE__,
// NOTE(review): the next statement writes the proxy password to the debug log in clear text.
388 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYPASSWORD: " << proxyPassword << endl);
390 }
else if (!proxyUserPW.empty()) {
391 res = curl_easy_setopt(ceh, CURLOPT_PROXYUSERPWD, proxyUserPW.data());
392 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_PROXYUSERPWD", error_buffer.data(), __FILE__,
// NOTE(review): the next statement writes the proxy user:password value to the debug log in clear text.
394 BESDEBUG(MODULE, prolog <<
"CURLOPT_PROXYUSERPWD : " << proxyUserPW << endl);
// Detach the local error buffer before it goes out of scope.
396 unset_error_buffer(ceh);
399 BESDEBUG(MODULE, prolog <<
"END. using_proxy: " << (using_proxy ?
"true" :
"false") << endl);
// Configures an existing easy handle for an HTTP GET of target_url.
// ceh: the handle to configure. The throw near the top is presumably guarded by a null check that is not visible here.
// http_request_headers: optional request header list, installed with CURLOPT_HTTPHEADER when non-null.
// http_response_hdrs: optional vector that receives response header lines via save_http_response_headers().
// Every setopt result goes through eval_curl_easy_setopt_result(), which throws BESInternalError on failure.
// NOTE(review): the declaration of res, several guards, the #else/#endif lines and the return statement are
// not visible in this listing; presumably the function returns ceh -- confirm.
404static CURL *init(CURL *ceh,
const string &target_url,
const curl_slist *http_request_headers,
405 vector <string> *http_response_hdrs) {
406 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
410 throw BESInternalError(
"Could not initialize cURL easy handle.", __FILE__, __LINE__);
413 set_error_buffer(ceh, error_buffer.data());
416 res = curl_easy_setopt(ceh, CURLOPT_URL, target_url.c_str());
417 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_URL", error_buffer.data(), __FILE__, __LINE__);
426 if (http_request_headers) {
428 res = curl_easy_setopt(ceh, CURLOPT_HTTPHEADER, http_request_headers);
429 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HTTPHEADER", error_buffer.data(), __FILE__, __LINE__);
// Response header capture: the callback and the vector it appends to are installed together.
433 if (http_response_hdrs) {
434 res = curl_easy_setopt(ceh, CURLOPT_HEADERFUNCTION, save_http_response_headers);
435 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HEADERFUNCTION", error_buffer.data(), __FILE__, __LINE__);
440 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, http_response_hdrs);
441 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEHEADER", error_buffer.data(), __FILE__, __LINE__);
// An empty encoding string is set under whichever option name is selected by the preprocessor test.
// NOTE(review): the second variant below uses curl (not ceh), check_setopt_result (not
// eval_curl_easy_setopt_result) and error_buffer (not error_buffer.data()); none of those match the rest of
// this function, so that variant looks like it would not compile if it were ever selected -- confirm.
445#ifndef CURLOPT_ACCEPT_ENCODING
446 res = curl_easy_setopt(ceh, CURLOPT_ENCODING,
"");
447 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_ENCODING", error_buffer.data(), __FILE__, __LINE__);
449 res = curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING,
"");
450 check_setopt_result(res, prolog,
"CURLOPT_ACCEPT_ENCODING", error_buffer, __FILE__,__LINE__);
453 res = curl_easy_setopt(ceh, CURLOPT_NOPROGRESS, 1L);
454 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NOPROGRESS", error_buffer.data(), __FILE__, __LINE__);
457 res = curl_easy_setopt(ceh, CURLOPT_NOSIGNAL, 1L);
458 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NOSIGNAL", error_buffer.data(), __FILE__, __LINE__);
// FAILONERROR is switched off, so HTTP error statuses are not turned into transfer errors by libcurl;
// this file inspects the HTTP status itself (see get_http_code() and process_http_code_helper()).
467 res = curl_easy_setopt(ceh, CURLOPT_FAILONERROR, 0L);
468 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FAILONERROR", error_buffer.data(), __FILE__, __LINE__);
474 res = curl_easy_setopt(ceh, CURLOPT_HTTPAUTH, (
long) CURLAUTH_ANY);
475 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_HTTPAUTH", error_buffer.data(), __FILE__, __LINE__);
481 res = curl_easy_setopt(ceh, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
482 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NETRC", error_buffer.data(), __FILE__, __LINE__);
// A netrc file name from get_netrc_filename() is installed only when it is non-empty.
485 string netrc_file = get_netrc_filename();
486 if (!netrc_file.empty()) {
487 res = curl_easy_setopt(ceh, CURLOPT_NETRC_FILE, netrc_file.c_str());
488 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_NETRC_FILE", error_buffer.data(), __FILE__, __LINE__);
491 VERBOSE(prolog +
" is using the netrc file '"
492 + (!netrc_file.empty() ? netrc_file :
"~/.netrc") +
"'");
// The same per-process cookie file name is used both for reading and for writing cookies.
498 res = curl_easy_setopt(ceh, CURLOPT_COOKIEFILE, curl::get_cookie_filename().c_str());
499 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_COOKIEFILE", error_buffer.data(), __FILE__, __LINE__);
501 res = curl_easy_setopt(ceh, CURLOPT_COOKIEJAR, curl::get_cookie_filename().c_str());
502 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_COOKIEJAR", error_buffer.data(), __FILE__, __LINE__);
507 res = curl_easy_setopt(ceh, CURLOPT_FOLLOWLOCATION, 1L);
508 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FOLLOWLOCATION", error_buffer.data(), __FILE__, __LINE__);
510 res = curl_easy_setopt(ceh, CURLOPT_MAXREDIRS, max_redirects());
511 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_MAXREDIRS", error_buffer.data(), __FILE__, __LINE__);
514 res = curl_easy_setopt(ceh, CURLOPT_USERAGENT, hyrax_user_agent().c_str());
515 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_USERAGENT", error_buffer.data(), __FILE__, __LINE__);
// Verbose mode and the curl_debug callback. NOTE(review): presumably guarded by curl_trace -- the guard is not visible here.
518 BESDEBUG(MODULE, prolog <<
"Curl version: " << curl_version() << endl);
519 res = curl_easy_setopt(ceh, CURLOPT_VERBOSE, 1L);
520 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_VERBOSE", error_buffer.data(), __FILE__, __LINE__);
521 BESDEBUG(MODULE, prolog <<
"Curl in verbose mode." << endl);
523 res = curl_easy_setopt(ceh, CURLOPT_DEBUGFUNCTION, curl_debug);
524 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_DEBUGFUNCTION", error_buffer.data(), __FILE__, __LINE__);
525 BESDEBUG(MODULE, prolog <<
"Curl debugging function installed." << endl);
// Detach the local error buffer, then apply any proxy configuration for this URL.
529 unset_error_buffer(ceh);
531 curl::configure_curl_handle_for_proxy(ceh, target_url);
533 BESDEBUG(MODULE, prolog <<
"curl: " << (
void *) ceh << endl);
// Creates a new easy handle with curl_easy_init() and configures it through the four-argument init().
// target_url: the URL to request. http_request_headers: optional request header list.
// http_response_hdrs: optional vector that receives the response header lines.
// Returns whatever the four-argument init() returns for the new handle.
549CURL *init(
const string &target_url,
550 const curl_slist *http_request_headers,
551 vector <string> *http_response_hdrs) {
552 CURL *swanky_new_curl_easy_handle = curl_easy_init();
553 return init(swanky_new_curl_easy_handle, target_url, http_request_headers, http_response_hdrs);
// Formats a byte range as <first>-<last>, where first is offset and last is offset + size - 1.
// NOTE(review): the declaration of range and the return statement are not visible in this listing; presumably
// range is a string stream and range.str() is returned -- confirm.
// NOTE(review): with size == 0 the last byte becomes offset - 1, which wraps around when offset is also 0.
557string get_range_arg_string(
const unsigned long long &offset,
const unsigned long long &size) {
559 range << offset <<
"-" << offset + size - 1;
560 BESDEBUG(MODULE, prolog <<
" range: " << range.str() << endl);
// Adds AWS signature headers to request_headers when S3 credentials are available for url.
// url: the URL to be requested. request_headers: the header list to extend (may be the list returned unchanged).
// Returns the possibly extended header list.
// NOTE(review): the return type line, the lookup that produces ac, and the else branches are not visible in
// this listing; the log texts suggest three outcomes: S3 credentials, non-S3 credentials, no credentials.
575sign_url_for_s3_if_possible(
const string &url, curl_slist *request_headers) {
580 if (ac && ac->is_s3_cred()) {
581 BESDEBUG(MODULE, prolog <<
"Located S3 credentials for url: " << url
582 <<
" Using request headers to hold AWS signature\n");
583 request_headers = sign_s3_url(url, ac, request_headers);
587 BESDEBUG(MODULE, prolog <<
"Located credentials for url: " << url <<
"They are "
588 << (ac->is_s3_cred()?
"":
"NOT ") <<
"S3 credentials.\n");
591 BESDEBUG(MODULE, prolog <<
"Unable to locate credentials for url: " << url <<
"\n");
596 return request_headers;
// Convenience overload: forwards url->str() to the string overload of sign_url_for_s3_if_possible().
// url is dereferenced without a null check.
610sign_url_for_s3_if_possible(
const shared_ptr <url> &url, curl_slist *request_headers) {
611 return sign_url_for_s3_if_possible(url->str(), request_headers);
623static string get_effective_url(CURL *ceh,
const string &requested_url) {
624 char *effective_url =
nullptr;
625 CURLcode curl_code = curl_easy_getinfo(ceh, CURLINFO_EFFECTIVE_URL, &effective_url);
626 if (curl_code != CURLE_OK) {
628 msg << prolog <<
"Unable to determine CURLINFO_EFFECTIVE_URL! Requested URL: " << requested_url;
629 BESDEBUG(MODULE, msg.str() << endl);
630 throw BESInternalError(msg.str(), __FILE__, __LINE__);
632 return effective_url;
// Shortens a URL for use in log and error messages (it is applied to URLs throughout this file before logging).
// Step 1: keep only the text before the first ampersand.
// Step 2: test whether that text contains X-Amz-.
// Step 3: the visible tail cuts the text at the first question mark and returns the result.
// NOTE(review): the statement executed when X-Amz- is NOT found is missing from this listing, so it cannot be
// told from here which value is returned in that case -- confirm against the full file.
647string filter_aws_url(
const string &eff_url) {
651 auto pos = eff_url.find(
'&');
// substr(0, npos) yields the whole string, so a URL without an ampersand is kept intact here.
652 string filtered_url = eff_url.substr(0, pos);
654 if (filtered_url.find(
"X-Amz-") == string::npos) {
657 pos = filtered_url.find(
'?');
658 return filtered_url.substr(0, pos);
// Decides whether a request for target_url may be retried, by matching it against a list of no-retry regexes.
// retryable starts out true. NOTE(review): the code that fills nr_regexs, the statement that runs on a full
// match, and the return are not visible in this listing; the log text names HTTP_NO_RETRY_URL_REGEX_KEY as
// the source of the patterns, and presumably a full match sets retryable to false -- confirm.
676bool is_retryable(
const string &target_url) {
677 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
678 bool retryable =
true;
680 vector<string> nr_regexs;
684 for (
const auto &nr_regex: nr_regexs) {
685 BESDEBUG(MODULE, prolog <<
"nr_regex: '" << nr_regex <<
"'" << endl);
686 BESRegex no_retry_regex(nr_regex.c_str(), (
int) nr_regex.size());
// The match result is stored in a size_t and compared with the URL length, so only a match that covers
// the whole URL counts. A negative result from match() becomes a huge unsigned value and never compares equal.
687 size_t match_length = no_retry_regex.match(target_url.c_str(), (
int) target_url.size(), 0);
688 if (match_length == target_url.size()) {
689 BESDEBUG(MODULE, prolog <<
"The url: '" << target_url <<
"' fully matched the "
690 << HTTP_NO_RETRY_URL_REGEX_KEY <<
": '" << nr_regex <<
"'" << endl);
698 BESDEBUG(MODULE, prolog <<
"END retryable: " << (retryable ?
"true" :
"false") << endl);
// Evaluates the CURLcode returned by curl_easy_perform().
// eff_req_url: the URL used in messages (passed through filter_aws_url() before logging).
// error_buffer: libcurl error text for the transfer. attempt: the attempt number used in messages.
// Three codes get a message that says a retry may be possible: CURLE_SSL_CONNECT_ERROR,
// CURLE_SSL_CACERT_BADFILE and CURLE_GOT_NOTHING. Any other code that is not CURLE_OK gets a
// "Problem with data transfer" message.
// NOTE(review): the curl_code parameter line, the declarations of msg, and every return/throw statement are
// not visible in this listing, so the result of each branch cannot be stated from here.
724static bool eval_curl_easy_perform_code(
725 const string &eff_req_url,
727 const char *error_buffer,
728 const unsigned int attempt
730 if (curl_code == CURLE_SSL_CONNECT_ERROR) {
732 msg << prolog <<
"ERROR - cURL experienced a CURLE_SSL_CONNECT_ERROR error. Message: ";
733 msg << curl::error_message(curl_code, error_buffer) <<
". ";
734 msg <<
"A retry may be possible for: " << filter_aws_url(eff_req_url) <<
" (attempt: " << attempt <<
")."
736 BESDEBUG(MODULE, msg.str());
737 ERROR_LOG(msg.str());
739 }
else if (curl_code == CURLE_SSL_CACERT_BADFILE) {
741 msg << prolog <<
"ERROR - cURL experienced a CURLE_SSL_CACERT_BADFILE error. Message: ";
742 msg << curl::error_message(curl_code, error_buffer) <<
". ";
743 msg <<
"A retry may be possible for: " << filter_aws_url(eff_req_url) <<
" (attempt: " << attempt <<
")."
745 BESDEBUG(MODULE, msg.str());
746 ERROR_LOG(msg.str());
748 }
else if (curl_code == CURLE_GOT_NOTHING) {
753 msg << prolog <<
"ERROR - cURL returned CURLE_GOT_NOTHING. Message: ";
754 msg << error_message(curl_code, error_buffer) <<
". ";
755 msg <<
"A retry may be possible for: " << filter_aws_url(eff_req_url) <<
" (attempt: " << attempt <<
")."
757 BESDEBUG(MODULE, msg.str());
758 ERROR_LOG(msg.str());
760 }
else if (curl_code != CURLE_OK) {
// Unlike the branches above, this message is built without the prolog prefix; prolog is added only for BESDEBUG.
762 msg <<
"ERROR - Problem with data transfer. Message: " << curl::error_message(curl_code, error_buffer);
763 msg <<
" CURLINFO_EFFECTIVE_URL: " << filter_aws_url(eff_req_url);
764 BESDEBUG(MODULE, prolog << msg.str() << endl);
765 ERROR_LOG(msg.str());
// Turns an HTTP status into either an exception or a retry decision.
// http_code: the HTTP status. requested_url: the URL originally asked for.
// last_accessed_url: the effective URL after any redirects.
// For codes of 400 and above a message is assembled; the visible throw sites raise http::HttpError and
// BESInternalError, and is_retryable(last_accessed_url) selects between "should not be retried" (throws)
// and "should be retried" (debug log only).
// NOTE(review): the return type line, the declaration of msg, the switch or branch heads that pick between
// the throw sites, and the remaining constructor arguments are not visible in this listing.
784process_http_code_helper(
const long http_code,
const string &requested_url,
const string &last_accessed_url) {
786 if (http_code >= 400) {
787 msg <<
"ERROR - The HTTP GET request for the source URL: " << requested_url <<
" FAILED. ";
788 msg <<
"CURLINFO_EFFECTIVE_URL: " << filter_aws_url(last_accessed_url) <<
" ";
789 BESDEBUG(MODULE, prolog << msg.str() << endl);
// NOTE(review): last_accessed_url is written here without filter_aws_url(), unlike the statement above, so a
// signed URL would appear in full in this message -- confirm whether that is intended.
792 msg <<
"The response from " << last_accessed_url <<
" (Originally: " << requested_url <<
") ";
793 msg <<
"returned an HTTP code of " << http_code;
794 msg <<
" which means " << http_code_to_string(http_code) <<
" ";
808 throw http::HttpError(msg.str(),
823 if (!is_retryable(last_accessed_url)) {
824 msg <<
" The HTTP response code of this last accessed URL indicate that it should not be retried.";
825 ERROR_LOG(msg.str());
826 throw http::HttpError(msg.str(),
833 msg <<
" The HTTP response code of this last accessed URL indicate that it should be retried.";
834 BESDEBUG(MODULE, prolog << msg.str() << endl);
841 throw BESInternalError(msg.str(), __FILE__, __LINE__);
// Reads CURLINFO_RESPONSE_CODE from an easy handle.
// Throws BESInternalError when curl_easy_getinfo() does not return CURLE_OK.
// NOTE(review): the declaration of http_code and the return statement are not visible in this listing;
// presumably http_code is a long that is returned -- confirm.
852long get_http_code(CURL *ceh) {
854 CURLcode curl_code = curl_easy_getinfo(ceh, CURLINFO_RESPONSE_CODE, &http_code);
855 if (curl_code != CURLE_OK) {
856 throw BESInternalError(prolog +
"Error acquiring HTTP response code.", __FILE__, __LINE__);
858 BESDEBUG(MODULE, prolog <<
"http_code: " << http_code <<
"\n");
// Evaluates the HTTP outcome of a completed transfer.
// ceh: the handle that performed the transfer. requested_url: the URL originally asked for.
// http_code: output parameter, set from get_http_code(ceh).
// NOTE(review): the statement that follows the FILE_PROTOCOL test, the declarations of curl_code and
// redirects, and every return statement are not visible in this listing, so the returned value in each
// case cannot be stated from here.
900static bool eval_http_get_response(CURL *ceh,
const string &requested_url,
long &http_code) {
901 BESDEBUG(MODULE, prolog <<
"Requested URL: " << requested_url << endl);
903 http_code = get_http_code(ceh);
// Special case: a URL that starts with FILE_PROTOCOL and produced a status of 0.
906 if (requested_url.find(FILE_PROTOCOL) == 0 && http_code == 0)
// Redirect details are queried only when debugging is enabled for MODULE.
910 if (BESISDEBUG(MODULE)) {
913 curl_code = curl_easy_getinfo(ceh, CURLINFO_REDIRECT_COUNT, &redirects);
914 if (curl_code != CURLE_OK)
915 throw BESInternalError(
"Error acquiring CURLINFO_REDIRECT_COUNT.", __FILE__, __LINE__);
916 BESDEBUG(MODULE, prolog <<
"CURLINFO_REDIRECT_COUNT: " << redirects << endl);
918 char *redirect_url =
nullptr;
919 curl_code = curl_easy_getinfo(ceh, CURLINFO_REDIRECT_URL, &redirect_url);
920 if (curl_code != CURLE_OK)
921 throw BESInternalError(
"Error acquiring CURLINFO_REDIRECT_URL.", __FILE__, __LINE__);
// NOTE(review): redirect_url can still be null here; presumably the missing line before this one checks it -- confirm.
924 BESDEBUG(MODULE, prolog <<
"CURLINFO_REDIRECT_URL: " << redirect_url << endl);
938 string last_accessed_url = get_effective_url(ceh, requested_url);
939 BESDEBUG(MODULE, prolog <<
"Last Accessed URL(CURLINFO_EFFECTIVE_URL): "
940 << filter_aws_url(last_accessed_url) << endl);
// May throw, see process_http_code_helper().
946 process_http_code_helper(http_code, requested_url, last_accessed_url);
// Empties the file behind fd and moves the file offset back to the start, so a retried request can write
// a fresh response. The visible message texts say each step throws BESInternalError with strerror(errno).
// NOTE(review): the conditions that guard the two throws and the trailing constructor arguments are not
// visible in this listing; presumably each tests status for -1 -- confirm.
955static void truncate_file(
int fd) {
956 auto status = ftruncate(fd, 0);
958 throw BESInternalError(
string(
"Could not truncate the file before retrying request (") + strerror(errno) +
").",
963 status = lseek(fd, 0, SEEK_SET);
965 throw BESInternalError(
string(
"Could not seek within the response file (") + strerror(errno) +
").",
// Public entry point: runs the transfer through the two-argument super_easy_perform().
// NOTE(review): the declaration of fd is not visible in this listing; presumably it is a placeholder value
// meaning that there is no file descriptor to rewind -- confirm.
992void super_easy_perform(CURL *c_handle) {
994 super_easy_perform(c_handle, fd);
// Performs the transfer configured on c_handle, repeating it until both the cURL result and the HTTP result
// are successful, or until the attempt counter equals retry_limit (then HttpError is thrown).
// c_handle: a configured easy handle. fd: the file descriptor that receives the response; it is rewound at the end.
// NOTE(review): this listing omits lines inside the loop (the increment of attempts, the declaration of
// http_code, the try head, and whatever waits or truncates between attempts) and the guards around the final
// rewind. retry_time is initialised below, but its use is not visible here.
997static void super_easy_perform(CURL *c_handle,
int fd) {
998 BESDEBUG(MODULE, prolog <<
"BEGIN\n");
1000 useconds_t retry_time = url_retry_time;
1001 bool curl_success{
false};
1002 bool http_success{
false};
1004 unsigned int attempts{0};
// Function-local buffer that receives libcurl error text for the transfers below.
1006 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
1007 set_error_buffer(c_handle, error_buffer.data());
// Before any transfer the effective URL is the URL that was set on the handle; it is used for messages.
1009 string target_url = get_effective_url(c_handle,
"");
1011 if (target_url.empty())
1012 throw BESInternalError(
"URL acquisition failed.", __FILE__, __LINE__);
1015 while (!curl_success || !http_success) {
1018 prolog <<
"Requesting URL: " << filter_aws_url(target_url) <<
" attempt: " << attempts << endl);
1020 CURLcode curl_code = curl_easy_perform(c_handle);
1021 curl_success = eval_curl_easy_perform_code(target_url, curl_code, error_buffer.data(), attempts);
1022 BESDEBUG(MODULE, prolog <<
"curl_success: " << (curl_success ?
"true" :
"false") << endl);
1025 http_success = eval_http_get_response(c_handle, target_url, http_code);
1026 BESDEBUG(MODULE, prolog <<
"http_success: " << (http_success ?
"true" :
"false") << endl);
// Failure path: collect the effective URL for the message; if that lookup itself fails, the exception text
// is used in its place.
1031 if (!curl_success || !http_success) {
1032 string effective_url;
1034 effective_url = filter_aws_url(get_effective_url(c_handle, target_url));
1036 catch (BESInternalError &bie) {
1037 effective_url =
"Unable_To_Determine_CURLINFO_EFFECTIVE_URL: " + bie.
get_message();
// Give up once the attempt counter reaches the limit.
1039 if (attempts == retry_limit) {
1041 msg << prolog <<
"ERROR - Made " << retry_limit <<
" failed attempts to retrieve the URL ";
1042 msg << filter_aws_url(target_url) <<
" The retry limit has been exceeded. Giving up! ";
1043 msg <<
"CURLINFO_EFFECTIVE_URL: " << effective_url <<
" ";
1044 msg <<
"Returned HTTP_STATUS: " << http_code;
1045 throw HttpError(msg.str(),
1050 __FILE__, __LINE__);
// Otherwise record the failed attempt and go around again.
1052 INFO_LOG(prolog +
"Problem with data transfer. Will retry (url: "
1053 + filter_aws_url(target_url) +
" attempt: " + std::to_string(attempts) +
"). "
1054 +
"CURLINFO_EFFECTIVE_URL: " + effective_url +
" "
1055 +
"Returned HTTP_STATUS: " + std::to_string(http_code));
// Detach the local error buffer before it goes out of scope.
1066 unset_error_buffer(c_handle);
1068 BESDEBUG(MODULE, prolog <<
"cURL operations completed. fd: " << fd <<
"\n");
// Move the file offset back to the start so the caller can read the response from the beginning.
1072 BESDEBUG(MODULE, prolog <<
"Rewinding fd(" << fd <<
")\n");
1073 auto status = lseek(fd, 0, SEEK_SET);
1075 throw BESInternalError(
"Could not seek within the response file.", __FILE__, __LINE__);
1077 BESDEBUG(MODULE, prolog <<
"END\n");
// Downloads target_url and writes the response body to the open file descriptor fd.
// target_url: the URL to fetch; it must pass http::AllowedHosts, otherwise BESSyntaxUserError is thrown.
// fd: open file descriptor that receives the body. http_response_headers: optional vector for response headers.
// Request headers come from add_edl_auth_headers() and sign_url_for_s3_if_possible().
// NOTE(review): the try/catch structure and the declaration of res are not visible in this listing. The two
// pairs of cleanup calls at the end presumably belong to the normal path and to a catch block -- confirm.
1098void http_get_and_write_resource(
const std::shared_ptr<http::url> &target_url,
int fd,
1099 vector <string> *http_response_headers) {
1101 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
1103 CURL *ceh =
nullptr;
1104 curl_slist *req_headers =
nullptr;
1106 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
1108 if (!http::AllowedHosts::theHosts()->is_allowed(target_url)) {
1109 string err = (string)
"The specified URL " + target_url->str()
1110 +
" does not match any of the accessible services in"
1111 +
" the allowed hosts list.";
1112 BESDEBUG(MODULE, prolog << err << endl);
1113 throw BESSyntaxUserError(err, __FILE__, __LINE__);
1118 req_headers = add_edl_auth_headers(req_headers);
1120 req_headers = sign_url_for_s3_if_possible(target_url->str(), req_headers);
1123 ceh = init(target_url->str(), req_headers, http_response_headers);
1125 set_error_buffer(ceh, error_buffer.data());
1127 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, writeToOpenFileDescriptor);
1128 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", error_buffer.data(), __FILE__, __LINE__);
// The callback receives the address of the fd parameter; that address stays valid because the transfer
// runs inside this function (super_easy_perform() below).
1131 res = curl_easy_setopt(ceh, CURLOPT_FILE, &fd);
1132 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FILE", error_buffer.data(), __FILE__, __LINE__);
1135 unset_error_buffer(ceh);
1137 super_easy_perform(ceh, fd);
1140 BESDEBUG(MODULE, prolog <<
"Cleanup request headers. Calling curl_slist_free_all()." << endl);
1141 curl_slist_free_all(req_headers);
1144 curl_easy_cleanup(ceh);
1145 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." << endl);
1150 curl_slist_free_all(req_headers);
1152 curl_easy_cleanup(ceh);
1157 BESDEBUG(MODULE, prolog <<
"END" << endl);
// Builds a message from a CURLcode: the error buffer text (when used), then curl_easy_strerror() text and
// the numeric code.
// response_code: the CURLcode to describe. error_buffer: text captured through CURLOPT_ERRORBUFFER.
// NOTE(review): the declaration of msg, any guard around the first assignment, and the return statement are
// not visible in this listing. Callers in this file can pass a null error_buffer (set_error_buffer() is
// called with nullptr by unset_error_buffer()), so confirm the first assignment is guarded against null.
1168string error_message(
const CURLcode response_code,
const char *error_buffer) {
1171 msg = string(
"cURL_error_buffer: ") + error_buffer +
", ";
1173 msg += string(
"cURL_message: ") + curl_easy_strerror(response_code) +
" (code: "
1174 + to_string(response_code) +
")\n";
/**
 * @brief Write callback that appends the received bytes to a std::string.
 *
 * Installed with CURLOPT_WRITEFUNCTION; the matching CURLOPT_WRITEDATA value must be the address
 * of the std::string that collects the response body.
 *
 * @param buffer The received bytes (not NUL terminated; may contain NUL bytes).
 * @param size Size of one element.
 * @param nmemb Number of elements in buffer.
 * @param data Pointer to the std::string to append to.
 * @return The number of bytes appended, size * nmemb. libcurl treats any other value as a write error.
 */
static size_t string_write_data(void *buffer, size_t size, size_t nmemb, void *data) {
    auto *target = static_cast<std::string *>(data);
    const size_t nbytes = size * nmemb;
    // Nothing to copy; this also avoids handing a possibly null buffer to append().
    if (nbytes == 0)
        return 0;
    // append() grows the string and copies in one step, instead of writing through a pointer
    // obtained by casting away the constness of data().
    target->append(static_cast<const char *>(buffer), nbytes);
    return nbytes;
}
// Fetches target_url and stores the response body in buf.
// target_url: the URL to fetch. buf: receives the body through string_write_data(); a NUL character is
// appended to it after the transfer (see the push_back below).
// Request headers come from add_edl_auth_headers(), sign_url_for_s3_if_possible() and, when a non-empty
// edl_token credential is found, an extra Authorization header.
// NOTE(review): the try/catch structure, the credentials lookup, the guard before the throw, and the
// declaration of res are not visible in this listing. The second pair of cleanup calls presumably belongs to
// a catch block -- confirm.
// NOTE(review): add_edl_auth_headers() can already add an Authorization header; whether both can be added
// for one request depends on conditions that are not visible here -- confirm.
1200void http_get(
const string &target_url,
string &buf) {
1201 BESDEBUG(MODULE, prolog <<
"BEGIN\n");
1203 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
1204 CURL *ceh =
nullptr;
1206 curl_slist *request_headers =
nullptr;
1210 request_headers = add_edl_auth_headers(request_headers);
1212 request_headers = sign_url_for_s3_if_possible(target_url, request_headers);
1217 INFO_LOG(prolog +
"Looking for EDL Token for URL: " + target_url );
1218 string edl_token = credentials->
get(
"edl_token");
1219 if (!edl_token.empty()) {
1220 INFO_LOG(prolog +
"Using EDL Token for URL: " + target_url +
'\n');
1221 request_headers = curl::append_http_header(request_headers,
"Authorization", edl_token);
1226 ceh = curl::init(target_url, request_headers,
nullptr);
1228 throw BESInternalError(
string(
"ERROR! Failed to acquire cURL Easy Handle! "), __FILE__, __LINE__);
1231 set_error_buffer(ceh, error_buffer.data());
1234 res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, string_write_data);
1235 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", error_buffer.data(), __FILE__, __LINE__);
1238 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA,
reinterpret_cast<void *
>(&buf));
1239 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEDATA", error_buffer.data(), __FILE__, __LINE__);
1242 unset_error_buffer(ceh);
1244 super_easy_perform(ceh);
1247 BESDEBUG(MODULE, prolog <<
"Cleanup request headers. Calling curl_slist_free_all()." << endl);
1248 curl_slist_free_all(request_headers);
1251 curl_easy_cleanup(ceh);
1252 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." << endl);
// The NUL becomes part of buf, so buf.size() is one larger than the number of bytes received.
1255 buf.push_back(
'\0');
1258 curl_slist_free_all(request_headers);
1260 curl_easy_cleanup(ceh);
1264 BESDEBUG(MODULE, prolog <<
"END\n");
1268static string get_cookie_file_base() {
// Returns the cookie file name for this process: the base name from get_cookie_file_base(), a hyphen,
// and the process id from getpid().
1274string get_cookie_filename() {
1275 string cookie_file_base = get_cookie_file_base();
1276 stringstream cf_with_pid;
1277 cf_with_pid << cookie_file_base <<
"-" << getpid();
1278 return cf_with_pid.str();
1291string get_netrc_filename() {
// Installs error_buffer as the CURLOPT_ERRORBUFFER of the easy handle; a null pointer detaches the buffer
// (see unset_error_buffer()). Failure is reported through eval_curl_easy_setopt_result(), which throws.
// NOTE(review): the declaration of res is not visible in this listing.
// NOTE(review): error_buffer is also passed on as the message buffer, so it is null when called from
// unset_error_buffer(); confirm that error_message() tolerates a null buffer.
1303void set_error_buffer(CURL *ceh,
char *error_buffer) {
1305 res = curl_easy_setopt(ceh, CURLOPT_ERRORBUFFER, error_buffer);
1306 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_ERRORBUFFER", error_buffer, __FILE__, __LINE__);
// Returns the User-Agent string for outgoing requests, falling back to HTTP_DEFAULT_USER_AGENT when no
// value was found or the value is empty.
// NOTE(review): the lookup that sets found and user_agent, and the return statement, are not visible in this
// listing; presumably the value is read from the BES configuration keys and user_agent is returned -- confirm.
1316string hyrax_user_agent() {
1320 if (!found || user_agent.empty()) {
1321 user_agent = HTTP_DEFAULT_USER_AGENT;
1323 BESDEBUG(MODULE, prolog <<
"User-Agent: " << user_agent << endl);
// Checks the result of a curl_easy_setopt() call and throws BESInternalError when it is not CURLE_OK.
// curl_code: the value returned by curl_easy_setopt(). msg_base: prefix for the message (callers pass prolog).
// opt_name: the option name used in the message. ebuf: error buffer text handed to curl::error_message().
// file, line: the location reported in the exception (callers pass __FILE__ and __LINE__).
// NOTE(review): the declaration of msg is not visible in this listing.
1344void eval_curl_easy_setopt_result(CURLcode curl_code,
const string &msg_base,
const string &opt_name,
1345 const char *ebuf,
const string &file,
unsigned int line) {
1346 if (curl_code != CURLE_OK) {
1348 msg << msg_base <<
"ERROR - cURL failed to set " << opt_name <<
" Message: "
1349 << curl::error_message(curl_code, ebuf);
1350 throw BESInternalError(msg.str(), file, line);
1355unsigned long max_redirects() {
// Appends the header line <header_name>: <value> to a curl_slist using curl_slist_append().
// slist: the existing list (callers in this file start with nullptr). Throws BESInternalError on failure.
// NOTE(review): the test of temp, the declaration of msg, the rest of the message and the return statement
// are not visible in this listing; presumably temp is returned on success -- confirm.
1372curl_slist *append_http_header(curl_slist *slist,
const string &header_name,
const string &value) {
1374 string full_header = header_name;
1375 full_header.append(
": ").append(value);
// NOTE(review): this writes the complete header, including Authorization values, to the debug log.
1377 BESDEBUG(MODULE, prolog << full_header << endl);
1379 auto temp = curl_slist_append(slist, full_header.c_str());
1382 msg << prolog <<
"Encountered cURL Error setting the " << header_name <<
" header. full_header: "
1384 throw BESInternalError(msg.str(), __FILE__, __LINE__);
// Adds up to three headers taken from the BES context manager: User-Id (EDL_UID_KEY),
// Authorization (EDL_AUTH_TOKEN_KEY) and Echo-Token (EDL_ECHO_TOKEN_KEY).
// A header is added only when its context value was found and is not empty.
// request_headers: the list to extend. Returns the possibly extended list.
// NOTE(review): the declarations of found and s are not visible in this listing.
1421curl_slist *add_edl_auth_headers(curl_slist *request_headers) {
1425 s = BESContextManager::TheManager()->
get_context(EDL_UID_KEY, found);
1426 if (found && !s.empty()) {
1427 request_headers = append_http_header(request_headers,
"User-Id", s);
1430 s = BESContextManager::TheManager()->
get_context(EDL_AUTH_TOKEN_KEY, found);
1431 if (found && !s.empty()) {
1432 request_headers = append_http_header(request_headers,
"Authorization", s);
1435 s = BESContextManager::TheManager()->
get_context(EDL_ECHO_TOKEN_KEY, found);
1436 if (found && !s.empty()) {
1437 request_headers = append_http_header(request_headers,
"Echo-Token", s);
1440 return request_headers;
// Adds the three headers of an AWS signature (Authorization, x-amz-content-sha256, x-amz-date) for
// target_url to req_headers. The signature is computed by compute_awsv4_signature() from the URL path, query
// and host, the current time, and the ID, key and region stored in ac, for the service name s3.
// ac is dereferenced without a null check; the visible caller checks it first.
// NOTE(review): the return type line and the return statement are not visible in this listing; presumably
// req_headers is returned -- confirm.
1464sign_s3_url(
const string &target_url, AccessCredentials *ac, curl_slist *req_headers) {
// The same timestamp is used for the signature and for the x-amz-date header.
1465 const time_t request_time = time(
nullptr);
1466 const auto url_obj = http::url(target_url);
1467 const string auth_header = compute_awsv4_signature(url_obj.path(), url_obj.query(), url_obj.host(),
1468 request_time, ac->
get(AccessCredentials::ID_KEY),
1469 ac->
get(AccessCredentials::KEY_KEY),
1470 ac->
get(AccessCredentials::REGION_KEY),
"s3");
// NOTE(review): this writes the Authorization header value to the debug log.
1472 BESDEBUG(MODULE, prolog <<
"Authorization: " << auth_header <<
"\n");
1473 req_headers = append_http_header(req_headers,
"Authorization", auth_header);
// Fixed payload hash; this literal is the SHA-256 digest of empty input (a GET request has no body).
1474 req_headers = append_http_header(req_headers,
"x-amz-content-sha256",
1475 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
1476 req_headers = append_http_header(req_headers,
"x-amz-date", AWSV4::ISO8601_date(request_time));
1477 INFO_LOG(prolog +
"Signed S3 request for " + target_url);
// Convenience overload: forwards target_url->str() to the string overload of sign_s3_url().
// target_url is dereferenced without a null check.
1491sign_s3_url(
const shared_ptr <url> &target_url, AccessCredentials *ac, curl_slist *req_headers) {
1492 return sign_s3_url(target_url->str(), ac, req_headers);
/**
 * @brief Test whether a URL carries the query parameters of an AWS signature.
 *
 * This is a plain substring test: the URL counts as signed when it contains all three of
 * "X-Amz-Algorithm=", "X-Amz-Credential=" and "X-Amz-Signature=" anywhere in its text.
 *
 * @param url The URL text to inspect.
 * @return true when all three markers are present, false otherwise.
 */
bool is_url_signed_for_s3(const std::string &url) {
    return url.find("X-Amz-Algorithm=") != std::string::npos &&
           url.find("X-Amz-Credential=") != std::string::npos &&
           url.find("X-Amz-Signature=") != std::string::npos;
}
// Convenience overload: forwards target_url->str() to the string overload of is_url_signed_for_s3().
// target_url is dereferenced without a null check.
1519bool is_url_signed_for_s3(
const std::shared_ptr<http::url> &target_url) {
1520 return is_url_signed_for_s3(target_url->str());
// Creates an easy handle with curl::init() and then changes it so that redirects are NOT followed, the
// response body is collected in response_body and the response headers in resp_hdrs.
// target_url: the URL to request. req_headers: optional request header list.
// resp_hdrs, response_body: output containers; their addresses are stored in the handle, so they must
// outlive the transfer.
// NOTE(review): the return statement is not visible in this listing; presumably ceh is returned -- confirm.
1538static CURL *init_no_follow_redirects_handle(
const string &target_url,
const curl_slist *req_headers,
1539 vector <string> &resp_hdrs,
string &response_body) {
1541 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
1542 CURL *ceh = curl::init(target_url, req_headers, &resp_hdrs);
1544 set_error_buffer(ceh, error_buffer.data());
1547 CURLcode res = curl_easy_setopt(ceh, CURLOPT_WRITEFUNCTION, string_write_data);
1548 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEFUNCTION", error_buffer.data(), __FILE__, __LINE__);
1551 res = curl_easy_setopt(ceh, CURLOPT_WRITEDATA,
reinterpret_cast<void *
>(&response_body));
1552 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEDATA", error_buffer.data(), __FILE__, __LINE__);
// curl::init() was already given &resp_hdrs above; the same pointer is set again here.
1557 res = curl_easy_setopt(ceh, CURLOPT_WRITEHEADER, &resp_hdrs);
1558 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_WRITEHEADER", error_buffer.data(), __FILE__, __LINE__);
// Overrides the FOLLOWLOCATION value of 1 that init() sets.
1561 res = curl_easy_setopt(ceh, CURLOPT_FOLLOWLOCATION, 0L);
1562 eval_curl_easy_setopt_result(res, prolog,
"CURLOPT_FOLLOWLOCATION", error_buffer.data(), __FILE__, __LINE__);
1564 unset_error_buffer(ceh);
/**
 * @brief Append a human readable dump of an HTTP response to a message stream.
 *
 * The dump consists of a ruler line, the HTTP status code, one indented line per response
 * header, and the response body framed by BEGIN/END marker lines.
 *
 * @param http_code The HTTP status code of the response.
 * @param response_headers The response header lines, written in order.
 * @param response_body The response body, written verbatim.
 * @param msg The stream that receives the text.
 */
void write_response_details(const long http_code,
                            const std::vector<std::string> &response_headers,
                            const std::string &response_body,
                            std::stringstream &msg) {
    msg << "# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --\n";
    msg << "HTTP Response Details\n";
    msg << "The remote service returned an HTTP code of: " << http_code << "\n";
    msg << "Response Headers -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --\n";
    for (const auto &hdr: response_headers) {
        msg << "  " << hdr << "\n";
    }
    msg << "# BEGIN Response Body -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --\n";
    msg << response_body << "\n";
    msg << "# END Response Body -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --\n";
}
// Evaluates the HTTP status of a request that was expected to answer with a redirect.
// http_code: the HTTP status. response_headers, response_body: the response, used only in error messages.
// redirect_url_str: the redirect target reported for the response. origin_url_str: the URL that was requested.
// attempt, max_attempts: the current attempt number and the limit at which an exception is thrown.
// Two failure messages are visible: one for a redirect to a host containing urs.earthdata.nasa.gov
// (BESSyntaxUserError: credentials missing, invalid or expired) and one for a response that was not the
// expected redirect (HttpError).
// NOTE(review): the case labels of the switch, the declarations of msg, the statements that set success, and
// the return are not visible in this listing, so which status codes reach which branch cannot be stated here.
1603bool process_get_redirect_http_code(
const long http_code,
1604 const vector <string> &response_headers,
1605 const string &response_body,
1606 const string &redirect_url_str,
1607 const string &origin_url_str,
1608 const unsigned int attempt,
1609 const unsigned int max_attempts) {
1610 bool success =
false;
1611 switch (http_code) {
1619 http::url rdu(redirect_url_str);
// The host test is a substring search, not an exact comparison.
1620 if (rdu.host().find(
"urs.earthdata.nasa.gov") != string::npos) {
1621 if (attempt >= max_attempts) {
1623 msg << prolog <<
"ERROR - I tried " << attempt <<
" times to access the url:\n";
1624 msg <<
"     " << origin_url_str <<
"\n";
1625 msg <<
"It seems that the provided access credentials are either missing, invalid, or expired.\n";
1626 msg <<
"Here are the details from the most recent attempt:\n\n";
1627 write_response_details(http_code, response_headers, response_body, msg);
1628 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
1639 if (attempt >= max_attempts) {
1642 msg << prolog <<
"ERROR - I tried " << attempt <<
" times to access:\n";
1643 msg <<
"     " << origin_url_str <<
"\n";
1644 msg <<
"I was expecting to receive an HTTP redirect code and location header in the response. \n";
1645 msg <<
"Unfortunately this did not happen.\n";
1646 msg <<
"Here are the details of the most recent transaction:\n\n";
1647 write_response_details(http_code, response_headers, response_body, msg);
1648 throw HttpError(msg.str(),
1655 __FILE__, __LINE__);
/**
 * @brief Makes a single attempt to discover the redirect URL for origin_url.
 *
 * NOTE(review): this listing omits a number of the original lines (several
 * declarations, the argument lists of some calls, and the block structure
 * around the two cleanup sequences). The description below covers only the
 * logic that is visible.
 *
 * Visible behavior:
 *  - Request headers are passed through add_edl_auth_headers() and
 *    sign_url_for_s3_if_possible(); a non-empty "edl_token" credential is
 *    appended as an "Authorization" header.
 *  - A cURL handle from init_no_follow_redirects_handle() is run with
 *    curl_easy_perform() and the result is checked by
 *    eval_curl_easy_perform_code().
 *  - The HTTP status and CURLINFO_REDIRECT_URL are read from the handle and
 *    judged by process_get_redirect_http_code(); redirect_url is assigned a
 *    new http::EffectiveUrl built from the redirect URL string.
 *  - When cURL did not succeed and attempt has reached max_attempts, a
 *    BESInternalError carrying the cURL error message is thrown.
 *  - The request header list and the cURL handle are released with
 *    curl_slist_free_all() and curl_easy_cleanup().
 *
 * @param origin_url The URL to probe.
 * @param attempt The number of the current attempt (used in messages and to
 * decide when to give up).
 * @param max_attempts The attempt count at which failures become exceptions.
 * @param redirect_url Value-result parameter that receives the redirect URL.
 * @return true only when both the cURL transfer and the HTTP evaluation
 * succeeded.
 * @throws BESInternalError when the cURL transfer failed and
 * attempt >= max_attempts. Exceptions thrown by
 * process_get_redirect_http_code() may also propagate.
 */
1672static bool gru_mk_attempt(
const shared_ptr <url> &origin_url,
1673 const unsigned int attempt,
1674 const unsigned int max_attempts,
1675 shared_ptr <EffectiveUrl> &redirect_url) {
1677 BESDEBUG(MODULE, prolog <<
" BEGIN This is attempt #" << attempt <<
" for " << origin_url->str() <<
"\n");
1678 bool http_success =
false;
1679 bool curl_success =
false;
1680 CURL *ceh =
nullptr;
1681 vector<char> error_buffer(CURL_ERROR_SIZE, (
char) 0);
1682 curl_slist *req_headers =
nullptr;
1684 vector<string> response_headers;
1685 string response_body;
1688 string redirect_url_str;
// Build the request header list.
1691 req_headers = add_edl_auth_headers(req_headers);
1692 req_headers = sign_url_for_s3_if_possible(origin_url, req_headers);
// The lookup that produces `credentials` is not visible in this listing.
1698 INFO_LOG(prolog +
"Looking for EDL Token for URL: " + origin_url->str() +
'\n');
1699 string edl_token = credentials->
get(
"edl_token");
1700 if (!edl_token.empty()) {
1701 INFO_LOG(prolog +
"Using EDL Token for URL: " + origin_url->str() +
'\n');
1702 req_headers = curl::append_http_header(req_headers,
"Authorization", edl_token);
// The arguments to init_no_follow_redirects_handle() are not visible here.
1709 ceh = init_no_follow_redirects_handle(
1717 BES_STOPWATCH_START(MODULE,prolog +
"Retrieved HTTP response from origin_url: " + origin_url->str());
1720 curl_code = curl_easy_perform(ceh);
1724 curl_success = eval_curl_easy_perform_code(
1727 error_buffer.data(),
1731 http_code = get_http_code(ceh);
1732 char *url =
nullptr;
1733 curl_easy_getinfo(ceh, CURLINFO_REDIRECT_URL, &url);
// NOTE(review): libcurl leaves `url` null when there is no redirect target;
// confirm that the omitted line before this assignment guards against that.
1735 redirect_url_str = url;
1737 BESDEBUG(MODULE, prolog <<
"redirect_url_str: " << redirect_url_str <<
"\n");
1738 http_success = process_get_redirect_http_code(http_code,
1746 redirect_url = make_shared<http::EffectiveUrl>(redirect_url_str,
1748 origin_url->is_trusted());
1750 }
else if (attempt >= max_attempts) {
// cURL itself failed and no attempts remain: report the cURL error.
1753 msg << prolog <<
"ERROR - I tried " << attempt <<
" times to access:\n";
// Use origin_url->str(): streaming the shared_ptr itself would print the
// stored pointer value rather than the URL text.
1754 msg <<
" " << origin_url->str() <<
"\n";
1755 msg <<
"I was expecting to receive an HTTP redirect code and location header in the response. \n";
1756 msg <<
"Unfortunately this did not happen.\n";
1757 msg <<
"This failure appears to be a problem with cURL.\n";
1758 msg <<
"The cURL message associated with the most recent failure is:\n";
1759 msg <<
" " << error_message(curl_code, error_buffer.data()) <<
"\n";
1760 throw BESInternalError(msg.str(), __FILE__, __LINE__);
// The header list and handle are released in two places in this listing;
// presumably one serves an error path and one the normal path. Verify
// against the complete source.
1764 curl_slist_free_all(req_headers);
1767 curl_easy_cleanup(ceh);
1768 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." <<
"\n");
1774 curl_slist_free_all(req_headers);
1778 curl_easy_cleanup(ceh);
1779 BESDEBUG(MODULE, prolog <<
"Called curl_easy_cleanup()." <<
"\n");
1783 BESDEBUG(MODULE, prolog <<
"curl_success: " << (curl_success ?
"true" :
"false") <<
"\n");
1784 BESDEBUG(MODULE, prolog <<
"http_success: " << (http_success ?
"true" :
"false") <<
"\n");
1785 BESDEBUG(MODULE, prolog <<
" END success: " << ((curl_success && http_success) ?
"true" :
"false") <<
1786 " on attempt #" << attempt <<
" for " << origin_url->str() <<
"\n");
// The attempt counts as successful only when both stages succeeded.
1788 return curl_success && http_success;
/**
 * @brief Determines the redirect (effective) URL for origin_url, retrying up
 * to retry_limit times.
 *
 * The origin URL is first checked against http::AllowedHosts; a URL that is
 * not allowed is rejected before any request is made. Each attempt is
 * delegated to gru_mk_attempt(), and the loop stops at the first success or
 * when the attempt count reaches retry_limit.
 *
 * NOTE(review): this listing omits some original lines, including the one
 * that advances `attempt` and the declaration of `msg`.
 *
 * @param origin_url The URL whose redirect target is wanted.
 * @return The redirect URL produced by a successful attempt.
 * @throws BESSyntaxUserError when origin_url is not in the allowed hosts list.
 * @throws BESInternalError when the loop ends without a successful attempt.
 * Exceptions thrown by gru_mk_attempt() may also propagate.
 */
1800std::shared_ptr<http::EffectiveUrl> get_redirect_url(
const std::shared_ptr<http::url> &origin_url) {
1802 BESDEBUG(MODULE, prolog <<
"BEGIN" << endl);
// Refuse to contact hosts that are not on the allowed hosts list.
1804 if (!http::AllowedHosts::theHosts()->is_allowed(origin_url)) {
1805 string err = (string)
"The specified URL " + origin_url->str()
1806 +
" does not match any of the accessible services in"
1807 +
" the allowed hosts list.";
1808 BESDEBUG(MODULE, prolog << err << endl);
1809 throw BESSyntaxUserError(err, __FILE__, __LINE__);
1812 std::shared_ptr<http::EffectiveUrl> redirect_url;
1814 unsigned int attempt = 0;
1815 bool success =
false;
1817 while (!success && (attempt < retry_limit)) {
1819 success = gru_mk_attempt(origin_url, attempt, retry_limit, redirect_url);
// Failsafe: give up only when no attempt succeeded. Testing the attempt
// count instead would also fire when the final permitted attempt succeeds
// (attempt == retry_limit), throwing away a valid redirect_url.
1824 if (!success) {
1826 msg << prolog <<
"ERROR: I tried " << attempt <<
" times to determine the redirect URL for the origin_url:\n";
1827 msg <<
" " << origin_url->str() <<
"\n";
1828 msg <<
"Oddly, I was unable to detect an error, but nonetheless I have made the maximum ";
1829 msg <<
"number of attempts and I must now give up...\n";
1830 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1833 BESDEBUG(MODULE, prolog <<
"END redirect_url: " << redirect_url->str() <<
"\n");
1834 return redirect_url;
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
std::string get_message() const
get the error message for this exception
static RequestServiceTimer * TheTimer()
Return a pointer to a singleton timer instance. If an instance does not exist it will create and init...
void throw_if_timeout_expired(const std::string &message, const std::string &file, const int line)
Checks the RequestServiceTimer to determine if the time spent servicing the request at this point has...
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
static std::string read_string_key(const std::string &key, const std::string &default_value)
Read a string-valued key from the bes.conf file.
virtual std::string get(const std::string &key)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
AccessCredentials * get(const std::shared_ptr< http::url > &url)
utility class for the HTTP catalog module
size_t load_max_redirects_from_keys()