bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
CurlHandlePool.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2018 OPeNDAP, Inc.
6// Author: James Gallagher<jgallagher@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
#include <iomanip>
#include <mutex>
#include <sstream>
#include <string>
29
30#include <curl/curl.h>
31
32#include "CurlUtils.h"
33#include "HttpError.h"
34#include "BESForbiddenError.h"
35#include "AllowedHosts.h"
36
37#include "DmrppCommon.h"
38#include "CurlHandlePool.h"
39#include "Chunk.h"
40#include "CredentialsManager.h"
41
42#define CURL_VERBOSE 0 // Logs curl info to the bes.log
43
44// Set to 1 to abuse the credentials mgr to get/use an EDL Token for certain URLs.
45// This is very expensive since it is run in code that is used when _each chunk_ is
46// transferred. jhrg 5/18/24
47// Turned this off in the master branch. jhrg 11/15/24
48#define POC_DMRpp_OWNERSHIP 0
49
50#define prolog std::string("CurlHandlePool::").append(__func__).append("() - ")
51
52using namespace dmrpp;
53using namespace http;
54using namespace std;
55
56std::recursive_mutex CurlHandlePool::d_share_mutex;
57
58std::recursive_mutex CurlHandlePool::d_cookie_mutex;
59std::recursive_mutex CurlHandlePool::d_dns_mutex;
60std::recursive_mutex CurlHandlePool::d_ssl_session_mutex;
61std::recursive_mutex CurlHandlePool::d_connect_mutex;
62std::recursive_mutex CurlHandlePool::d_mutex;
63
69#if 0
/**
 * @brief Build a classic hex/ASCII dump of a byte buffer.
 *
 * The output starts with a header line of the form
 * `<text>, <size as 10 decimal digits> bytes (0x<size as 8 hex digits>)`,
 * followed by one line per 16 input bytes. Each of those lines holds the
 * four-digit hex offset, the bytes as two-digit hex values (missing bytes on
 * a short last row are padded with spaces so the text column lines up) and
 * then the same bytes as characters, with anything outside 0x20..0x7f shown
 * as '.'.
 *
 * @param text Label written at the start of the header line.
 * @param ptr The bytes to dump; must reference at least \c size bytes.
 * @param size Number of bytes to dump.
 * @return The formatted dump; only the header line when \c size is zero.
 */
static
std::string dump(const char *text, unsigned char *ptr, size_t size)
{
    constexpr size_t width = 0x10;  // bytes shown per output row

    std::ostringstream oss;
    // The fill character is sticky; the field width is reset after every insertion.
    oss << std::setfill('0');
    oss << text << ", " << std::dec << std::setw(10) << size
        << " bytes (0x" << std::hex << std::setw(8) << size << ")\n";

    for (size_t row = 0; row < size; row += width) {
        oss << std::hex << std::setw(4) << row << ": ";

        /* show hex to the left */
        for (size_t col = 0; col < width; ++col) {
            if (row + col < size) {
                // Widen to an integer, otherwise the stream inserts the raw character.
                oss << std::setw(2) << static_cast<unsigned int>(ptr[row + col]) << ' ';
            }
            else {
                oss << "   ";
            }
        }

        /* show data on the right */
        for (size_t col = 0; col < width && row + col < size; ++col) {
            const unsigned char byte = ptr[row + col];
            oss << static_cast<char>((byte >= 0x20 && byte < 0x80) ? byte : '.');
        }

        oss << '\n';
    }

    return oss.str();
}
109#endif
110
111#if CURL_VERBOSE
117static
118int curl_trace(CURL */*handle*/, curl_infotype type, char *data, size_t /*size*/, void */*userp*/)
119{
120 string text = "";
121 switch (type) {
122 // print info
123 case CURLINFO_TEXT:
124 case CURLINFO_HEADER_OUT:
125 case CURLINFO_HEADER_IN: {
126 text = data;
127 size_t pos;
128 while ((pos = text.find('\n')) != string::npos)
129 text = text.substr(0, pos);
130 break;
131 }
132
133 // Do not build up 'text' for the data transfers
134 case CURLINFO_DATA_OUT:
135 case CURLINFO_SSL_DATA_OUT:
136 case CURLINFO_DATA_IN:
137 case CURLINFO_SSL_DATA_IN:
138 default: /* in case a new one is introduced to shock us */
139 break;
140 }
141
142 switch (type) {
143 // print info
144 case CURLINFO_TEXT:
145 LOG("libcurl == Info: " << text << endl);
146 break;
147
148 case CURLINFO_HEADER_OUT:
149 LOG("libcurl == Send header: " << text << endl);
150 break;
151 case CURLINFO_HEADER_IN:
152 LOG("libcurl == Recv header: " << text << endl);
153 break;
154
155 // Only print these if we're desperate and the above code has been hacked to match
156 case CURLINFO_DATA_OUT:
157 case CURLINFO_SSL_DATA_OUT:
158 case CURLINFO_DATA_IN:
159 case CURLINFO_SSL_DATA_IN:
160 default:
161 break;
162 }
163
164 return 0;
165}
166#endif
167
169 d_handle = curl_easy_init();
170 if (!d_handle) throw BESInternalError("Could not allocate CURL handle", __FILE__, __LINE__);
171
172 curl::set_error_buffer(d_handle, d_errbuf.data());
173
174 CURLcode res = curl_easy_setopt(d_handle, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2);
175 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_SSLVERSION", d_errbuf.data(), __FILE__, __LINE__);
176
177#if CURL_VERBOSE
178 res = curl_easy_setopt(d_handle, CURLOPT_DEBUGFUNCTION, curl_trace);
179 curl::check_setopt_result(res, prolog, "CURLOPT_DEBUGFUNCTION", d_errbuf, __FILE__, __LINE__);
180 // Many tests fail with this option, but it's still useful to see how connections
181 // are treated. jhrg 10/2/18
182 res = curl_easy_setopt(d_handle, CURLOPT_VERBOSE, 1L);
183 curl::check_setopt_result(res, prolog, "CURLOPT_VERBOSE", d_errbuf, __FILE__, __LINE__);
184#endif
185
186 res = curl_easy_setopt(d_handle, CURLOPT_HEADERFUNCTION, chunk_header_callback);
187 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERFUNCTION", d_errbuf.data(), __FILE__, __LINE__);
188
189 // Pass all data to the 'write_data' function
190 res = curl_easy_setopt(d_handle, CURLOPT_WRITEFUNCTION, chunk_write_data);
191 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEFUNCTION", d_errbuf.data(), __FILE__, __LINE__);
192
193#ifdef CURLOPT_TCP_KEEPALIVE
194 /* enable TCP keep-alive for this transfer */
195 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPALIVE, 1L);
196 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPALIVE", d_errbuf, __FILE__, __LINE__);
197#endif
198
199#ifdef CURLOPT_TCP_KEEPIDLE
200 /* keep-alive idle time to 120 seconds */
201 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPIDLE, 120L);
202 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPIDLE", d_errbuf, __FILE__, __LINE__);
203#endif
204
205#ifdef CURLOPT_TCP_KEEPINTVL
206 /* interval time between keep-alive probes: 120 seconds */
207 res = curl_easy_setopt(d_handle, CURLOPT_TCP_KEEPINTVL, 120L)
208 curl::check_setopt_result(res, prolog, "CURLOPT_TCP_KEEPINTVL", d_errbuf, __FILE__, __LINE__);
209#endif
210}
211
212dmrpp_easy_handle::~dmrpp_easy_handle() {
213 if (d_handle) curl_easy_cleanup(d_handle);
214 if (d_request_headers) curl_slist_free_all(d_request_headers);
215}
216
230 // Treat HTTP/S requests specially; retry some kinds of failures.
231 if (d_url->protocol() == HTTPS_PROTOCOL || d_url->protocol() == HTTP_PROTOCOL) {
232 try {
233 // This code throws an exception if there is a problem. jhrg 11/16/23
234 curl::super_easy_perform(d_handle);
235 }
236 catch (http::HttpError &http_error) {
237 string err_msg = prolog + "Hyrax encountered a Service Chaining Error while attempting to acquire "
238 "granule data from a remote source.\n"
239 "This could be a problem with TEA (the AWS URL signing authority),\n"
240 "or with accessing data granule at its resident location (typically S3).\n"
241 + http_error.get_message();
242 http_error.set_message(err_msg);
243 throw;
244 }
245
246 } else {
247 CURLcode curl_code = curl_easy_perform(d_handle);
248 if (CURLE_OK != curl_code) {
249 string msg = prolog + "ERROR - Data transfer error: ";
250 throw BESInternalError(msg.append(curl::error_message(curl_code, d_errbuf.data())), __FILE__, __LINE__);
251 }
252 }
253
254 d_chunk->set_is_read(true);
255}
256
257void CurlHandlePool::initialize() {
258 d_cookies_filename = curl::get_cookie_filename();
259 d_hyrax_user_agent = curl::hyrax_user_agent();
260 d_max_redirects = curl::max_redirects();
261 d_netrc_file = curl::get_netrc_filename();
262
263 // For this modification, the code that managed the curl handle pool (the original
264 // code) processed data from 20 requests for 2 variables with contiguous storage in
265 // 222ms. Removing the 'handle reuse' of the pool saw that time change to 351ms.
266 // Using the sharing feature of curl (and the blunt force trauma lock functions)
267 // and that time becomes 227ms. So our self-managed and the libcurl scheme are
268 // effectively equal, with the latter having some room for better performance if
269 // the lock functions are improved. jhrg 10/6/23
270
271 // See https://curl.se/libcurl/c/curl_share_init.html
272 d_share = curl_share_init();
273
274 // See https://curl.se/libcurl/c/curl_share_setopt.html
275 curl_share_setopt(d_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE);
276 curl_share_setopt(d_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
277 curl_share_setopt(d_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT);
278 curl_share_setopt(d_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_SSL_SESSION);
279
280 curl_share_setopt(d_share, CURLSHOPT_LOCKFUNC, lock_cb);
281 curl_share_setopt(d_share, CURLSHOPT_UNLOCKFUNC, unlock_cb);
282}
283
284CurlHandlePool::~CurlHandlePool() {
285 // See https://curl.se/libcurl/c/curl_share_cleanup.html
286 curl_share_cleanup(d_share);
287}
288
305 // Here we check to make sure that we are only going to
306 // access an approved location with this easy_handle
307 // TODO I don't think this belongs here. jhrg 5/13/22
308 string reason = "The requested resource does not match any of the AllowedHost rules.";
309 if (!http::AllowedHosts::theHosts()->is_allowed(chunk->get_data_url(), reason)) {
310 stringstream ss;
311 ss << "ERROR! The chunk url " << chunk->get_data_url()->str() << " was rejected because: " << reason;
312 throw BESForbiddenError(ss.str(), __FILE__, __LINE__);
313 }
314
315 auto handle = make_unique<dmrpp_easy_handle>();
316
317 if (handle) {
318 // Once here, d_easy_handle holds a CURL* we can use.
319 handle->d_in_use = true;
320 handle->d_url = chunk->get_data_url();
321
322 handle->d_chunk = chunk;
323
324 CURLcode res = curl_easy_setopt(handle->d_handle, CURLOPT_URL, chunk->get_data_url()->str().c_str());
325 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_URL", handle->d_errbuf.data(), __FILE__, __LINE__);
326
327 res = curl_easy_setopt(handle->d_handle, CURLOPT_SHARE, d_share);
328 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_SHARE", handle->d_errbuf.data(), __FILE__, __LINE__);
329
330 // get the offset to offset + size bytes
331 res = curl_easy_setopt(handle->d_handle, CURLOPT_RANGE, chunk->get_curl_range_arg_string().c_str());
332 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_RANGE", handle->d_errbuf.data(), __FILE__, __LINE__);
333
334 // Pass this to chunk_header_callback as the fourth argument
335 res = curl_easy_setopt(handle->d_handle, CURLOPT_HEADERDATA, reinterpret_cast<void *>(chunk));
336 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HEADERDATA", handle->d_errbuf.data(), __FILE__, __LINE__);
337
338 // Pass this to chunk_write_data as the fourth argument
339 res = curl_easy_setopt(handle->d_handle, CURLOPT_WRITEDATA, reinterpret_cast<void *>(chunk));
340 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_WRITEDATA", handle->d_errbuf.data(), __FILE__, __LINE__);
341
342 // store the easy_handle so that we can call release_handle in multi_handle::read_data()
343 res = curl_easy_setopt(handle->d_handle, CURLOPT_PRIVATE, reinterpret_cast<void *>(handle.get()));
344 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_PRIVATE", handle->d_errbuf.data(), __FILE__, __LINE__);
345
346 // Enabled cookies
347 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEFILE, d_cookies_filename.c_str());
348 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEFILE", handle->d_errbuf.data(), __FILE__, __LINE__);
349
350 res = curl_easy_setopt(handle->d_handle, CURLOPT_COOKIEJAR, d_cookies_filename.c_str());
351 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_COOKIEJAR", handle->d_errbuf.data(), __FILE__, __LINE__);
352
353 // Follow 302 (redirect) responses
354 res = curl_easy_setopt(handle->d_handle, CURLOPT_FOLLOWLOCATION, 1);
355 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_FOLLOWLOCATION", handle->d_errbuf.data(), __FILE__, __LINE__);
356
357 res = curl_easy_setopt(handle->d_handle, CURLOPT_MAXREDIRS, d_max_redirects);
358 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_MAXREDIRS", handle->d_errbuf.data(), __FILE__, __LINE__);
359
360 // Set the user agent something otherwise TEA will never redirect to URS.
361 res = curl_easy_setopt(handle->d_handle, CURLOPT_USERAGENT, d_hyrax_user_agent.c_str());
362 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_USERAGENT", handle->d_errbuf.data(), __FILE__, __LINE__);
363
364 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
365 // choosing the the 'safest' one supported by the server.
366 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
367 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPAUTH, (long) CURLAUTH_ANY);
368 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPAUTH", handle->d_errbuf.data(), __FILE__, __LINE__);
369
370 // Enable using the .netrc credentials file.
371 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC, CURL_NETRC_OPTIONAL);
372 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC", handle->d_errbuf.data(), __FILE__, __LINE__);
373
374 // If the configuration specifies a particular .netrc credentials file, use it.
375 if (!d_netrc_file.empty()) {
376 res = curl_easy_setopt(handle->d_handle, CURLOPT_NETRC_FILE, d_netrc_file.c_str());
377 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_NETRC_FILE", handle->d_errbuf.data(), __FILE__, __LINE__);
378 }
379
380 // If the URL is not signed for S3, then we need to look for credentials
381 // in the credentials manager.
382 if (!curl::is_url_signed_for_s3(handle->d_url->str())) {
383 AccessCredentials *credentials = CredentialsManager::theCM()->get(handle->d_url);
384 if (credentials && credentials->is_s3_cred()) {
385 handle->d_request_headers = curl::sign_s3_url(handle->d_url, credentials, handle->d_request_headers);
386 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_request_headers);
387 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", handle->d_errbuf.data(), __FILE__, __LINE__);
388 }
389 }
390#if 0
391 AccessCredentials *credentials = CredentialsManager::theCM()->get(handle->d_url);
392 INFO_LOG(prolog << "Looked for credentials for: " << handle->d_url->str() << '\n');
393 // TODO Replace with: curl_slist *sign_s3_url(const shared_ptr <url> &target_url, AccessCredentials *ac, curl_slist *req_headers)
394 // jhrg 11/22/24
395 if (credentials && credentials->is_s3_cred()) {
396 BESDEBUG(DMRPP_CURL, prolog << "Got AccessCredentials instance:\n" << credentials->to_json() << '\n');
397 // If there are available credentials, and they are S3 credentials then we need to sign the request
398 const std::time_t request_time = std::time(0);
399
400 const std::string auth_header =
401 AWSV4::compute_awsv4_signature(
402 handle->d_url,
403 request_time,
404 credentials->get(AccessCredentials::ID_KEY),
405 credentials->get(AccessCredentials::KEY_KEY),
406 credentials->get(AccessCredentials::REGION_KEY),
407 "s3");
408
409
410 handle->d_request_headers = curl::append_http_header((curl_slist *) nullptr, "Authorization", auth_header);
411 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-content-sha256",
412 "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
413 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "x-amz-date",
414 AWSV4::ISO8601_date(request_time));
415 // TODO here. jhrg 11/2/22
416 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_request_headers);
417 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", handle->d_errbuf.data(), __FILE__, __LINE__);
418 }
419#endif
420#if POC_DMRpp_OWNERSHIP
421 // FIXME DO NOT MERGE THIS. For POC work on DMR++ Ownership.
422 // TRY abuse the credentials mgr to get/use and EDL Token for certain URLs. jhrg 5/18/24
423 else if (credentials) {
424 INFO_LOG(prolog << "Looking for EDL Token for URL: " << handle->d_url->str() << '\n');
425 string edl_token = credentials->get("edl_token");
426 if (!edl_token.empty()) {
427 INFO_LOG(prolog << "Using EDL Token for URL: " << handle->d_url->str() << '\n');
428 handle->d_request_headers = curl::append_http_header(handle->d_request_headers, "Authorization", edl_token);
429 res = curl_easy_setopt(handle->d_handle, CURLOPT_HTTPHEADER, handle->d_request_headers);
430 curl::eval_curl_easy_setopt_result(res, prolog, "CURLOPT_HTTPHEADER", handle->d_errbuf.data(), __FILE__, __LINE__);
431 }
432 }
433#endif
434 }
435
436 return handle.release();
437}
438
446 delete handle;
447}
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
void set_message(const std::string &msg)
set the error message for this exception
Definition BESError.h:108
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. libcurl requires a string argument for range-get activities,...
Definition Chunk.cc:496
virtual std::shared_ptr< http::url > get_data_url() const
Get the data URL for this chunk.
Definition Chunk.cc:1357
static void release_handle(dmrpp_easy_handle *h)
dmrpp_easy_handle * get_easy_handle(Chunk *chunk)
Bundle a libcurl easy handle with other information.
void read_data()
This is the read_data() method for all transfers.
dmrpp_easy_handle()
Build a string with hex info about stuff libcurl gets.
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
virtual std::string get(const std::string &key)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
AccessCredentials * get(const std::shared_ptr< http::url > &url)
utility class for the HTTP catalog module
Definition TheBESKeys.h:51