libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
Go to the documentation of this file.
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28#include <algorithm>
29#include <cerrno>
30#include <cstdlib>
31#include <cstring>
32#include <iterator>
33#include <sstream>
34#include <string>
35#include <vector>
36
37#ifdef HAVE_UNISTD_H
38#include <unistd.h>
39#endif
40
41#include <sys/stat.h>
42
43#include <curl/curl.h>
44// No longer used in CURL - pwest April 09, 2012
45// #include <curl/types.h>
46#include <curl/easy.h>
47
48#include "GNURegex.h"
49#include "HTTPCache.h"
50#include "HTTPCacheResponse.h"
51#include "HTTPConnect.h"
52#include "HTTPResponse.h"
53#include "RCReader.h"
54#include "debug.h"
55#include "mime_util.h"
56
57using namespace std;
58
59namespace libdap {
60
61#ifndef NDEBUG
62// If this is a developer build (so NDEBUG is not defined) then if the HTTPConnect
63// field d_verbose_runtime is true, then the VERBOSE_RUNTIME macro will print stuff.
64// This will only work inside HTTPConnect methods with code that has access to the
65// private field d_verbose_runtime. 02/22/23 jhrg
66#define VERBOSE_RUNTIME(x) \
67 do { \
68 if (d_verbose_runtime) \
69 (x); \
70 } while (false)
71#else
72#define VERBOSE_RUNTIME(x) /* x */
73#endif
74
75// These global variables are not MT-Safe, but I'm leaving them as is because
76// they are used only for debugging (set them in a debugger like gdb or ddd).
77// They are not static because I think that many debuggers cannot access
78// static variables. 08/07/02 jhrg
79
80// Set this to 1 to turn on libcurl's verbose mode (for debugging).
81int www_trace = 0;
82
83// Set this to 1 to turn on libcurl's VERY verbose mode.
85
86// Keep the temporary files; useful for debugging.
88
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Explanations for the HTTP client errors CLIENT_ERR_MIN..CLIENT_ERR_MAX,
// indexed by (status - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",
    "Unauthorized: Contact the server administrator.",
    "Payment Required.",
    "Forbidden: Contact the server administrator.",
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention.\n"
    " Please contact the server administrator.",
    "Method Not Allowed.",
    "Not Acceptable.",
    "Proxy Authentication Required.",
    "Request Time-out.",
    "Conflict.",
    "Gone:.",
    "Length Required.",
    "Precondition Failed.",
    "Request Entity Too Large.",
    "Request URI Too Large.",
    "Unsupported Media Type.",
    "Requested Range Not Satisfiable.",
    "Expectation Failed."};

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Explanations for the HTTP server errors SERVER_ERR_MIN..SERVER_ERR_MAX,
// indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.", "Not Implemented.", "Bad Gateway.",
    "Service Unavailable.",   "Gateway Time-out.", "HTTP Version Not Supported."};

/**
 * @brief Translate an HTTP error status into a message for the user.
 *
 * Codes covered by the two tables above get their canned explanation. Any
 * other 4xx or 5xx code (429, 511, ...) gets a generic client/server message
 * that includes the numeric status, so a legitimate server reply is not
 * reported as a libdap fault. Everything else yields the "Unknown Error" text.
 *
 * @param status The HTTP status code.
 * @return The message text.
 */
static std::string http_status_to_string(int status) {
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return {http_client_errors[status - CLIENT_ERR_MIN]};

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return {http_server_errors[status - SERVER_ERR_MIN]};

    // Valid error classes for which there is no specific text in the tables.
    if (status >= 400 && status < 500)
        return "Client Error: The server returned HTTP status " + std::to_string(status) + ".";

    if (status >= 500 && status < 600)
        return "Server Error: The server returned HTTP status " + std::to_string(status) + ".";

    return {"Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."};
}
129
130static ObjectType determine_object_type(const string &header_value) {
131 string::size_type plus = header_value.find('+');
132 string base_type;
133 string type_extension;
134 if (plus != string::npos) {
135 base_type = header_value.substr(0, plus);
136 type_extension = header_value.substr(plus + 1);
137 } else
138 base_type = header_value;
139
140 if (base_type == DMR_Content_Type ||
141 (base_type.find("application/") != string::npos && base_type.find("dap4.dataset-metadata") != string::npos)) {
142 if (type_extension == "xml")
143 return dap4_dmr;
144 else
145 return unknown_type;
146 } else if (base_type == DAP4_DATA_Content_Type ||
147 (base_type.find("application/") != string::npos && base_type.find("dap4.data") != string::npos)) {
148 return dap4_data;
149 } else if (header_value.find("text/html") != string::npos) {
150 return web_error;
151 } else
152 return unknown_type;
153}
154
158
159class ParseHeader : public unary_function<const string &, void> {
160 ObjectType type = unknown_type; // What type of object is in the stream?
161 string server = "dods/0.0"; // Server's version string.
162 string protocol = "2.0"; // Server's protocol version.
163 string location; // Url returned by server
164
165public:
166 ParseHeader() = default;
167
168 void operator()(const string &line) {
169 string name, value;
170 parse_mime_header(line, name, value);
171
172 DBG2(cerr << name << ": " << value << endl);
173
174 // Content-Type is used to determine the content of DAP4 responses, but allow the
175 // Content-Description header to override CT o preserve operation with DAP2 servers.
176 // jhrg 11/12/13
177 if (type == unknown_type && name == "content-type") {
178 type = determine_object_type(value); // see above
179 }
180 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
181 type = get_description_type(value); // defined in mime_util.cc
182 }
183 // The second test (== "dods/0.0") tests if xopendap-server has already
184 // been seen. If so, use that header in preference to the old
185 // XDODS-Server header. jhrg 2/7/06
186 else if (name == "xdods-server" && server == "dods/0.0") {
187 server = value;
188 } else if (name == "xopendap-server") {
189 server = value;
190 } else if (name == "xdap") {
191 protocol = value;
192 } else if (server == "dods/0.0" && name == "server") {
193 server = value;
194 } else if (name == "location") {
195 location = value;
196 }
197 }
198
199 ObjectType get_object_type() const { return type; }
200
201 string get_server() const { return server; }
202
203 string get_protocol() const { return protocol; }
204
205 string get_location() const { return location; }
206};
207
222
/**
 * @brief libcurl header callback: append one response header line to a vector.
 *
 * The trailing "\n" or "\r\n" is removed. Empty lines and HTTP status lines
 * (lines that begin with "HTTP/") are not stored.
 *
 * Compared with the previous version this one:
 * - tolerates an empty buffer (size * nmemb == 0) instead of underflowing,
 * - only strips line terminators that are actually present, and
 * - recognises status lines by their "HTTP/" prefix, so a header that merely
 *   contains the text "HTTP" (e.g. "Via: HTTP/1.1 proxy") is kept.
 *
 * @param ptr Start of the header data (not NUL terminated).
 * @param size Size of one data item.
 * @param nmemb Number of data items.
 * @param resp_hdrs Pointer to the std::vector<std::string> receiving the lines.
 * @return The number of bytes consumed, always size * nmemb.
 */
static size_t save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) {
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    auto hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    const size_t total = size * nmemb;
    if (ptr == nullptr || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair.
    const char *data = static_cast<const char *>(ptr);
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
242
244static int curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) {
245 string message(msg, size);
246
247 switch (info) {
248 case CURLINFO_TEXT:
249 cerr << "Text: " << message;
250 break;
251 case CURLINFO_HEADER_IN:
252 cerr << "Header in: " << message;
253 break;
254 case CURLINFO_HEADER_OUT:
255 cerr << "Header out: " << message;
256 break;
257 case CURLINFO_DATA_IN:
259 cerr << "Data in: " << message;
260 break;
261 case CURLINFO_DATA_OUT:
263 cerr << "Data out: " << message;
264 break;
265 case CURLINFO_END:
266 cerr << "End: " << message;
267 break;
268#ifdef CURLINFO_SSL_DATA_IN
269 case CURLINFO_SSL_DATA_IN:
270 cerr << "SSL Data in: " << message;
271 break;
272#endif
273#ifdef CURLINFO_SSL_DATA_OUT
274 case CURLINFO_SSL_DATA_OUT:
275 cerr << "SSL Data out: " << message;
276 break;
277#endif
278 default:
280 cerr << "Curl info: " << message;
281 break;
282 }
283 return 0;
284}
285
288
289void HTTPConnect::www_lib_init() {
290 curl_global_init(CURL_GLOBAL_DEFAULT);
291
292 d_curl = curl_easy_init();
293 if (!d_curl)
294 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
295
296 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
297
298 curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
299
300 // Now set options that will remain constant for the duration of this
301 // CURL object.
302
303 // Set the proxy host.
304 if (!d_rcr->get_proxy_server_host().empty()) {
305 DBG(cerr << "Setting up a proxy server." << endl);
306 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() << endl);
307 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() << endl);
308 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() << endl);
309 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
310 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, d_rcr->get_proxy_server_port());
311
312 // As of 4/21/08 only NTLM, Digest and Basic work.
313#ifdef CURLOPT_PROXYAUTH
314 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
315#endif
316
317 // Password might not be required. 06/21/04 jhrg
318 if (!d_rcr->get_proxy_server_userpw().empty())
319 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, d_rcr->get_proxy_server_userpw().c_str());
320 }
321
322 // We have to set FailOnError to false for any of the non-Basic
323 // authentication schemes to work. 07/28/03 jhrg
324 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
325
326 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
327 // choosing the 'safest' one supported by the server.
328 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
329 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
330
331 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
332 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
333 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
334 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
335 // param of save_raw_http_headers to a vector<string> object.
336
337 // Follow 302 (redirect) responses
338 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
339 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
340
341 // If the user turns off SSL validation...
342 if (d_rcr->get_validate_ssl() == 0) {
343 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
344 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
345 }
346
347 // Set libcurl to use netrc to access data behind URS auth.
348 // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
349 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
350
351 // Look to see if cookies are turned on in the .dodsrc file. If so,
352 // activate here. We honor 'session cookies' (cookies without an
353 // expiration date) here so that session-based SSO systems will work as
354 // expected.
355 if (!d_cookie_jar.empty()) {
356 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
357 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
358 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
359 }
360
361 if (www_trace) {
362 cerr << "Curl version: " << curl_version() << endl;
363 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
364 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
365 }
366}
367
370
371class BuildHeaders : public unary_function<const string &, void> {
372 struct curl_slist *d_cl = nullptr;
373
374public:
375 BuildHeaders() = default;
376
377 void operator()(const string &header) {
378 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." << endl);
379 d_cl = curl_slist_append(d_cl, header.c_str());
380 }
381
382 struct curl_slist *get_headers() { return d_cl; }
383};
384
398
399long HTTPConnect::read_url(const string &url, FILE *stream, vector<string> &resp_hdrs) {
400 return read_url(url, stream, resp_hdrs, vector<string>());
401}
402
416
417long HTTPConnect::read_url(const string &url, FILE *stream, vector<string> &resp_hdrs, const vector<string> &headers) {
418 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
419
420 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
421
422 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
423
424 BuildHeaders req_hdrs;
425 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), req_hdrs);
426 req_hdrs = for_each(headers.begin(), headers.end(), req_hdrs);
427
428 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
429
430 // Turn off the proxy for this URL?
431 if (url_uses_no_proxy_for(url)) {
432 DBG(cerr << "Suppress proxy for url: " << url << endl);
433 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
434 }
435
436 string::size_type at_sign = url.find('@');
437 // Assume username:password present *and* assume it's an HTTP URL; it *is*
438 // HTTPConnect, after all. 7 is position after "http://"; the second arg
439 // to substr() is the sub string length.
440 if (at_sign != string::npos)
441 d_upstring = url.substr(7, at_sign - 7);
442
443 if (!d_upstring.empty())
444 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
445
446 // Pass save_raw_http_headers() a pointer to the vector<string> where the
447 // response headers may be stored. Callers can use the resp_hdrs
448 // value/result parameter to get the raw response header information .
449 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, &resp_hdrs);
450
451 // This is the call that causes curl to go and get the remote resource and "write it down"
452 // utilizing the configuration state that has been previously conditioned by various perturbations
453 // of calls to curl_easy_setopt().
454 CURLcode res = curl_easy_perform(d_curl);
455
456 // Free the header list and null the value in d_curl.
457 curl_slist_free_all(req_hdrs.get_headers());
458 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
459
460 // Reset the proxy?
461 if (url_uses_no_proxy_for(url) && !d_rcr->get_proxy_server_host().empty())
462 curl_easy_setopt(d_curl, CURLOPT_PROXY, d_rcr->get_proxy_server_host().c_str());
463
464 if (res != 0)
465 throw Error(d_error_buffer);
466
467 long status;
468 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
469 if (res != 0)
470 throw Error(d_error_buffer);
471
472 char *ct_ptr = 0;
473 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
474 if (res == CURLE_OK && ct_ptr)
475 d_content_type = ct_ptr;
476 else
477 d_content_type = "";
478
479 return status;
480}
481
484
485bool HTTPConnect::url_uses_proxy_for(const string &url) {
486 if (d_rcr->is_proxy_for_used()) {
487 // NB: This could be improved by moving the Regex instance into
488 // the RCReader class, but the proxy stuff is all deprecated.
489 // jhrg 12/1/21
490 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
491 int index = 0, matchlen;
492 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
493 }
494
495 return false;
496}
497
500
501bool HTTPConnect::url_uses_no_proxy_for(const string &url) noexcept {
502 return d_rcr->is_no_proxy_for_used() && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
503}
504
505// Public methods. Mostly...
506
515
517 : d_rcr(rcr), d_accept_deflate(rcr->get_deflate()), d_use_cpp_streams(use_cpp) {
518 // Load in the default headers to send with a request. The empty Pragma
519 // headers overrides libcurl's default Pragma: no-cache header (which
520 // will disable caching by Squid, et c.). The User-Agent header helps
521 // make server logs more readable. 05/05/03 jhrg
522 d_request_headers.emplace_back("Pragma:");
523 d_request_headers.emplace_back(string("User-Agent: ") + CNAME + "/" + CVER);
524 if (d_accept_deflate)
525 d_request_headers.emplace_back("Accept-Encoding: deflate, gzip, compress");
526
527 // HTTPCache::instance returns a valid ptr or nullptr.
528 if (d_rcr->get_use_cache())
529 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root());
530 else
531 d_http_cache = nullptr;
532
533 if (d_http_cache) {
534 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
535 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
536 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
537 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
538 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
539 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
540 }
541
542 d_cookie_jar = rcr->get_cookie_jar();
543
544 www_lib_init(); // This may throw either Error or InternalErr
545}
546
547HTTPConnect::~HTTPConnect() { curl_easy_cleanup(d_curl); }
548
/**
 * @brief Predicate that is true for strings that begin with a given prefix.
 *
 * Used to find a header line by its name, e.g. HeaderMatch("Content-Type:").
 * The comparison is case sensitive.
 *
 * The prefix is stored by value; the previous version kept a reference,
 * which dangles if the predicate outlives a temporary argument. The
 * std::unary_function base (removed in C++17) is gone, and operator() is
 * const and no longer scans the whole line looking for the prefix.
 */
class HeaderMatch {
    std::string d_header;

public:
    explicit HeaderMatch(const std::string &header) : d_header(header) {}

    /// @return True if @p arg starts with the prefix given to the constructor.
    bool operator()(const std::string &arg) const { return arg.compare(0, d_header.size(), d_header) == 0; }
};
557
569
571 HTTPResponse *stream = nullptr;
572
573 if (is_cache_enabled()) {
574 stream = caching_fetch_url(url);
575 } else {
576 stream = plain_fetch_url(url);
577 }
578
579#ifdef HTTP_TRACE
580 stringstream ss;
581 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
582 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
583 ss << stream->get_headers()->at(i) << endl;
584 }
585 cout << ss.str();
586#endif
587
588 ParseHeader parser;
589
590 // An apparent quirk of libcurl is that it does not pass the Content-type
591 // header to the callback used to save them, but check and add it from the
592 // saved state variable only if it's not there (without this a test failed
593 // in HTTPCacheTest). jhrg 11/12/13
594 if (!d_content_type.empty() && find_if(stream->get_headers().begin(), stream->get_headers().end(),
595 HeaderMatch("Content-Type:")) == stream->get_headers().end())
596 stream->get_headers().emplace_back("Content-Type: " + d_content_type);
597
598 parser = for_each(stream->get_headers().begin(), stream->get_headers().end(), ParseHeader());
599
600 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
601 if (!parser.get_location().empty() &&
602 (url.substr(0, url.find('?')) == parser.get_location().substr(0, url.find('?')))) {
603 delete stream;
604 return fetch_url(parser.get_location());
605 }
606
607 stream->set_type(parser.get_object_type()); // uses the value of content-description
608
609 stream->set_version(parser.get_server());
610 stream->set_protocol(parser.get_protocol());
611
612 if (d_use_cpp_streams) {
613 stream->transform_to_cpp();
614 }
615
616 return stream;
617}
618
619// Look around for a reasonable place to put a temporary file. Check first
620// the value of the TMPDIR env var. If that does not yield a path that's
621// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir
622// (defined in stdio.h). If both come up empty, then use `./'.
623
624// Change this to a version that either returns a string or an open file
625// descriptor. Use information from https://buildsecurityin.us-cert.gov/
626// (see open()) to make it more secure. Ideal solution: get deserialize()
627// methods to read from a stream returned by libcurl, not from a temporary
628// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
629static string get_tempfile_template(const string &file_template) {
630 string c;
631
632 // Windows has one idea of the standard name(s) for a temporary files dir
633#ifdef WIN32
634 // white list for a WIN32 directory
635 Regex directory("[-a-zA-Z0-9_:\\]*");
636
637 // If we're OK to use getenv(), try it.
638#ifdef USE_GETENV
639 c = getenv("TEMP");
640 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
641 goto valid_temp_directory;
642
643 c = getenv("TMP");
644 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
645 goto valid_temp_directory;
646#endif // USE_GETENV
647
648 // The windows default
649 c = "c:\tmp";
650 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
651 goto valid_temp_directory;
652
653#else // Unix/Linux/OSX has another...
654 // white list for a directory
655 const Regex directory("[-a-zA-Z0-9_/]*");
656#ifdef USE_GETENV
657 c = getenv("TMPDIR");
658 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
659 goto valid_temp_directory;
660#endif // USE_GETENV
661
662 // Unix defines this sometimes - if present, use it.
663#ifdef P_tmpdir
664 if (access(P_tmpdir, W_OK | R_OK) == 0) {
665 c = P_tmpdir;
666 goto valid_temp_directory;
667 }
668#endif
669
670 // The Unix default
671 c = "/tmp";
672 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
673 goto valid_temp_directory;
674
675#endif // WIN32
676
677 // If we found nothing useful, use the current directory
678 c = ".";
679
680valid_temp_directory:
681
682#ifdef WIN32
683 c += "\\" + file_template;
684#else
685 c += "/" + file_template;
686#endif
687
688 return c;
689}
690
708
709static string get_temp_file(FILE *&stream) {
710 string dods_temp = get_tempfile_template((string) "dodsXXXXXX");
711
712 vector<char> pathname(dods_temp.length() + 1);
713
714 strncpy(pathname.data(), dods_temp.c_str(), dods_temp.length());
715
716 // Open truncated for update. NB: mkstemp() returns a file descriptor.
717 // Make sure that temp files are accessible only by the owner.
718 mode_t mask = umask(077);
719 int fd = mkstemp(pathname.data());
720 if (fd < 0)
721 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
722
723 stream = fdopen(fd, "w+");
724 umask(mask);
725
726 if (!stream)
727 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
728
729 return {pathname.data()};
730}
731
737static void close_temp(FILE *s, const string &name) {
738 int res = fclose(s);
739 if (res)
740 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
741
742 res = unlink(name.c_str());
743 if (res != 0)
744 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
745}
746
767
768HTTPResponse *HTTPConnect::caching_fetch_url(const string &url) {
769 // This lock enables caching for threads that run simultaneously. A recursive
770 // mutex is used because this private method can be called recursively by fetch_url().
771 static recursive_mutex m;
772 lock_guard<recursive_mutex> lock(m);
773
774 VERBOSE_RUNTIME(cerr << "Is this URL (" << url << ") in the cache?... ");
775
776 vector<string> headers;
777 string file_name;
778 FILE *s = d_http_cache->get_cached_response(url, headers, file_name);
779 if (!s) {
780 // url not in cache; get it and cache it
781 VERBOSE_RUNTIME(cerr << "no; getting response and caching." << endl);
782 time_t now = time(nullptr);
783 HTTPResponse *rs = plain_fetch_url(url);
784 d_http_cache->cache_response(url, now, rs->get_headers(), rs->get_stream());
785
786 return rs;
787 } else { // url in cache
788 VERBOSE_RUNTIME(cerr << "yes... ");
789
790 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
791 VERBOSE_RUNTIME(cerr << "and it's valid; using cached response." << endl);
792 d_cached_response = true; // False by default
793 auto crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
794 return crs;
795 } else { // url in cache but not valid; validate
796 VERBOSE_RUNTIME(cerr << "but it's not valid; validating... ");
797
798 d_http_cache->release_cached_response(s); // This closes 's'
799 headers.clear();
800 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
801 FILE *body = 0;
802 string dods_temp = get_temp_file(body);
803 time_t now = time(0); // When was the request made (now).
804 long http_status;
805
806 try {
807 http_status = read_url(url, body, /*resp_hdrs*/ headers, cond_hdrs);
808 rewind(body);
809 } catch (const Error &) {
810 close_temp(body, dods_temp);
811 throw;
812 }
813
814 switch (http_status) {
815 case 200: { // New headers and new body
816 VERBOSE_RUNTIME(cerr << "read a new response; caching." << endl);
817
818 d_http_cache->cache_response(url, now, /*resp_hdrs*/ headers, body);
819 auto rs = new HTTPResponse(body, http_status, /*resp_hdrs*/ headers, dods_temp);
820
821 return rs;
822 }
823
824 case 304: { // Just new headers, use cached body
825 VERBOSE_RUNTIME(cerr << "cached response valid; updating." << endl);
826
827 close_temp(body, dods_temp);
828 d_cached_response = true;
829 d_http_cache->update_response(url, now, /*resp_hdrs*/ headers);
830 string file_name;
831 FILE *hs = d_http_cache->get_cached_response(url, headers, file_name);
832 auto crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
833 return crs;
834 }
835
836 default: { // Oops.
837 close_temp(body, dods_temp);
838 if (http_status >= 400) {
839 string msg = "Error while reading the URL: ";
840 msg += url;
841 msg += ".\nThe OPeNDAP server returned the following message:\n";
842 msg += http_status_to_string(http_status);
843 throw Error(msg);
844 } else {
845 throw InternalErr(__FILE__, __LINE__,
846 "Bad response from the HTTP server: " + long_to_string(http_status));
847 }
848 }
849 }
850 }
851 }
852
853 throw InternalErr(__FILE__, __LINE__, "Should never get here");
854}
855
866
867HTTPResponse *HTTPConnect::plain_fetch_url(const string &url) {
868 DBG(cerr << "Getting URL: " << url << endl);
869 FILE *stream = nullptr;
870 string dods_temp = get_temp_file(stream);
871 vector<string> resp_hdrs;
872
873 int status = -1;
874 try {
875 status = read_url(url, stream, resp_hdrs); // Throws Error.
876 if (status >= 400) {
877 string msg = "Error while reading the URL: ";
878 msg += url;
879 msg += ".\nThe OPeNDAP server returned the following message:\n";
880 msg += http_status_to_string(status);
881 throw Error(msg);
882 }
883 } catch (const Error &) {
884 close_temp(stream, dods_temp);
885 throw;
886 }
887
888 rewind(stream);
889 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
890}
891
904 lock_guard<mutex> lock(d_connect_mutex);
905
906 d_accept_deflate = deflate;
907
908 if (d_accept_deflate) {
909 if (find(d_request_headers.begin(), d_request_headers.end(), "Accept-Encoding: deflate, gzip, compress") ==
910 d_request_headers.end())
911 d_request_headers.emplace_back("Accept-Encoding: deflate, gzip, compress");
912 } else {
914 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
915 [](const string &header) { return header == "Accept-Encoding: deflate, gzip, compress"; });
916 d_request_headers.erase(i, d_request_headers.end());
917 }
918}
919
928void HTTPConnect::set_xdap_protocol(int major, int minor) {
929 lock_guard<mutex> lock(d_connect_mutex);
930
931 // Look for, and remove if one exists, an XDAP-Accept header
933 i = find_if(d_request_headers.begin(), d_request_headers.end(), HeaderMatch("XDAP-Accept:"));
934 if (i != d_request_headers.end())
935 d_request_headers.erase(i);
936
937 // Record and add the new header value
938 d_dap_client_protocol_major = major;
939 d_dap_client_protocol_minor = minor;
940 ostringstream xdap_accept;
941 xdap_accept << "XDAP-Accept: " << major << "." << minor;
942
943 d_request_headers.push_back(xdap_accept.str());
944
945 DBG(copy(d_request_headers.begin(), d_request_headers.end(), ostream_iterator<string>(cerr, "\n")));
946}
947
962
963void HTTPConnect::set_credentials(const string &u, const string &p) {
964 lock_guard<mutex> lock(d_connect_mutex);
965
966 if (u.empty())
967 return;
968
969 // Store the credentials locally.
970 d_username = u;
971 d_password = p;
972
973 d_upstring = u + ":" + p;
974}
975
976} // namespace libdap
#define SERVER_ERR_MAX
#define VERBOSE_RUNTIME(x)
#define CLIENT_ERR_MAX
#define SERVER_ERR_MIN
#define CLIENT_ERR_MIN
A class for error processing.
Definition Error.h:92
static HTTPCache * instance(const std::string &cache_root="")
Definition HTTPCache.h:269
bool is_cache_enabled() const
friend class ParseHeader
void set_accept_deflate(bool deflate)
void set_xdap_protocol(int major, int minor)
void set_credentials(const std::string &u, const std::string &p)
HTTPResponse * fetch_url(const std::string &url)
virtual std::vector< std::string > & get_headers()
A class for software fault reporting.
Definition InternalErr.h:61
string get_cookie_jar() const
Definition RCReader.h:190
Regular expression matching.
Definition GNURegex.h:54
virtual void set_version(const std::string &v)
Definition Response.h:119
virtual int get_status() const
Definition Response.h:102
virtual void set_type(ObjectType o)
Definition Response.h:118
virtual void set_protocol(const std::string &p)
Definition Response.h:120
STL iterator class.
#define CNAME
Definition config.h:26
#define CVER
Definition config.h:32
#define DBG(x)
Definition debug.h:58
#define DBG2(x)
Definition debug.h:74
top level DAP object to house generic methods
Definition AISConnect.cc:30
ObjectType get_description_type(const string &value)
Definition mime_util.cc:309
string long_to_string(long val, int base)
Definition util.cc:946
void parse_mime_header(const string &header, string &name, string &value)
Definition mime_util.cc:848
int www_trace
ObjectType
The type of object in the stream coming from the data server.
Definition ObjectType.h:57
@ dap4_data
Definition ObjectType.h:68
@ dap4_error
Definition ObjectType.h:69
@ unknown_type
Definition ObjectType.h:58
@ web_error
Definition ObjectType.h:65
@ dap4_dmr
Definition ObjectType.h:67
int dods_keep_temps
int www_trace_extensive