58 #include "mime_util.h" 59 #include "media_types.h" 61 #include "HTTPCache.h" 62 #include "HTTPConnect.h" 64 #include "HTTPResponse.h" 65 #include "HTTPCacheResponse.h" 80 int www_trace_extensive = 0;
83 int dods_keep_temps = 0;
85 #define CLIENT_ERR_MIN 400 86 #define CLIENT_ERR_MAX 417 87 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
90 "Unauthorized: Contact the server administrator.",
92 "Forbidden: Contact the server administrator.",
93 "Not Found: The data source or server could not be found.\n\ 94 Often this means that the OPeNDAP server is missing or needs attention.\n\ 95 Please contact the server administrator.",
96 "Method Not Allowed.",
98 "Proxy Authentication Required.",
103 "Precondition Failed.",
104 "Request Entity Too Large.",
105 "Request URI Too Large.",
106 "Unsupported Media Type.",
107 "Requested Range Not Satisfiable.",
108 "Expectation Failed." 111 #define SERVER_ERR_MIN 500 112 #define SERVER_ERR_MAX 505 113 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
115 "Internal Server Error.",
118 "Service Unavailable.",
120 "HTTP Version Not Supported." 126 http_status_to_string(
int status)
128 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
129 return string(http_client_errors[status - CLIENT_ERR_MIN]);
130 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
131 return string(http_server_errors[status - SERVER_ERR_MIN]);
133 return string(
"Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
137 determine_object_type(
const string &header_value)
142 string::size_type plus = header_value.find(
'+');
144 string type_extension =
"";
145 if (plus != string::npos) {
146 base_type= header_value.substr(0, plus);
147 type_extension = header_value.substr(plus+1);
150 base_type = header_value;
152 if (base_type == DMR_Content_Type
153 || (base_type.find(
"application/") != string::npos
154 && base_type.find(
"dap4.dataset-metadata") != string::npos)) {
155 if (type_extension ==
"xml")
160 else if (base_type == DAP4_DATA_Content_Type
161 || (base_type.find(
"application/") != string::npos
162 && base_type.find(
"dap4.data") != string::npos)) {
165 else if (header_value.find(
"text/html") != string::npos) {
176 class ParseHeader :
public unary_function<const string &, void>
184 ParseHeader() : type(unknown_type), server(
"dods/0.0"), protocol(
"2.0")
187 void operator()(
const string &line)
192 DBG2(cerr << name <<
": " << value << endl);
197 if (type == unknown_type && name ==
"content-type") {
198 type = determine_object_type(value);
200 if (name ==
"content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
206 else if (name ==
"xdods-server" && server ==
"dods/0.0") {
209 else if (name ==
"xopendap-server") {
212 else if (name ==
"xdap") {
215 else if (server ==
"dods/0.0" && name ==
"server") {
218 else if (name ==
"location") {
233 string get_protocol()
238 string get_location() {
/** libcurl header callback: collect the header lines of a response.

    libcurl calls this function once per header line. The line's trailing
    end-of-line marker ("\r\n" or a bare "\n") is removed and the result is
    appended to the caller's vector, unless it is empty (the blank line that
    terminates a header block) or it is an HTTP status line.

    @param ptr Start of the header bytes. They are not null terminated.
    @param size Size in bytes of one data item.
    @param nmemb Number of data items available at \c ptr.
    @param resp_hdrs The vector<string> that receives the headers, passed
    through libcurl as an untyped pointer.
    @return The number of bytes consumed (size * nmemb). libcurl treats any
    other value as an error and aborts the transfer. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Strip the end-of-line marker only when it is really there. Working
    // from the byte count (not nmemb - 1 / nmemb - 2) keeps the arithmetic
    // from wrapping around when no bytes were delivered and does not chop
    // the last character off a line that has no newline.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    // The blank line that ends the header block carries no information.
    if (length == 0)
        return total;

    const std::string complete_line(data, length);

    // Store every header except the status line(s). Only a line that
    // *starts* with "HTTP/" is a status line; a header whose value merely
    // mentions HTTP must be kept.
    if (complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
282 curl_debug(CURL *, curl_infotype info,
char *msg,
size_t size,
void *)
284 string message(msg, size);
288 cerr <<
"Text: " << message;
break;
289 case CURLINFO_HEADER_IN:
290 cerr <<
"Header in: " << message;
break;
291 case CURLINFO_HEADER_OUT:
292 cerr <<
"Header out: " << message;
break;
293 case CURLINFO_DATA_IN:
294 if (www_trace_extensive)
295 cerr <<
"Data in: " << message;
break;
296 case CURLINFO_DATA_OUT:
297 if (www_trace_extensive)
298 cerr <<
"Data out: " << message;
break;
300 cerr <<
"End: " << message;
break;
301 #ifdef CURLINFO_SSL_DATA_IN 302 case CURLINFO_SSL_DATA_IN:
303 cerr <<
"SSL Data in: " << message;
break;
305 #ifdef CURLINFO_SSL_DATA_OUT 306 case CURLINFO_SSL_DATA_OUT:
307 cerr <<
"SSL Data out: " << message;
break;
310 if (www_trace_extensive)
311 cerr <<
"Curl info: " << message;
break;
320 HTTPConnect::www_lib_init()
322 d_curl = curl_easy_init();
324 throw InternalErr(__FILE__, __LINE__,
"Could not initialize libcurl.");
330 if (!d_rcr->get_proxy_server_host().empty()) {
331 DBG(cerr <<
"Setting up a proxy server." << endl);
332 DBG(cerr <<
"Proxy host: " << d_rcr->get_proxy_server_host()
334 DBG(cerr <<
"Proxy port: " << d_rcr->get_proxy_server_port()
336 DBG(cerr <<
"Proxy pwd : " << d_rcr->get_proxy_server_userpw()
338 curl_easy_setopt(d_curl, CURLOPT_PROXY,
339 d_rcr->get_proxy_server_host().c_str());
340 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
341 d_rcr->get_proxy_server_port());
344 #ifdef CURLOPT_PROXYAUTH 345 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (
long)CURLAUTH_ANY);
349 if (!d_rcr->get_proxy_server_userpw().empty())
350 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
351 d_rcr->get_proxy_server_userpw().c_str());
354 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
357 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
362 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (
long)CURLAUTH_ANY);
364 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
365 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
366 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
371 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
372 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
375 if (d_rcr->get_validate_ssl() == 0) {
376 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
377 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
382 curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
388 if (!d_cookie_jar.empty()) {
389 DBG(cerr <<
"Setting the cookie jar to: " << d_cookie_jar << endl);
390 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
391 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
395 cerr <<
"Curl version: " << curl_version() << endl;
396 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
397 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
404 class BuildHeaders :
public unary_function<const string &, void>
406 struct curl_slist *d_cl;
409 BuildHeaders() : d_cl(0)
412 void operator()(
const string &header)
414 DBG(cerr <<
"Adding '" << header.c_str() <<
"' to the header list." 416 d_cl = curl_slist_append(d_cl, header.c_str());
419 struct curl_slist *get_headers()
440 HTTPConnect::read_url(
const string &url, FILE *stream, vector<string> *resp_hdrs,
const vector<string> *headers)
442 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
452 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
453 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
455 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
459 ostream_iterator<string>(cerr,
"\n")));
461 BuildHeaders req_hdrs;
462 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
465 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
467 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
470 bool temporary_proxy =
false;
471 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
472 DBG(cerr <<
"Suppress proxy for url: " << url << endl);
473 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
476 string::size_type at_sign = url.find(
'@');
480 if (at_sign != url.npos)
481 d_upstring = url.substr(7, at_sign - 7);
483 if (!d_upstring.empty())
484 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
489 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
494 CURLcode res = curl_easy_perform(d_curl);
497 curl_slist_free_all(req_hdrs.get_headers());
498 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
501 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
502 curl_easy_setopt(d_curl, CURLOPT_PROXY,
503 d_rcr->get_proxy_server_host().c_str());
506 throw Error(d_error_buffer);
509 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
511 throw Error(d_error_buffer);
514 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
515 if (res == CURLE_OK && ct_ptr)
516 d_content_type = ct_ptr;
527 HTTPConnect::url_uses_proxy_for(
const string &url)
529 if (d_rcr->is_proxy_for_used()) {
530 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
531 int index = 0, matchlen;
532 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
542 HTTPConnect::url_uses_no_proxy_for(
const string &url)
throw()
544 return d_rcr->is_no_proxy_for_used()
545 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
556 HTTPConnect::HTTPConnect(
RCReader *rcr,
bool use_cpp) : d_username(
""), d_password(
""), d_cookie_jar(
""),
557 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
560 d_accept_deflate = rcr->get_deflate();
567 d_request_headers.push_back(
string(
"Pragma:"));
568 string user_agent = string(
"User-Agent: ") + string(CNAME)
569 + string(
"/") + string(CVER);
570 d_request_headers.push_back(user_agent);
571 if (d_accept_deflate)
572 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
575 if (d_rcr->get_use_cache())
580 DBG2(cerr <<
"Cache object created (" << hex << d_http_cache << dec
586 d_http_cache->
set_max_size(d_rcr->get_max_cache_size());
592 d_cookie_jar = rcr->get_cookie_jar();
597 HTTPConnect::~HTTPConnect()
599 DBG2(cerr <<
"Entering the HTTPConnect dtor" << endl);
601 curl_easy_cleanup(d_curl);
603 DBG2(cerr <<
"Leaving the HTTPConnect dtor" << endl);
/** Predicate that is true for strings which begin with a given prefix.

    Intended for use with find_if() to locate a header line by its field
    name, e.g. HeaderMatch("Content-Type:"). */
class HeaderMatch {
    // Held by value. A reference member would dangle as soon as the
    // temporary string built from a literal argument is destroyed, which
    // happens whenever the functor outlives the expression that created it.
    const std::string d_header;

public:
    // The nested types std::unary_function used to supply, kept so the class
    // still works with code that expects an adaptable unary predicate.
    typedef const std::string &argument_type;
    typedef bool result_type;

    /** @param header The prefix to look for; a copy is stored. */
    HeaderMatch(const std::string &header) : d_header(header) {}

    /** @param arg The string to test.
        @return True if \c arg starts with the stored prefix. */
    bool operator()(const std::string &arg) const
    {
        // compare() examines at most d_header.size() characters, where
        // find(...) == 0 would scan all of arg before reporting a mismatch.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
630 cout <<
"GET " << url <<
" HTTP/1.0" << endl;
636 stream = caching_fetch_url(url);
639 stream = plain_fetch_url(url);
644 ss <<
"HTTP/1.0 " << stream->get_status() <<
" -" << endl;
645 for (
size_t i = 0; i < stream->get_headers()->size(); i++) {
646 ss << stream->get_headers()->at(i) << endl;
657 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
658 HeaderMatch(
"Content-Type:")) == stream->get_headers()->end())
659 stream->get_headers()->push_back(
"Content-Type: " + d_content_type);
661 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
664 cout << endl << endl;
668 if (parser.get_location() !=
"" &&
669 url.substr(0,url.find(
"?",0)).compare(parser.get_location().substr(0,url.find(
"?",0))) != 0) {
674 stream->set_type(parser.get_object_type());
676 stream->set_version(parser.get_server());
677 stream->set_protocol(parser.get_protocol());
679 if (d_use_cpp_streams) {
697 get_tempfile_template(
const string &file_template)
704 Regex directory(
"[-a-zA-Z0-9_:\\]*");
709 if (c && directory.
match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710 goto valid_temp_directory;
713 if (c && directory.
match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
714 goto valid_temp_directory;
719 if (c && directory.
match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
720 goto valid_temp_directory;
722 #else // Unix/Linux/OSX has another... 724 Regex directory(
"[-a-zA-Z0-9_/]*");
726 c = getenv(
"TMPDIR");
727 if (directory.
match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
728 goto valid_temp_directory;
733 if (access(P_tmpdir, W_OK | R_OK) == 0) {
735 goto valid_temp_directory;
741 if (directory.
match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
742 goto valid_temp_directory;
749 valid_temp_directory:
752 c +=
"\\" + file_template;
754 c +=
"/" + file_template;
781 string dods_temp = get_tempfile_template((
string)
"dodsXXXXXX");
783 vector<char> pathname(dods_temp.length() + 1);
785 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
787 DBG(cerr <<
"pathanme: " << &pathname[0] <<
" (" << dods_temp.length() + 1 <<
")" << endl);
790 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 791 stream = fopen(_mktemp(&pathname[0]),
"w+b");
794 int mask = umask(077);
796 throw Error(
"Could not set the file creation mask: " +
string(strerror(errno)));
797 int fd = mkstemp(&pathname[0]);
799 throw Error(
"Could not create a temporary file to store the response: " +
string(strerror(errno)));
801 stream = fdopen(fd,
"w+");
806 throw Error(
"Failed to open a temporary file for the data values (" + dods_temp +
")");
808 dods_temp = &pathname[0];
823 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
825 res = unlink(name.c_str());
827 throw InternalErr(__FILE__, __LINE__,
"!FAIL! " + long_to_string(res));
852 HTTPConnect::caching_fetch_url(
const string &url)
854 DBG(cerr <<
"Is this URL (" << url <<
") in the cache?... ");
856 vector<string> *headers =
new vector<string>;
858 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
861 DBGN(cerr <<
"no; getting response and caching." << endl);
862 delete headers; headers = 0;
863 time_t now = time(0);
865 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
870 DBGN(cerr <<
"yes... ");
872 if (d_http_cache->is_url_valid(url)) {
873 DBGN(cerr <<
"and it's valid; using cached response." << endl);
878 DBGN(cerr <<
"but it's not valid; validating... ");
880 d_http_cache->release_cached_response(s);
882 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
885 time_t now = time(0);
889 http_status = read_url(url, body, headers, &cond_hdrs);
898 switch (http_status) {
900 DBGN(cerr <<
"read a new response; caching." << endl);
902 d_http_cache->cache_response(url, now, *headers, body);
909 DBGN(cerr <<
"cached response valid; updating." << endl);
912 d_http_cache->update_response(url, now, *headers);
914 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
921 if (http_status >= 400) {
922 delete headers; headers = 0;
923 string msg =
"Error while reading the URL: ";
926 +=
".\nThe OPeNDAP server returned the following message:\n";
927 msg += http_status_to_string(http_status);
931 delete headers; headers = 0;
933 "Bad response from the HTTP server: " + long_to_string(http_status));
940 throw InternalErr(__FILE__, __LINE__,
"Should never get here");
955 HTTPConnect::plain_fetch_url(
const string &url)
957 DBG(cerr <<
"Getting URL: " << url << endl);
960 vector<string> *resp_hdrs =
new vector<string>;
964 status = read_url(url, stream, resp_hdrs);
967 string msg =
"Error while reading the URL: ";
969 msg +=
".\nThe OPeNDAP server returned the following message:\n";
970 msg += http_status_to_string(status);
982 if (d_use_cpp_streams) {
984 fstream *in =
new fstream(dods_temp.c_str(), ios::in|ios::binary);
985 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
990 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
1010 d_accept_deflate = deflate;
1012 if (d_accept_deflate) {
1013 if (find(d_request_headers.begin(), d_request_headers.end(),
1014 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1015 d_request_headers.push_back(
string(
"Accept-Encoding: deflate, gzip, compress"));
1016 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1017 ostream_iterator<string>(cerr,
"\n")));
1020 vector<string>::iterator i;
1021 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1022 bind2nd(equal_to<string>(),
1023 string(
"Accept-Encoding: deflate, gzip, compress")));
1024 d_request_headers.erase(i, d_request_headers.end());
1040 vector<string>::iterator i;
1041 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1042 HeaderMatch(
"XDAP-Accept:"));
1043 if (i != d_request_headers.end())
1044 d_request_headers.erase(i);
1047 d_dap_client_protocol_major = major;
1048 d_dap_client_protocol_minor = minor;
1049 ostringstream xdap_accept;
1050 xdap_accept <<
"XDAP-Accept: " << major <<
"." << minor;
1052 d_request_headers.push_back(xdap_accept.str());
1054 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1055 ostream_iterator<string>(cerr,
"\n")));
1083 d_upstring = u +
":" + p;
void set_cache_enabled(bool mode)
void set_credentials(const string &u, const string &p)
static HTTPCache * instance(const string &cache_root, bool force=false)
void set_max_size(unsigned long size)
ObjectType
The type of object in the stream coming from the data server.
HTTPResponse * fetch_url(const string &url)
Top-level DAP object to house generic methods.
A class for software fault reporting.
void parse_mime_header(const string &header, string &name, string &value)
int match(const char *s, int len, int pos=0)
Does the pattern match.
ObjectType get_description_type(const string &value)
void close_temp(FILE *s, const string &name)
string get_temp_file(FILE *&stream)
void set_accept_deflate(bool defalte)
void set_always_validate(bool validate)
void set_xdap_protocol(int major, int minor)
void set_default_expiration(int exp_time)
A class for error processing.
void set_expire_ignored(bool mode)
void set_max_entry_size(unsigned long size)