libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 #include <cerrno>
49 
50 //#define DODS_DEBUG2
51 //#define HTTP_TRACE
52 //#define DODS_DEBUG
53 
54 #undef USE_GETENV
55 
56 
57 #include "debug.h"
58 #include "mime_util.h"
59 #include "media_types.h"
60 #include "GNURegex.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
63 #include "RCReader.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
66 
67 using namespace std;
68 
69 namespace libdap {
70 
71 // These global variables are not MT-Safe, but I'm leaving them as is because
72 // they are used only for debugging (set them in a debugger like gdb or ddd).
73 // They are not static because I think that many debuggers cannot access
74 // static variables. 08/07/02 jhrg
75 
76 // Set this to 1 to turn on libcurl's verbose mode (for debugging).
77 int www_trace = 0;
78 
79 // Set this to 1 to turn on libcurl's VERY verbose mode.
80 int www_trace_extensive = 0;
81 
82 // Keep the temporary files; useful for debugging.
83 int dods_keep_temps = 0;
84 
// Messages for the HTTP client-error status codes CLIENT_ERR_MIN (400)
// through CLIENT_ERR_MAX (417). The table holds one entry per status code and
// is indexed with (status - CLIENT_ERR_MIN); see http_status_to_string().
85 #define CLIENT_ERR_MIN 400
86 #define CLIENT_ERR_MAX 417
87 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] =
88  {
89  "Bad Request:",
90  "Unauthorized: Contact the server administrator.",
91  "Payment Required.",
92  "Forbidden: Contact the server administrator.",
93  "Not Found: The data source or server could not be found.\n\
94  Often this means that the OPeNDAP server is missing or needs attention.\n\
95  Please contact the server administrator.",
96  "Method Not Allowed.",
97  "Not Acceptable.",
98  "Proxy Authentication Required.",
99  "Request Time-out.",
100  "Conflict.",
101  "Gone:.",
102  "Length Required.",
103  "Precondition Failed.",
104  "Request Entity Too Large.",
105  "Request URI Too Large.",
106  "Unsupported Media Type.",
107  "Requested Range Not Satisfiable.",
108  "Expectation Failed."
109  };
110 
// Messages for the HTTP server-error status codes SERVER_ERR_MIN (500)
// through SERVER_ERR_MAX (505). The table holds one entry per status code and
// is indexed with (status - SERVER_ERR_MIN); see http_status_to_string().
111 #define SERVER_ERR_MIN 500
112 #define SERVER_ERR_MAX 505
113 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
114  {
115  "Internal Server Error.",
116  "Not Implemented.",
117  "Bad Gateway.",
118  "Service Unavailable.",
119  "Gateway Time-out.",
120  "HTTP Version Not Supported."
121  };
122 
125 static string
126 http_status_to_string(int status)
127 {
128  if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
129  return string(http_client_errors[status - CLIENT_ERR_MIN]);
130  else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
131  return string(http_server_errors[status - SERVER_ERR_MIN]);
132  else
133  return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
134 }
135 
136 static ObjectType
137 determine_object_type(const string &header_value)
138 {
139  // DAP4 Data: application/vnd.opendap.dap4.data
140  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141 
142  string::size_type plus = header_value.find('+');
143  string base_type;
144  string type_extension = "";
145  if (plus != string::npos) {
146  base_type= header_value.substr(0, plus);
147  type_extension = header_value.substr(plus+1);
148  }
149  else
150  base_type = header_value;
151 
152  if (base_type == DMR_Content_Type
153  || (base_type.find("application/") != string::npos
154  && base_type.find("dap4.dataset-metadata") != string::npos)) {
155  if (type_extension == "xml")
156  return dap4_dmr;
157  else
158  return unknown_type;
159  }
160  else if (base_type == DAP4_DATA_Content_Type
161  || (base_type.find("application/") != string::npos
162  && base_type.find("dap4.data") != string::npos)) {
163  return dap4_data;
164  }
165  else if (header_value.find("text/html") != string::npos) {
166  return web_error;
167  }
168  else
169  return unknown_type;
170 }
171 
176 class ParseHeader : public unary_function<const string &, void>
177 {
178  ObjectType type; // What type of object is in the stream?
179  string server; // Server's version string.
180  string protocol; // Server's protocol version.
181  string location; // Url returned by server
182 
183 public:
184  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185  { }
186 
187  void operator()(const string &line)
188  {
189  string name, value;
190  parse_mime_header(line, name, value);
191 
192  DBG2(cerr << name << ": " << value << endl);
193 
194  // Content-Type is used to determine the content of DAP4 responses, but allow the
195  // Content-Description header to override CT o preserve operation with DAP2 servers.
196  // jhrg 11/12/13
197  if (type == unknown_type && name == "content-type") {
198  type = determine_object_type(value); // see above
199  }
200  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201  type = get_description_type(value); // defined in mime_util.cc
202  }
203  // The second test (== "dods/0.0") tests if xopendap-server has already
204  // been seen. If so, use that header in preference to the old
205  // XDODS-Server header. jhrg 2/7/06
206  else if (name == "xdods-server" && server == "dods/0.0") {
207  server = value;
208  }
209  else if (name == "xopendap-server") {
210  server = value;
211  }
212  else if (name == "xdap") {
213  protocol = value;
214  }
215  else if (server == "dods/0.0" && name == "server") {
216  server = value;
217  }
218  else if (name == "location") {
219  location = value;
220  }
221  }
222 
223  ObjectType get_object_type()
224  {
225  return type;
226  }
227 
228  string get_server()
229  {
230  return server;
231  }
232 
233  string get_protocol()
234  {
235  return protocol;
236  }
237 
238  string get_location() {
239  return location;
240  }
241 };
242 
/**
 * libcurl header callback: store one raw response header line.
 *
 * The line is stored without its trailing newline (or \r\n pair). Empty
 * lines and HTTP status lines (lines that start with "HTTP/") are not
 * stored. Header lines that merely contain the text "HTTP" somewhere in
 * their value are kept.
 *
 * @param ptr Pointer to the header bytes; not null terminated.
 * @param size Size of one element.
 * @param nmemb Number of elements; the line is size * nmemb bytes long.
 * @param resp_hdrs Pointer to the vector<string> that receives the line.
 * @return The number of bytes consumed, always size * nmemb.
 */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);

    const size_t total = size * nmemb;
    // Nothing to parse or nowhere to put it; report the bytes as consumed so
    // the transfer is not aborted.
    if (total == 0 || !ptr || !hdrs)
        return total;

    const char *data = static_cast<const char *>(ptr);

    // Strip the line terminator only when it is really there. Working from
    // the total byte count avoids unsigned underflow for very short lines.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines.
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
279 
281 static int
282 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283 {
284  string message(msg, size);
285 
286  switch (info) {
287  case CURLINFO_TEXT:
288  cerr << "Text: " << message; break;
289  case CURLINFO_HEADER_IN:
290  cerr << "Header in: " << message; break;
291  case CURLINFO_HEADER_OUT:
292  cerr << "Header out: " << message; break;
293  case CURLINFO_DATA_IN:
294  if (www_trace_extensive)
295  cerr << "Data in: " << message; break;
296  case CURLINFO_DATA_OUT:
297  if (www_trace_extensive)
298  cerr << "Data out: " << message; break;
299  case CURLINFO_END:
300  cerr << "End: " << message; break;
301 #ifdef CURLINFO_SSL_DATA_IN
302  case CURLINFO_SSL_DATA_IN:
303  cerr << "SSL Data in: " << message; break;
304 #endif
305 #ifdef CURLINFO_SSL_DATA_OUT
306  case CURLINFO_SSL_DATA_OUT:
307  cerr << "SSL Data out: " << message; break;
308 #endif
309  default:
310  if (www_trace_extensive)
311  cerr << "Curl info: " << message; break;
312  }
313  return 0;
314 }
315 
319 void
320 HTTPConnect::www_lib_init()
321 {
322  d_curl = curl_easy_init();
323  if (!d_curl)
324  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
325 
326  // Now set options that will remain constant for the duration of this
327  // CURL object.
328 
329  // Set the proxy host.
330  if (!d_rcr->get_proxy_server_host().empty()) {
331  DBG(cerr << "Setting up a proxy server." << endl);
332  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
333  << endl);
334  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
335  << endl);
336  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
337  << endl);
338  curl_easy_setopt(d_curl, CURLOPT_PROXY,
339  d_rcr->get_proxy_server_host().c_str());
340  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
341  d_rcr->get_proxy_server_port());
342 
343  // As of 4/21/08 only NTLM, Digest and Basic work.
344 #ifdef CURLOPT_PROXYAUTH
345  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
346 #endif
347 
348  // Password might not be required. 06/21/04 jhrg
349  if (!d_rcr->get_proxy_server_userpw().empty())
350  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
351  d_rcr->get_proxy_server_userpw().c_str());
352  }
353 
354  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
355  // We have to set FailOnError to false for any of the non-Basic
356  // authentication schemes to work. 07/28/03 jhrg
357  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
358 
359  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
360  // choosing the the 'safest' one supported by the server.
361  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
362  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
363 
364  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
365  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
366  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
367  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
368  // param of save_raw_http_headers to a vector<string> object.
369 
370  // Follow 302 (redirect) responses
371  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
372  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
373 
374  // If the user turns off SSL validation...
375  if (d_rcr->get_validate_ssl() == 0) {
376  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
377  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
378  }
379 
380  // Set libcurl to use netrc to access data behind URS auth.
381  // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
382  curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
383 
384  // Look to see if cookies are turned on in the .dodsrc file. If so,
385  // activate here. We honor 'session cookies' (cookies without an
386  // expiration date) here so that session-based SSO systems will work as
387  // expected.
388  if (!d_cookie_jar.empty()) {
389  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
390  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
391  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
392  }
393 
394  if (www_trace) {
395  cerr << "Curl version: " << curl_version() << endl;
396  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
397  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
398  }
399 }
400 
404 class BuildHeaders : public unary_function<const string &, void>
405 {
406  struct curl_slist *d_cl;
407 
408 public:
409  BuildHeaders() : d_cl(0)
410  {}
411 
412  void operator()(const string &header)
413  {
414  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
415  << endl);
416  d_cl = curl_slist_append(d_cl, header.c_str());
417  }
418 
419  struct curl_slist *get_headers()
420  {
421  return d_cl;
422  }
423 };
424 
439 long
440 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
441 {
442  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
443 
444 #ifdef WIN32
445  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
446  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
447  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
448  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
449  // this issue is that one should not pass a FILE * to a windows DLL. Close
450  // inspection of libcurl yields that their default write function when using
451  // the CURLOPT_WRITEDATA is just "fwrite".
452  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
453  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
454 #else
455  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
456 #endif
457 
458  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
459  ostream_iterator<string>(cerr, "\n")));
460 
461  BuildHeaders req_hdrs;
462  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
463  req_hdrs);
464  if (headers)
465  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
466 
467  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
468 
469  // Turn off the proxy for this URL?
470  bool temporary_proxy = false;
471  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
472  DBG(cerr << "Suppress proxy for url: " << url << endl);
473  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
474  }
475 
476  string::size_type at_sign = url.find('@');
477  // Assume username:password present *and* assume it's an HTTP URL; it *is*
478  // HTTPConnect, after all. 7 is position after "http://"; the second arg
479  // to substr() is the sub string length.
480  if (at_sign != url.npos)
481  d_upstring = url.substr(7, at_sign - 7);
482 
483  if (!d_upstring.empty())
484  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
485 
486  // Pass save_raw_http_headers() a pointer to the vector<string> where the
487  // response headers may be stored. Callers can use the resp_hdrs
488  // value/result parameter to get the raw response header information .
489  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
490 
491  // This is the call that causes curl to go and get the remote resource and "write it down"
492  // utilizing the configuration state that has been previously conditioned by various perturbations
493  // of calls to curl_easy_setopt().
494  CURLcode res = curl_easy_perform(d_curl);
495 
496  // Free the header list and null the value in d_curl.
497  curl_slist_free_all(req_hdrs.get_headers());
498  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
499 
500  // Reset the proxy?
501  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
502  curl_easy_setopt(d_curl, CURLOPT_PROXY,
503  d_rcr->get_proxy_server_host().c_str());
504 
505  if (res != 0)
506  throw Error(d_error_buffer);
507 
508  long status;
509  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
510  if (res != 0)
511  throw Error(d_error_buffer);
512 
513  char *ct_ptr = 0;
514  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
515  if (res == CURLE_OK && ct_ptr)
516  d_content_type = ct_ptr;
517  else
518  d_content_type = "";
519 
520  return status;
521 }
522 
526 bool
527 HTTPConnect::url_uses_proxy_for(const string &url)
528 {
529  if (d_rcr->is_proxy_for_used()) {
530  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
531  int index = 0, matchlen;
532  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
533  }
534 
535  return false;
536 }
537 
541 bool
542 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
543 {
544  return d_rcr->is_no_proxy_for_used()
545  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
546 }
547 
548 // Public methods. Mostly...
549 
556 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
557  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
558 
559 {
560  d_accept_deflate = rcr->get_deflate();
561  d_rcr = rcr;
562 
563  // Load in the default headers to send with a request. The empty Pragma
564  // headers overrides libcurl's default Pragma: no-cache header (which
565  // will disable caching by Squid, et c.). The User-Agent header helps
566  // make server logs more readable. 05/05/03 jhrg
567  d_request_headers.push_back(string("Pragma:"));
568  string user_agent = string("User-Agent: ") + string(CNAME)
569  + string("/") + string(CVER);
570  d_request_headers.push_back(user_agent);
571  if (d_accept_deflate)
572  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
573 
574  // HTTPCache::instance returns a valid ptr or 0.
575  if (d_rcr->get_use_cache())
576  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
577  else
578  d_http_cache = 0;
579 
580  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
581  << ")" << endl);
582 
583  if (d_http_cache) {
584  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
585  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
586  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
587  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
588  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
589  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
590  }
591 
592  d_cookie_jar = rcr->get_cookie_jar();
593 
594  www_lib_init(); // This may throw either Error or InternalErr
595 }
596 
597 HTTPConnect::~HTTPConnect()
598 {
599  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
600 
601  curl_easy_cleanup(d_curl);
602 
603  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
604 }
605 
/**
 * Predicate for find_if(): true when a header line starts with a given
 * prefix such as "Content-Type:". The comparison is case sensitive.
 *
 * The prefix is stored by value. Callers construct this object from string
 * literals, which creates a temporary string; a reference member would be
 * left dangling as soon as that temporary is destroyed.
 */
class HeaderMatch {
    const string d_header;  // The prefix to look for (an owned copy).
    public:
        // The typedefs std::unary_function used to supply (that base class
        // was removed in C++17).
        typedef const string &argument_type;
        typedef bool result_type;

        HeaderMatch(const string &header) : d_header(header) {}

        /** @return True if arg begins with the stored prefix. */
        bool operator()(const string &arg) const { return arg.find(d_header) == 0; }
};
613 
626 HTTPResponse *
627 HTTPConnect::fetch_url(const string &url)
628 {
629 #ifdef HTTP_TRACE
630  cout << "GET " << url << " HTTP/1.0" << endl;
631 #endif
632 
633  HTTPResponse *stream;
634 
635  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
636  stream = caching_fetch_url(url);
637  }
638  else {
639  stream = plain_fetch_url(url);
640  }
641 
642 #ifdef HTTP_TRACE
643  stringstream ss;
644  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
645  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
646  ss << stream->get_headers()->at(i) << endl;
647  }
648  cout << ss.str();
649 #endif
650 
651  ParseHeader parser;
652 
653  // An apparent quirk of libcurl is that it does not pass the Content-type
654  // header to the callback used to save them, but check and add it from the
655  // saved state variable only if it's not there (without this a test failed
656  // in HTTPCacheTest). jhrg 11/12/13
657  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
658  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
659  stream->get_headers()->push_back("Content-Type: " + d_content_type);
660 
661  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
662 
663 #ifdef HTTP_TRACE
664  cout << endl << endl;
665 #endif
666 
667  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
668  if (parser.get_location() != "" &&
669  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
670  delete stream;
671  return fetch_url(parser.get_location());
672  }
673 
674  stream->set_type(parser.get_object_type()); // uses the value of content-description
675 
676  stream->set_version(parser.get_server());
677  stream->set_protocol(parser.get_protocol());
678 
679  if (d_use_cpp_streams) {
680  stream->transform_to_cpp();
681  }
682 
683  return stream;
684 }
685 
686 // Look around for a reasonable place to put a temporary file. Check first
687 // the value of the TMPDIR env var. If that does not yield a path that's
688 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
689 // defined in stdio.h). If both come up empty, then use `./'.
690 
691 // Change this to a version that either returns a string or an open file
692 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
693 // (see open()) to make it more secure. Ideal solution: get deserialize()
694 // methods to read from a stream returned by libcurl, not from a temporary
695 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
696 static string
697 get_tempfile_template(const string &file_template)
698 {
699  string c;
700 
701  // Windows has one idea of the standard name(s) for a temporary files dir
702 #ifdef WIN32
703  // white list for a WIN32 directory
704  Regex directory("[-a-zA-Z0-9_:\\]*");
705 
706  // If we're OK to use getenv(), try it.
707 #ifdef USE_GETENV
708  c = getenv("TEMP");
709  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710  goto valid_temp_directory;
711 
712  c= getenv("TMP");
713  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
714  goto valid_temp_directory;
715 #endif // USE_GETENV
716 
717  // The windows default
718  c = "c:\tmp";
719  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
720  goto valid_temp_directory;
721 
722 #else // Unix/Linux/OSX has another...
723  // white list for a directory
724  Regex directory("[-a-zA-Z0-9_/]*");
725 #ifdef USE_GETENV
726  c = getenv("TMPDIR");
727  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
728  goto valid_temp_directory;
729 #endif // USE_GETENV
730 
731  // Unix defines this sometimes - if present, use it.
732 #ifdef P_tmpdir
733  if (access(P_tmpdir, W_OK | R_OK) == 0) {
734  c = P_tmpdir;
735  goto valid_temp_directory;
736  }
737 #endif
738 
739  // The Unix default
740  c = "/tmp";
741  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
742  goto valid_temp_directory;
743 
744 #endif // WIN32
745 
746  // If we found nothing useful, use the current directory
747  c = ".";
748 
749 valid_temp_directory:
750 
751 #ifdef WIN32
752  c += "\\" + file_template;
753 #else
754  c += "/" + file_template;
755 #endif
756 
757  return c;
758 }
759 
778 string
779 get_temp_file(FILE *&stream) throw(Error)
780 {
781  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
782 
783  vector<char> pathname(dods_temp.length() + 1);
784 
785  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
786 
787  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
788 
789  // Open truncated for update. NB: mkstemp() returns a file descriptor.
790 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
791  stream = fopen(_mktemp(&pathname[0]), "w+b");
792 #else
793  // Make sure that temp files are accessible only by the owner.
794  int mask = umask(077);
795  if (mask < 0)
796  throw Error("Could not set the file creation mask: " + string(strerror(errno)));
797  int fd = mkstemp(&pathname[0]);
798  if (fd < 0)
799  throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
800 
801  stream = fdopen(fd, "w+");
802  umask(mask);
803 #endif
804 
805  if (!stream)
806  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
807 
808  dods_temp = &pathname[0];
809  return dods_temp;
810 }
811 
812 
818 void
819 close_temp(FILE *s, const string &name)
820 {
821  int res = fclose(s);
822  if (res)
823  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
824 
825  res = unlink(name.c_str());
826  if (res != 0)
827  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
828 }
829 
// Dereference a URL by way of the HTTP cache (d_http_cache).
//
// Three cases are handled:
//  1. The URL is not in the cache: fetch it with plain_fetch_url(), add the
//     response to the cache and return it.
//  2. The URL is cached and the entry is valid: return the cached response.
//  3. The URL is cached but not valid: send a conditional request. A 200
//     status means a new response was read and is cached; a 304 status means
//     the cached body is still good and only its headers are updated. Any
//     other status is reported as an error.
//
// Returns a pointer to a new HTTPResponse (or HTTPCacheResponse).
// Throws Error for a status of 400 or more and InternalErr for any other
// unexpected status; exceptions from read_url() are passed on after the
// temporary file and the header vector have been released.
851 HTTPResponse *
852 HTTPConnect::caching_fetch_url(const string &url)
853 {
854  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
855 
856  vector<string> *headers = new vector<string>;
857  string file_name;
858  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
859  if (!s) {
860  // url not in cache; get it and cache it
861  DBGN(cerr << "no; getting response and caching." << endl);
// The response built by plain_fetch_url() carries its own header vector,
// so the one allocated above is not needed on this path.
862  delete headers; headers = 0;
863  time_t now = time(0);
864  HTTPResponse *rs = plain_fetch_url(url);
865  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
866 
867  return rs;
868  }
869  else { // url in cache
870  DBGN(cerr << "yes... ");
871 
872  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
873  DBGN(cerr << "and it's valid; using cached response." << endl);
874  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
875  return crs;
876  }
877  else { // url in cache but not valid; validate
878  DBGN(cerr << "but it's not valid; validating... ");
879 
880  d_http_cache->release_cached_response(s); // This closes 's'
// Reuse the vector for the headers of the conditional request's response.
881  headers->clear();
882  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
883  FILE *body = 0;
// NOTE(review): get_temp_file() is called outside the try block below, so
// 'headers' is not deleted if it throws.
884  string dods_temp = get_temp_file(body);
885  time_t now = time(0); // When was the request made (now).
886  long http_status;
887 
888  try {
889  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
890  rewind(body);
891  }
892  catch (Error &e) {
893  close_temp(body, dods_temp);
894  delete headers;
895  throw ;
896  }
897 
898  switch (http_status) {
899  case 200: { // New headers and new body
900  DBGN(cerr << "read a new response; caching." << endl);
901 
902  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
903  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
904 
905  return rs;
906  }
907 
908  case 304: { // Just new headers, use cached body
909  DBGN(cerr << "cached response valid; updating." << endl);
910 
// The temporary file made for the conditional request is not needed here.
911  close_temp(body, dods_temp);
912  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
// This declaration hides the 'file_name' declared at the top of the function.
913  string file_name;
914  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
915  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
916  return crs;
917  }
918 
919  default: { // Oops.
920  close_temp(body, dods_temp);
921  if (http_status >= 400) {
922  delete headers; headers = 0;
923  string msg = "Error while reading the URL: ";
924  msg += url;
925  msg
926  += ".\nThe OPeNDAP server returned the following message:\n";
927  msg += http_status_to_string(http_status);
928  throw Error(msg);
929  }
930  else {
931  delete headers; headers = 0;
932  throw InternalErr(__FILE__, __LINE__,
933  "Bad response from the HTTP server: " + long_to_string(http_status));
934  }
935  }
936  }
937  }
938  }
939 
// Every path above either returns or throws.
940  throw InternalErr(__FILE__, __LINE__, "Should never get here");
941 }
942 
954 HTTPResponse *
955 HTTPConnect::plain_fetch_url(const string &url)
956 {
957  DBG(cerr << "Getting URL: " << url << endl);
958  FILE *stream = 0;
959  string dods_temp = get_temp_file(stream);
960  vector<string> *resp_hdrs = new vector<string>;
961 
962  int status = -1;
963  try {
964  status = read_url(url, stream, resp_hdrs); // Throws Error.
965  if (status >= 400) {
966  // delete resp_hdrs; resp_hdrs = 0;
967  string msg = "Error while reading the URL: ";
968  msg += url;
969  msg += ".\nThe OPeNDAP server returned the following message:\n";
970  msg += http_status_to_string(status);
971  throw Error(msg);
972  }
973  }
974 
975  catch (Error &e) {
976  delete resp_hdrs;
977  close_temp(stream, dods_temp);
978  throw;
979  }
980 
981 #if 0
982  if (d_use_cpp_streams) {
983  fclose(stream);
984  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
985  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
986  }
987  else {
988 #endif
989  rewind(stream);
990  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
991 #if 0
992 }
993 #endif
994 }
995 
1007 void
1009 {
1010  d_accept_deflate = deflate;
1011 
1012  if (d_accept_deflate) {
1013  if (find(d_request_headers.begin(), d_request_headers.end(),
1014  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1015  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1016  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1017  ostream_iterator<string>(cerr, "\n")));
1018  }
1019  else {
1020  vector<string>::iterator i;
1021  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1022  bind2nd(equal_to<string>(),
1023  string("Accept-Encoding: deflate, gzip, compress")));
1024  d_request_headers.erase(i, d_request_headers.end());
1025  }
1026 }
1027 
1036 void
1037 HTTPConnect::set_xdap_protocol(int major, int minor)
1038 {
1039  // Look for, and remove if one exists, an XDAP-Accept header
1040  vector<string>::iterator i;
1041  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1042  HeaderMatch("XDAP-Accept:"));
1043  if (i != d_request_headers.end())
1044  d_request_headers.erase(i);
1045 
1046  // Record and add the new header value
1047  d_dap_client_protocol_major = major;
1048  d_dap_client_protocol_minor = minor;
1049  ostringstream xdap_accept;
1050  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1051 
1052  d_request_headers.push_back(xdap_accept.str());
1053 
1054  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1055  ostream_iterator<string>(cerr, "\n")));
1056 }
1057 
1073 void
1074 HTTPConnect::set_credentials(const string &u, const string &p)
1075 {
1076  if (u.empty())
1077  return;
1078 
1079  // Store the credentials locally.
1080  d_username = u;
1081  d_password = p;
1082 
1083  d_upstring = u + ":" + p;
1084 }
1085 
1086 } // namespace libdap
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_credentials(const string &u, const string &p)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
STL namespace.
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:627
top level DAP object to house generic methods
Definition: AISConnect.cc:30
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:912
int match(const char *s, int len, int pos=0)
Does the pattern match.
Definition: GNURegex.cc:115
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:819
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:779
void set_accept_deflate(bool defalte)
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void set_xdap_protocol(int major, int minor)
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
A class for error processing.
Definition: Error.h:92
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772