libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCache.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <cerrno>
40 
41 #include <iostream>
42 #include <sstream>
43 #include <algorithm>
44 #include <iterator>
45 #include <set>
46 
47 #include "Error.h"
48 #include "InternalErr.h"
49 #include "ResponseTooBigErr.h"
50 #ifndef WIN32
51 #include "SignalHandler.h"
52 #endif
53 #include "HTTPCacheInterruptHandler.h"
54 #include "HTTPCacheTable.h"
55 #include "HTTPCache.h"
56 #include "HTTPCacheMacros.h"
57 #include "SignalHandlerRegisteredErr.h"
58 
59 #include "util_mit.h"
60 #include "debug.h"
61 
62 using namespace std;
63 
64 namespace libdap {
65 
66 HTTPCache *HTTPCache::_instance = 0;
67 
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// which is run through pthread_once() using the once-control variable
// once_block. All of this ensures that no matter how many threads call the
// instance() method, only one instance is ever made.
75 static pthread_mutex_t instance_mutex;
76 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77 
78 
// Parenthesized so the macro expands safely inside larger expressions
// (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10 // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
89 
90 static void
91 once_init_routine()
92 {
93  int status;
94  status = INIT(&instance_mutex);
95 
96  if (status != 0)
97  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
98 }
99 
128 HTTPCache *
129 HTTPCache::instance(const string &cache_root, bool force)
130 {
131  int status = pthread_once(&once_block, once_init_routine);
132  if (status != 0)
133  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
134 
135  LOCK(&instance_mutex);
136 
137  DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
138 
139  try {
140  if (!_instance) {
141  _instance = new HTTPCache(cache_root, force);
142 
143  DBG(cerr << "New instance: " << _instance << ", cache root: "
144  << _instance->d_cache_root << endl);
145 
146  atexit(delete_instance);
147 
148 #ifndef WIN32
149  // Register the interrupt handler. If we've already registered
150  // one, barf. If this becomes a problem, hack SignalHandler so
151  // that we can chain these handlers... 02/10/04 jhrg
152  //
153  // Technically we're leaking memory here. However, since this
154  // class is a singleton, we know that only three objects will
155  // ever be created and they will all exist until the process
156  // exits. We can let this slide... 02/12/04 jhrg
157  EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
158  if (old_eh) {
159  SignalHandler::instance()->register_handler(SIGINT, old_eh);
161  "Could not register event handler for SIGINT without superseding an existing one.");
162  }
163 
164  old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
165  if (old_eh) {
166  SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
168  "Could not register event handler for SIGPIPE without superseding an existing one.");
169  }
170 
171  old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
172  if (old_eh) {
173  SignalHandler::instance()->register_handler(SIGTERM, old_eh);
175  "Could not register event handler for SIGTERM without superseding an existing one.");
176  }
177 #endif
178  }
179  }
180  catch (...) {
181  DBG2(cerr << "The constructor threw an Error!" << endl);
182  UNLOCK(&instance_mutex);
183  throw;
184  }
185 
186  UNLOCK(&instance_mutex);
187  DBGN(cerr << "returning " << hex << _instance << dec << endl);
188 
189  return _instance;
190 }
191 
195 void
196 HTTPCache::delete_instance()
197 {
198  DBG(cerr << "Entering delete_instance()..." << endl);
199 
200  if (HTTPCache::_instance) {
201  DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
202  delete HTTPCache::_instance;
203  HTTPCache::_instance = 0;
204 
205  //Now remove the signal handlers
206  delete SignalHandler::instance()->remove_handler(SIGINT);
207  delete SignalHandler::instance()->remove_handler(SIGPIPE);
208  delete SignalHandler::instance()->remove_handler(SIGTERM);
209  }
210 
211  DBG(cerr << "Exiting delete_instance()" << endl);
212 }
213 
228 HTTPCache::HTTPCache(string cache_root, bool force) :
229  d_locked_open_file(0),
230  d_cache_enabled(false),
231  d_cache_protected(false),
232 
233  d_cache_disconnected(DISCONNECT_NONE),
234 
235  d_expire_ignored(false),
236  d_always_validate(false),
237  d_total_size(CACHE_TOTAL_SIZE * MEGA),
238  d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
239  d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
240  d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
241  d_default_expiration(NO_LM_EXPIRATION),
242  d_max_age(-1),
243  d_max_stale(-1),
244  d_min_fresh(-1),
245  d_http_cache_table(0)
246 {
247  DBG(cerr << "Entering the constructor for " << this << "... ");
248 #if 0
249  int status = pthread_once(&once_block, once_init_routine);
250  if (status != 0)
251  throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
252 #endif
253  INIT(&d_cache_mutex);
254 
255  // This used to throw an Error object if we could not get the
256  // single user lock. However, that results in an invalid object. It's
257  // better to have an instance that has default values. If we cannot get
258  // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
259  //
260  // I fixed this block so that the cache root is set before we try to get
261  // the single user lock. That was the fix for bug #661. To make that
262  // work, I had to move the call to create_cache_root out of
263  // set_cache_root(). 09/08/03 jhrg
264 
265  set_cache_root(cache_root);
266  int block_size;
267 
268  if (!get_single_user_lock(force))
269  throw Error(internal_error, "Could not get single user lock for the cache");
270 
271 #ifdef WIN32
272  // Windows is unable to provide us this information. 4096 appears
273  // a best guess. It is likely to be in the range [2048, 8192] on
274  // windows, but will the level of truth of that statement vary over
275  // time ?
276  block_size = 4096;
277 #else
278  struct stat s;
279  if (stat(cache_root.c_str(), &s) == 0)
280  block_size = s.st_blksize;
281  else
282  throw Error(internal_error, "Could not set file system block size.");
283 #endif
284  d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
285  d_cache_enabled = true;
286 
287  DBGN(cerr << "exiting" << endl);
288 }
289 
302 HTTPCache::~HTTPCache()
303 {
304  DBG(cerr << "Entering the destructor for " << this << "... ");
305 
306  try {
307  if (startGC())
308  perform_garbage_collection();
309 
310  d_http_cache_table->cache_index_write();
311  }
312  catch (Error &e) {
313  // If the cache index cannot be written, we've got problems. However,
314  // unless we're debugging, still free up the cache table in memory.
315  // How should we let users know they cache index is not being
316  // written?? 10/03/02 jhrg
317  DBG(cerr << e.get_error_message() << endl);
318  }
319 
320  delete d_http_cache_table;
321 
322  release_single_user_lock();
323 
324  DBGN(cerr << "exiting destructor." << endl);
325  DESTROY(&d_cache_mutex);
326 }
327 
328 
332 
336 bool
337 HTTPCache::stopGC() const
338 {
339  return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
340 }
341 
348 bool
349 HTTPCache::startGC() const
350 {
351  DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
352  return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
353 }
354 
369 void
370 HTTPCache::perform_garbage_collection()
371 {
372  DBG(cerr << "Performing garbage collection" << endl);
373 
374  // Remove all the expired responses.
375  expired_gc();
376 
377  // Remove entries larger than max_entry_size.
378  too_big_gc();
379 
380  // Remove entries starting with zero hits, 1, ..., until stopGC()
381  // returns true.
382  hits_gc();
383 }
384 
390 void
391 HTTPCache::expired_gc()
392 {
393  if (!d_expire_ignored) {
394  d_http_cache_table->delete_expired_entries();
395  }
396 }
397 
414 void
415 HTTPCache::hits_gc()
416 {
417  int hits = 0;
418 
419  if (startGC()) {
420  while (!stopGC()) {
421  d_http_cache_table->delete_by_hits(hits);
422  hits++;
423  }
424  }
425 }
426 
431 void HTTPCache::too_big_gc() {
432  if (startGC())
433  d_http_cache_table->delete_by_size(d_max_entry_size);
434 }
435 
437 
448 bool HTTPCache::get_single_user_lock(bool force)
449 {
450  if (!d_locked_open_file) {
451  FILE * fp = NULL;
452 
453  try {
454  // It's OK to call create_cache_root if the directory already
455  // exists.
456  create_cache_root(d_cache_root);
457  }
458  catch (Error &e) {
459  // We need to catch and return false because this method is
460  // called from a ctor and throwing at this point will result in a
461  // partially constructed object. 01/22/04 jhrg
462  DBG(cerr << "Failure to create the cache root" << endl);
463  return false;
464  }
465 
466  // Try to read the lock file. If we can open for reading, it exists.
467  string lock = d_cache_root + CACHE_LOCK;
468  if ((fp = fopen(lock.c_str(), "r")) != NULL) {
469  int res = fclose(fp);
470  if (res) {
471  DBG(cerr << "Failed to close " << (void *)fp << endl);
472  }
473  if (force)
474  REMOVE(lock.c_str());
475  else
476  return false;
477  }
478 
479  if ((fp = fopen(lock.c_str(), "w")) == NULL) {
480  DBG(cerr << "Could not open for write access" << endl);
481  return false;
482  }
483 
484  d_locked_open_file = fp;
485  return true;
486  }
487 
488  DBG(cerr << "locked_open_file is true" << endl);
489  return false;
490 }
491 
494 void
495 HTTPCache::release_single_user_lock()
496 {
497  if (d_locked_open_file) {
498  int res = fclose(d_locked_open_file);
499  if (res) {
500  DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
501  }
502  d_locked_open_file = 0;
503  }
504 
505  string lock = d_cache_root + CACHE_LOCK;
506  REMOVE(lock.c_str());
507 }
508 
511 
515 string
516 HTTPCache::get_cache_root() const
517 {
518  return d_cache_root;
519 }
520 
521 
530 void
531 HTTPCache::create_cache_root(const string &cache_root)
532 {
533 #ifdef WIN32
534  string::size_type cur = cache_root[1] == ':' ? 3 : 1;
535  typedef int mode_t;
536 
537  while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538  string dir = cache_root.substr(0, cur);
539  struct stat stat_info;
540  if (stat(dir.c_str(), &stat_info) == -1) {
541  DBG2(cerr << "Cache....... Creating " << dir << endl);
542  mode_t mask = UMASK(0);
543  if (MKDIR(dir.c_str(), 0777) < 0) {
544  DBG2(cerr << "Error: can't create." << endl);
545  UMASK(mask);
546  throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
547  }
548  UMASK(mask);
549  }
550  else {
551  DBG2(cerr << "Cache....... Found " << dir << endl);
552  }
553  cur++;
554  }
555 #else
556  // OSX and Linux
557 
558  // Save the mask
559  mode_t mask = umask(0);
560 
561  // Ignore the error if the directory exists
562  errno = 0;
563  if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
564  umask(mask);
565  throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
566  }
567 
568  // Restore themask
569  umask(mask);
570 
571 #endif
572 }
573 
588 void
589 HTTPCache::set_cache_root(const string &root)
590 {
591  if (root != "") {
592  d_cache_root = root;
593  // cache root should end in /.
594  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
595  d_cache_root += DIR_SEPARATOR_CHAR;
596  }
597  else {
598  // If no cache root has been indicated then look for a suitable
599  // location.
600 #ifdef USE_GETENV
601  char * cr = (char *) getenv("DODS_CACHE");
602  if (!cr) cr = (char *) getenv("TMP");
603  if (!cr) cr = (char *) getenv("TEMP");
604  if (!cr) cr = (char*)CACHE_LOCATION;
605  d_cache_root = cr;
606 #else
607  d_cache_root = CACHE_LOCATION;
608 #endif
609 
610  if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
611  d_cache_root += DIR_SEPARATOR_CHAR;
612 
613  d_cache_root += CACHE_ROOT;
614  }
615 
616  // Test d_hhtp_cache_table because this method can be called before that
617  // instance is created and also can be called later to change the cache
618  // root. jhrg 05.14.08
619  if (d_http_cache_table)
620  d_http_cache_table->set_cache_root(d_cache_root);
621 }
622 
634 void
635 HTTPCache::set_cache_enabled(bool mode)
636 {
637  lock_cache_interface();
638 
639  d_cache_enabled = mode;
640 
641  unlock_cache_interface();
642 }
643 
646 bool
647 HTTPCache::is_cache_enabled() const
648 {
649  DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
650  << endl);
651  return d_cache_enabled;
652 }
653 
663 void
664 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
665 {
666  lock_cache_interface();
667 
668  d_cache_disconnected = mode;
669 
670  unlock_cache_interface();
671 }
672 
676 HTTPCache::get_cache_disconnected() const
677 {
678  return d_cache_disconnected;
679 }
680 
689 void
690 HTTPCache::set_expire_ignored(bool mode)
691 {
692  lock_cache_interface();
693 
694  d_expire_ignored = mode;
695 
696  unlock_cache_interface();
697 }
698 
699 /* Is the cache ignoring Expires headers returned with responses that have
700  been cached? */
701 
702 bool
703 HTTPCache::is_expire_ignored() const
704 {
705  return d_expire_ignored;
706 }
707 
723 void
724 HTTPCache::set_max_size(unsigned long size)
725 {
726  lock_cache_interface();
727 
728  try {
729  unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
730  MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
731  unsigned long old_size = d_total_size;
732  d_total_size = new_size;
733  d_folder_size = d_total_size / CACHE_FOLDER_PCT;
734  d_gc_buffer = d_total_size / CACHE_GC_PCT;
735 
736  if (new_size < old_size && startGC()) {
737  perform_garbage_collection();
738  d_http_cache_table->cache_index_write();
739  }
740  }
741  catch (...) {
742  unlock_cache_interface();
743  DBGN(cerr << "Unlocking interface." << endl);
744  throw;
745  }
746 
747  DBG2(cerr << "Cache....... Total cache size: " << d_total_size
748  << " with " << d_folder_size
749  << " bytes for meta information and folders and at least "
750  << d_gc_buffer << " bytes free after every gc" << endl);
751 
752  unlock_cache_interface();
753 }
754 
757 unsigned long
758 HTTPCache::get_max_size() const
759 {
760  return d_total_size / MEGA;
761 }
762 
771 void
772 HTTPCache::set_max_entry_size(unsigned long size)
773 {
774  lock_cache_interface();
775 
776  try {
777  unsigned long new_size = size * MEGA;
778  if (new_size > 0 && new_size < d_total_size - d_folder_size) {
779  unsigned long old_size = d_max_entry_size;
780  d_max_entry_size = new_size;
781  if (new_size < old_size && startGC()) {
782  perform_garbage_collection();
783  d_http_cache_table->cache_index_write();
784  }
785  }
786  }
787  catch (...) {
788  unlock_cache_interface();
789  throw;
790  }
791 
792  DBG2(cerr << "Cache...... Max entry cache size is "
793  << d_max_entry_size << endl);
794 
795  unlock_cache_interface();
796 }
797 
802 unsigned long
803 HTTPCache::get_max_entry_size() const
804 {
805  return d_max_entry_size / MEGA;
806 }
807 
818 void
819 HTTPCache::set_default_expiration(const int exp_time)
820 {
821  lock_cache_interface();
822 
823  d_default_expiration = exp_time;
824 
825  unlock_cache_interface();
826 }
827 
830 int
831 HTTPCache::get_default_expiration() const
832 {
833  return d_default_expiration;
834 }
835 
840 void
841 HTTPCache::set_always_validate(bool validate)
842 {
843  d_always_validate = validate;
844 }
845 
849 bool
850 HTTPCache::get_always_validate() const
851 {
852  return d_always_validate;
853 }
854 
871 void
872 HTTPCache::set_cache_control(const vector<string> &cc)
873 {
874  lock_cache_interface();
875 
876  try {
877  d_cache_control = cc;
878 
879  vector<string>::const_iterator i;
880  for (i = cc.begin(); i != cc.end(); ++i) {
881  string header = (*i).substr(0, (*i).find(':'));
882  string value = (*i).substr((*i).find(": ") + 2);
883  if (header != "Cache-Control") {
884  throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
885  }
886  else {
887  if (value == "no-cache" || value == "no-store")
888  d_cache_enabled = false;
889  else if (value.find("max-age") != string::npos) {
890  string max_age = value.substr(value.find("=" + 1));
891  d_max_age = parse_time(max_age.c_str());
892  }
893  else if (value == "max-stale")
894  d_max_stale = 0; // indicates will take anything;
895  else if (value.find("max-stale") != string::npos) {
896  string max_stale = value.substr(value.find("=" + 1));
897  d_max_stale = parse_time(max_stale.c_str());
898  }
899  else if (value.find("min-fresh") != string::npos) {
900  string min_fresh = value.substr(value.find("=" + 1));
901  d_min_fresh = parse_time(min_fresh.c_str());
902  }
903  }
904  }
905  }
906  catch (...) {
907  unlock_cache_interface();
908  throw;
909  }
910 
911  unlock_cache_interface();
912 }
913 
914 
919 vector<string>
920 HTTPCache::get_cache_control()
921 {
922  return d_cache_control;
923 }
924 
926 
935 bool
936 HTTPCache::is_url_in_cache(const string &url)
937 {
938  DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
939 
940  HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
941  bool status = entry != 0;
942  if (entry) {
943  entry->unlock_read_response();
944  }
945  return status;
946 }
947 
/**
 * Tell whether a header line mentions one of the hop-by-hop header names
 * that write_metadata() leaves out of the cache.
 *
 * The names are searched for anywhere in the string and the match is case
 * sensitive.
 *
 * @param header A complete header line.
 * @return True if the line contains Connection, Keep-Alive,
 * Proxy-Authenticate, Proxy-Authorization, Transfer-Encoding or Upgrade.
 */
bool
is_hop_by_hop_header(const string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != string::npos)
            return true;
    }

    return false;
}
963 
975 void
976 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
977 {
978  string fname = cachename + CACHE_META;
979  d_open_files.push_back(fname);
980 
981  FILE *dest = fopen(fname.c_str(), "w");
982  if (!dest) {
983  throw InternalErr(__FILE__, __LINE__,
984  "Could not open named cache entry file.");
985  }
986 
987  vector<string>::const_iterator i;
988  for (i = headers.begin(); i != headers.end(); ++i) {
989  if (!is_hop_by_hop_header(*i)) {
990  int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
991  if (s != 1) {
992  fclose(dest);
993  throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
994  }
995  s = fwrite("\n", 1, 1, dest);
996  if (s != 1) {
997  fclose(dest);
998  throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
999  }
1000  }
1001  }
1002 
1003  int res = fclose(dest);
1004  if (res) {
1005  DBG(cerr << "HTTPCache::write_metadata - Failed to close "
1006  << dest << endl);
1007  }
1008 
1009  d_open_files.pop_back();
1010 }
1011 
1022 void
1023 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1024 {
1025  FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1026  if (!md) {
1027  throw InternalErr(__FILE__, __LINE__,
1028  "Could not open named cache entry meta data file.");
1029  }
1030 
1031  char line[1024];
1032  while (!feof(md) && fgets(line, 1024, md)) {
1033  line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1034  headers.push_back(string(line));
1035  }
1036 
1037  int res = fclose(md);
1038  if (res) {
1039  DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1040  << md << endl);
1041  }
1042 }
1043 
1065 int
1066 HTTPCache::write_body(const string &cachename, const FILE *src)
1067 {
1068  d_open_files.push_back(cachename);
1069 
1070  FILE *dest = fopen(cachename.c_str(), "wb");
1071  if (!dest) {
1072  throw InternalErr(__FILE__, __LINE__,
1073  "Could not open named cache entry file.");
1074  }
1075 
1076  // Read and write in 1k blocks; an attempt at doing this efficiently.
1077  // 09/30/02 jhrg
1078  char line[1024];
1079  size_t n;
1080  int total = 0;
1081  while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1082  total += fwrite(line, 1, n, dest);
1083  DBG2(sleep(3));
1084  }
1085 
1086  if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1087  int res = fclose(dest);
1088  res = res & unlink(cachename.c_str());
1089  if (res) {
1090  DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1091  << dest << endl);
1092  }
1093  throw InternalErr(__FILE__, __LINE__,
1094  "I/O error transferring data to the cache.");
1095  }
1096 
1097  rewind(const_cast<FILE *>(src));
1098 
1099  int res = fclose(dest);
1100  if (res) {
1101  DBG(cerr << "HTTPCache::write_body - Failed to close "
1102  << dest << endl);
1103  }
1104 
1105  d_open_files.pop_back();
1106 
1107  return total;
1108 }
1109 
1118 FILE *
1119 HTTPCache::open_body(const string &cachename)
1120 {
1121  DBG(cerr << "cachename: " << cachename << endl);
1122 
1123  FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1124  if (!src)
1125  throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1126 
1127  return src;
1128 }
1129 
1155 bool
1156 HTTPCache::cache_response(const string &url, time_t request_time,
1157  const vector<string> &headers, const FILE *body)
1158 {
1159  lock_cache_interface();
1160 
1161  DBG(cerr << "Caching url: " << url << "." << endl);
1162 
1163  try {
1164  // If this is not an http or https URL, don't cache.
1165  if (url.find("http:") == string::npos &&
1166  url.find("https:") == string::npos) {
1167  unlock_cache_interface();
1168  return false;
1169  }
1170 
1171  // This does nothing if url is not already in the cache. It's
1172  // more efficient to do this than to first check and see if the entry
1173  // exists. 10/10/02 jhrg
1174  d_http_cache_table->remove_entry_from_cache_table(url);
1175 
1177  entry->lock_write_response();
1178 
1179  try {
1180  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1181  if (entry->is_no_cache()) {
1182  DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1183  << "(" << url << ")" << endl);
1184  entry->unlock_write_response();
1185  delete entry; entry = 0;
1186  unlock_cache_interface();
1187  return false;
1188  }
1189 
1190  // corrected_initial_age, freshness_lifetime, response_time.
1191  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1192 
1193  d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1194  // move these write function to cache table
1195  entry->set_size(write_body(entry->get_cachename(), body));
1196  write_metadata(entry->get_cachename(), headers);
1197  d_http_cache_table->add_entry_to_cache_table(entry);
1198  entry->unlock_write_response();
1199  }
1200  catch (ResponseTooBigErr &e) {
1201  // Oops. Bummer. Clean up and exit.
1202  DBG(cerr << e.get_error_message() << endl);
1203  REMOVE(entry->get_cachename().c_str());
1204  REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1205  DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1206  << ")" << endl);
1207  entry->unlock_write_response();
1208  delete entry; entry = 0;
1209  unlock_cache_interface();
1210  return false;
1211  }
1212 
1213  if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1214  if (startGC())
1215  perform_garbage_collection();
1216 
1217  d_http_cache_table->cache_index_write(); // resets new_entries
1218  }
1219  }
1220  catch (...) {
1221  unlock_cache_interface();
1222  throw;
1223  }
1224 
1225  unlock_cache_interface();
1226 
1227  return true;
1228 }
1229 
1248 vector<string>
1249 HTTPCache::get_conditional_request_headers(const string &url)
1250 {
1251  lock_cache_interface();
1252 
1253  HTTPCacheTable::CacheEntry *entry = 0;
1254  vector<string> headers;
1255 
1256  DBG(cerr << "Getting conditional request headers for " << url << endl);
1257 
1258  try {
1259  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1260  if (!entry)
1261  throw Error(internal_error, "There is no cache entry for the URL: " + url);
1262 
1263  if (entry->get_etag() != "")
1264  headers.push_back(string("If-None-Match: ") + entry->get_etag());
1265 
1266  if (entry->get_lm() > 0) {
1267  time_t lm = entry->get_lm();
1268  headers.push_back(string("If-Modified-Since: ")
1269  + date_time_str(&lm));
1270  }
1271  else if (entry->get_max_age() > 0) {
1272  time_t max_age = entry->get_max_age();
1273  headers.push_back(string("If-Modified-Since: ")
1274  + date_time_str(&max_age));
1275  }
1276  else if (entry->get_expires() > 0) {
1277  time_t expires = entry->get_expires();
1278  headers.push_back(string("If-Modified-Since: ")
1279  + date_time_str(&expires));
1280  }
1281  entry->unlock_read_response();
1282  unlock_cache_interface();
1283  }
1284  catch (...) {
1285  unlock_cache_interface();
1286  if (entry) {
1287  entry->unlock_read_response();
1288  }
1289  throw;
1290  }
1291 
1292  return headers;
1293 }
1294 
/**
 * Strict weak ordering for header lines that compares only the header name,
 * i.e. the text before the first ':' (the whole string when there is no
 * colon). Two headers with the same name are equivalent whatever their
 * values, which is what lets a set<string, HeaderLess> keep a single header
 * per name.
 *
 * The member typedefs stand in for the ones formerly inherited from
 * std::binary_function, which was removed from the standard in C++17.
 */
struct HeaderLess
{
    typedef const string &first_argument_type;
    typedef const string &second_argument_type;
    typedef bool result_type;

    bool operator()(const string &s1, const string &s2) const {
        // compare() clamps the counts to the string length, so an npos from
        // find() selects the whole string; no temporary substrings needed.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1304 
1318 void
1319 HTTPCache::update_response(const string &url, time_t request_time,
1320  const vector<string> &headers)
1321 {
1322  lock_cache_interface();
1323 
1324  HTTPCacheTable::CacheEntry *entry = 0;
1325  DBG(cerr << "Updating the response headers for: " << url << endl);
1326 
1327  try {
1328  entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1329  if (!entry)
1330  throw Error(internal_error, "There is no cache entry for the URL: " + url);
1331 
1332  // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1333  d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1334 
1335  // Update corrected_initial_age, freshness_lifetime, response_time.
1336  d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1337 
1338  // Merge the new headers with those in the persistent store. How:
1339  // Load the new headers into a set, then merge the old headers. Since
1340  // set<> ignores duplicates, old headers with the same name as a new
1341  // header will got into the bit bucket. Define a special compare
1342  // functor to make sure that headers are compared using only their
1343  // name and not their value too.
1344  set<string, HeaderLess> merged_headers;
1345 
1346  // Load in the new headers
1347  copy(headers.begin(), headers.end(),
1348  inserter(merged_headers, merged_headers.begin()));
1349 
1350  // Get the old headers and load them in.
1351  vector<string> old_headers;
1352  read_metadata(entry->get_cachename(), old_headers);
1353  copy(old_headers.begin(), old_headers.end(),
1354  inserter(merged_headers, merged_headers.begin()));
1355 
1356  // Read the values back out. Use reverse iterators with back_inserter
1357  // to preserve header order. NB: vector<> does not support push_front
1358  // so we can't use front_inserter(). 01/09/03 jhrg
1359  vector<string> result;
1360  copy(merged_headers.rbegin(), merged_headers.rend(),
1361  back_inserter(result));
1362 
1363  write_metadata(entry->get_cachename(), result);
1364  entry->unlock_write_response();
1365  unlock_cache_interface();
1366  }
1367  catch (...) {
1368  if (entry) {
1369  entry->unlock_read_response();
1370  }
1371  unlock_cache_interface();
1372  throw;
1373  }
1374 }
1375 
1387 bool
1388 HTTPCache::is_url_valid(const string &url)
1389 {
1390  lock_cache_interface();
1391 
1392  bool freshness;
1393  HTTPCacheTable::CacheEntry *entry = 0;
1394 
1395  DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1396 
1397  try {
1398  if (d_always_validate) {
1399  unlock_cache_interface();
1400  return false; // force re-validation.
1401  }
1402 
1403  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1404  if (!entry)
1405  throw Error(internal_error, "There is no cache entry for the URL: " + url);
1406 
1407  // If we supported range requests, we'd need code here to check if
1408  // there was only a partial response in the cache. 10/02/02 jhrg
1409 
1410  // In case this entry is of type "must-revalidate" then we consider it
1411  // invalid.
1412  if (entry->get_must_revalidate()) {
1413  entry->unlock_read_response();
1414  unlock_cache_interface();
1415  return false;
1416  }
1417 
1418  time_t resident_time = time(NULL) - entry->get_response_time();
1419  time_t current_age = entry->get_corrected_initial_age() + resident_time;
1420 
1421  // Check that the max-age, max-stale, and min-fresh directives
1422  // given in the request cache control header is followed.
1423  if (d_max_age >= 0 && current_age > d_max_age) {
1424  DBG(cerr << "Cache....... Max-age validation" << endl);
1425  entry->unlock_read_response();
1426  unlock_cache_interface();
1427  return false;
1428  }
1429  if (d_min_fresh >= 0
1430  && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1431  DBG(cerr << "Cache....... Min-fresh validation" << endl);
1432  entry->unlock_read_response();
1433  unlock_cache_interface();
1434  return false;
1435  }
1436 
1437  freshness = (entry->get_freshness_lifetime()
1438  + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1439  entry->unlock_read_response();
1440  unlock_cache_interface();
1441  }
1442  catch (...) {
1443  if (entry) {
1444  entry->unlock_read_response();
1445  }
1446  unlock_cache_interface();
1447  throw;
1448  }
1449 
1450  return freshness;
1451 }
1452 
1480 FILE * HTTPCache::get_cached_response(const string &url,
1481  vector<string> &headers, string &cacheName) {
1482  lock_cache_interface();
1483 
1484  FILE *body = 0;
1485  HTTPCacheTable::CacheEntry *entry = 0;
1486 
1487  DBG(cerr << "Getting the cached response for " << url << endl);
1488 
1489  try {
1490  entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1491  if (!entry) {
1492  unlock_cache_interface();
1493  return 0;
1494  }
1495 
1496  cacheName = entry->get_cachename();
1497  read_metadata(entry->get_cachename(), headers);
1498 
1499  DBG(cerr << "Headers just read from cache: " << endl);
1500  DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1501 
1502  body = open_body(entry->get_cachename());
1503 
1504  DBG(cerr << "Returning: " << url << " from the cache." << endl);
1505 
1506  d_http_cache_table->bind_entry_to_data(entry, body);
1507  }
1508  catch (...) {
1509  // Why make this unlock operation conditional on entry?
1510  if (entry)
1511  unlock_cache_interface();
1512  if (body != 0)
1513  fclose(body);
1514  throw;
1515  }
1516 
1517  unlock_cache_interface();
1518 
1519  return body;
1520 }
1521 
1533 FILE *
1534 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1535 {
1536  string discard_name;
1537  return get_cached_response(url, headers, discard_name);
1538 }
1539 
1550 FILE *
1551 HTTPCache::get_cached_response(const string &url)
1552 {
1553  string discard_name;
1554  vector<string> discard_headers;
1555  return get_cached_response(url, discard_headers, discard_name);
1556 }
1557 
1570 void
1571 HTTPCache::release_cached_response(FILE *body)
1572 {
1573  lock_cache_interface();
1574 
1575  try {
1576  // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1577  d_http_cache_table->uncouple_entry_from_data(body);
1578  }
1579  catch (...) {
1580  unlock_cache_interface();
1581  throw;
1582  }
1583 
1584  unlock_cache_interface();
1585 }
1586 
1599 void
1600 HTTPCache::purge_cache()
1601 {
1602  lock_cache_interface();
1603 
1604  try {
1605  if (d_http_cache_table->is_locked_read_responses())
1606  throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1607 
1608  d_http_cache_table->delete_all_entries();
1609  }
1610  catch (...) {
1611  unlock_cache_interface();
1612  throw;
1613  }
1614 
1615  unlock_cache_interface();
1616 }
1617 
1618 } // namespace libdap
time_t parse_time(const char *str, bool expand)
Definition: util_mit.cc:129
STL namespace.
top level DAP object to house generic methods
Definition: AISConnect.cc:30
A class for software fault reporting.
Definition: InternalErr.h:64
bool is_hop_by_hop_header(const string &header)
Definition: HTTPCache.cc:954
string date_time_str(time_t *calendar, bool local)
Definition: util_mit.cc:281
std::string get_error_message() const
Definition: Error.cc:278
A class for error processing.
Definition: Error.h:92