libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCache.cc
Go to the documentation of this file.
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28#include <cerrno>
29#include <cstring>
30#include <fcntl.h>
31#include <sys/stat.h>
32#include <unistd.h>
33
34#include <algorithm>
35#include <iterator>
36#include <memory>
37#include <mutex>
38#include <set>
39#include <sstream>
40
41#include "Error.h"
42#include "InternalErr.h"
43#include "ResponseTooBigErr.h"
44
45#include "HTTPCache.h"
46#include "HTTPCacheTable.h"
47
48#include "debug.h"
49#include "util.h"
50#include "util_mit.h"
51
52using namespace std;
53
// Parent directory used by set_cache_root() when no cache root is supplied.
const string CACHE_LOCATION = "/tmp/";

// Directory name appended to CACHE_LOCATION to form the default cache root.
const string CACHE_ROOT = "dods-cache/";

// File-name pieces used by the cache. CACHE_LOCK is appended to the cache
// root to name the lock file; CACHE_META is appended to an entry's cache
// name to name its header (metadata) file.
// NOTE(review): CACHE_INDEX and CACHE_EMPTY_ETAG are not referenced in this
// part of the file; presumably they are used by the cache table - confirm.
const string CACHE_INDEX = ".index";
const string CACHE_LOCK = ".lock";
const string CACHE_META = ".meta";
const string CACHE_EMPTY_ETAG = "@cache@";

// Path separator; the cache root is always made to end with this string.
const string DIR_SEPARATOR_CHAR = "/";
64
65namespace libdap {
66
80
81HTTPCache::HTTPCache(const string &cache_root) {
82 // This used to throw an Error object if we could not get the
83 // single user lock. However, that results in an invalid object. It's
84 // better to have an instance that has default values. If we cannot get
85 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
86 //
87 // I fixed this block so that the cache root is set before we try to get
88 // the single user lock. That was the fix for bug #661. To make that
89 // work, I had to move the call to create_cache_root out of
90 // set_cache_root(). 09/08/03 jhrg
91
92 try {
93 set_cache_root(cache_root); // sets d_cache_root, even if cache_root is empty.
94 // It's OK to call create_cache_root if the directory already exists.
95 create_cache_root(d_cache_root);
96 string lock = d_cache_root + CACHE_LOCK;
97 d_cache_lock_fd = m_initialize_cache_lock(lock);
98 d_cache_lock_file = lock;
99
100 struct stat s = {};
101 int block_size;
102 if (stat(cache_root.c_str(), &s) == 0)
103 block_size = s.st_blksize;
104 else
105 block_size = 4096;
106
107 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
108 d_cache_enabled = true;
109 } catch (const Error &) {
110 // Write to a log here. 2/18/23 jhrg
111 d_cache_enabled = false;
112 DBG(cerr << "Failure to get the cache lock" << endl);
113 }
114}
115
123
125 try {
126 if (startGC())
127 perform_garbage_collection();
128
129 d_http_cache_table->cache_index_write();
130 delete d_http_cache_table;
131 } catch (const Error &e) {
132 // If the cache index cannot be written, we've got problems. However,
133 // unless we're debugging, still free up the cache table in memory.
134 // How should we let users know their cache index is not being
135 // written?? 10/03/02 jhrg
136 // Write a log message here. 2/18/23 jhrg
137 DBG(cerr << e.get_error_message() << endl);
138 }
139
140 close(d_cache_lock_fd);
141}
142
146
149
150bool HTTPCache::stopGC() const {
151 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
152}
153
158bool HTTPCache::startGC() const {
159 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
160 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
161}
162
176
177void HTTPCache::perform_garbage_collection() {
178 DBG(cerr << "Performing garbage collection" << endl);
179
180 // Remove all the expired responses.
181 expired_gc();
182
183 // Remove entries larger than max_entry_size.
184 too_big_gc();
185
186 // Remove entries starting with zero hits, 1, ..., until stopGC()
187 // returns true.
188 hits_gc();
189}
190
195
196void HTTPCache::expired_gc() {
197 if (!d_expire_ignored) {
198 d_http_cache_table->delete_expired_entries();
199 }
200}
201
217void HTTPCache::hits_gc() {
218 int hits = 0;
219
220 if (startGC()) {
221 while (!stopGC()) {
222 d_http_cache_table->delete_by_hits(hits);
223 hits++;
224 }
225 }
226}
227
232void HTTPCache::too_big_gc() {
233 if (startGC())
234 d_http_cache_table->delete_by_size(d_max_entry_size);
235}
236
238
241
/**
 * Describe the current value of errno.
 *
 * @return The text from strerror(errno), or "unknown error" when strerror()
 * yields a null pointer.
 */
static inline string get_errno() {
    if (const char *message = strerror(errno))
        return message;

    return "unknown error";
}
246
// Build a lock of a certain type.
//
// Using whence == SEEK_SET with start and len set to zero means lock the whole file.
// jhrg 9/8/18
//
// The returned pointer refers to per-thread storage that is overwritten by
// the next call made on the same thread, so use it immediately (e.g., pass
// it straight to fcntl()). The storage is thread_local so that concurrent
// callers on different threads do not overwrite each other's request.
static inline struct flock *lock(short type) {
    static thread_local struct flock lock;
    lock.l_type = type;
    lock.l_whence = SEEK_SET;
    lock.l_start = 0;
    lock.l_len = 0;
    lock.l_pid = getpid();

    return &lock;
}
261
271static bool create_locked_file(const string &file_name, int &ref_fd) {
272 DBG(cerr << "BEGIN file: " << file_name << endl);
273
274 int fd;
275 if ((fd = open(file_name.c_str(), O_CREAT | O_EXCL | O_RDWR, 0660)) < 0) {
276 if (errno == EEXIST) {
277 return false;
278 } else {
279 throw InternalErr(__FILE__, __LINE__, file_name + ": " + get_errno());
280 }
281 }
282
283 struct flock *l = lock(F_WRLCK);
284 // F_SETLKW == set lock, blocking
285 if (fcntl(fd, F_SETLKW, l) == -1) {
286 close(fd);
287 ostringstream oss;
288 oss << "cache process: " << l->l_pid << " triggered a locking error for '" << file_name << "': " << get_errno();
289 throw InternalErr(__FILE__, __LINE__, oss.str());
290 }
291
292 DBG(cerr << "END file: " << file_name << endl);
293
294 // Success
295 ref_fd = fd;
296 return true;
297}
298
310int HTTPCache::m_initialize_cache_lock(const string &cache_lock) const {
311 DBG(cerr << "BEGIN" << endl);
312
313 int fd;
314 if (create_locked_file(cache_lock, fd)) {
315 // This leaves the fd file descriptor open, but unlocked
316 m_unlock_cache(fd);
317 } else {
318 if ((fd = open(cache_lock.c_str(), O_RDWR)) == -1) {
319 throw InternalErr(__FILE__, __LINE__,
320 "Failed to open cache lock file: " + cache_lock + " errno: " + get_errno());
321 }
322 }
323
324 DBG(cerr << "END" << endl);
325 return fd;
326}
327
336void HTTPCache::m_lock_cache_write(int fd) {
337 DBG(cerr << "d_cache_info_fd: " << d_cache_info_fd << endl);
338
339 // F_SETLKW == set lock, blocking
340 if (fcntl(fd, F_SETLKW, lock(F_WRLCK)) == -1) {
341 throw InternalErr(__FILE__, __LINE__, "An error occurred trying to lock the cache-control file" + get_errno());
342 }
343
344 DBG(cerr << "lock status: " << lockStatus(d_cache_info_fd) << endl);
345}
346
350void HTTPCache::m_lock_cache_read(int fd) {
351 DBG(cerr << "d_cache_info_fd: " << d_cache_info_fd << endl);
352
353 if (fcntl(fd, F_SETLKW, lock(F_RDLCK)) == -1) {
354 throw InternalErr(__FILE__, __LINE__, "An error occurred trying to lock the cache-control file" + get_errno());
355 }
356
357 DBG(cerr << "lock status: " << lockStatus(d_cache_info_fd) << endl);
358}
359
365void HTTPCache::m_unlock_cache(int fd) {
366 DBG(cerr << "d_cache_info_fd: " << d_cache_info_fd << endl);
367
368 if (fcntl(fd, F_SETLK, lock(F_UNLCK)) == -1) {
369 throw InternalErr(__FILE__, __LINE__,
370 "An error occurred trying to unlock the cache-control file" + get_errno());
371 }
372
373 DBG(cerr << "lock status: " << lockStatus(d_cache_info_fd) << endl);
374}
375
391void HTTPCache::m_exclusive_to_shared_lock(int fd) {
392 struct flock lock;
393 lock.l_type = F_RDLCK;
394 lock.l_whence = SEEK_SET;
395 lock.l_start = 0;
396 lock.l_len = 0;
397 lock.l_pid = getpid();
398
399 if (fcntl(fd, F_SETLKW, &lock) == -1) {
400 throw InternalErr(__FILE__, __LINE__, get_errno());
401 }
402
403 DBG(cerr << "lock status: " << lockStatus(fd) << endl);
404}
405
407
410
413
414string HTTPCache::get_cache_root() const { return d_cache_root; }
415
423
424void HTTPCache::create_cache_root(const string &cache_root) const {
425 // Save the mask
426 mode_t mask = umask(S_IRWXO);
427
428 // Ignore the error if the directory exists
429 errno = 0;
430 if (mkdir(cache_root.c_str(), S_IRWXU | S_IRWXG) < 0 && errno != EEXIST) {
431 umask(mask);
432 throw Error("HTTPCache::create_cache_root: Could not create the directory for the cache at '" + cache_root +
433 "' (" + strerror(errno) + ").");
434 }
435
436 // Restore the mask
437 umask(mask);
438}
439
/**
 * Test whether one string is a suffix of another.
 *
 * @param value The string to examine.
 * @param ending The candidate suffix; an empty suffix always matches.
 * @return True if \c value ends with \c ending.
 */
inline bool ends_with(std::string const &value, std::string const &ending) {
    const std::string::size_type suffix_len = ending.size();
    return value.size() >= suffix_len && value.compare(value.size() - suffix_len, suffix_len, ending) == 0;
}
445
456
457void HTTPCache::set_cache_root(const string &root) {
458 if (!root.empty()) {
459 d_cache_root = root;
460 // cache root should end in /.
461 if (!ends_with(d_cache_root, DIR_SEPARATOR_CHAR))
462 d_cache_root += DIR_SEPARATOR_CHAR;
463 } else {
464 // If no cache root has been indicated then look for a suitable location.
465 d_cache_root = CACHE_LOCATION;
466
467 if (!ends_with(d_cache_root, DIR_SEPARATOR_CHAR))
468 d_cache_root += DIR_SEPARATOR_CHAR;
469
470 d_cache_root += CACHE_ROOT;
471 }
472
473 // Test d_http_cache_table because this method can be called before that
474 // instance is created and also can be called later to change the cache
475 // root. jhrg 05.14.08
476 if (d_http_cache_table)
477 d_http_cache_table->set_cache_root(d_cache_root);
478}
479
490
492 lock_guard<mutex> lock{d_cache_mutex};
493
494 d_cache_enabled = mode;
495}
496
498
499bool HTTPCache::is_cache_enabled() const { return d_cache_enabled; }
500
510
512 lock_guard<mutex> lock{d_cache_mutex};
513
514 d_cache_disconnected = mode;
515}
516
518
519CacheDisconnectedMode HTTPCache::get_cache_disconnected() const { return d_cache_disconnected; }
520
528
530 lock_guard<mutex> lock{d_cache_mutex};
531
532 d_expire_ignored = mode;
533}
534
535/* Is the cache ignoring Expires headers returned with responses that have
536 been cached? */
537
538bool HTTPCache::is_expire_ignored() const { return d_expire_ignored; }
539
554
555void HTTPCache::set_max_size(unsigned long size) {
556 lock_guard<mutex> lock{d_cache_mutex};
557
558 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
559 unsigned long old_size = d_total_size;
560 d_total_size = new_size;
561 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
562 d_gc_buffer = d_total_size / CACHE_GC_PCT;
563
564 if (new_size < old_size && startGC()) {
565 perform_garbage_collection();
566 d_http_cache_table->cache_index_write();
567 }
568}
569
571
572unsigned long HTTPCache::get_max_size() const { return d_total_size / MEGA; }
573
581
582void HTTPCache::set_max_entry_size(unsigned long size) {
583 lock_guard<mutex> lock{d_cache_mutex};
584
585 unsigned long new_size = size * MEGA;
586 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
587 unsigned long old_size = d_max_entry_size;
588 d_max_entry_size = new_size;
589 if (new_size < old_size && startGC()) {
590 perform_garbage_collection();
591 d_http_cache_table->cache_index_write();
592 }
593 }
594}
595
599
600unsigned long HTTPCache::get_max_entry_size() const { return d_max_entry_size / MEGA; }
601
611
612void HTTPCache::set_default_expiration(const int exp_time) {
613 lock_guard<mutex> lock{d_cache_mutex};
614
615 d_default_expiration = exp_time;
616}
617
619
620int HTTPCache::get_default_expiration() const { return d_default_expiration; }
621
625
626void HTTPCache::set_always_validate(bool validate) { d_always_validate = validate; }
627
630
631bool HTTPCache::get_always_validate() const { return d_always_validate; }
632
648
649void HTTPCache::set_cache_control(const vector<string> &cc) {
650 lock_guard<mutex> lock{d_cache_mutex};
651
652 d_cache_control = cc;
653
654 for (auto &line : cc) {
655 string header = line.substr(0, line.find(':'));
656 string value = line.substr(line.find(": ") + 2);
657 if (header != "Cache-Control") {
658 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
659 } else {
660 if (value == "no-cache" || value == "no-store")
661 d_cache_enabled = false;
662 else if (value.find("max-age") != string::npos) {
663 string max_age = value.substr(value.find('=') + 1);
664 d_max_age = parse_time(max_age.c_str());
665 } else if (value == "max-stale")
666 d_max_stale = 0; // indicates will take anything;
667 else if (value.find("max-stale") != string::npos) {
668 string max_stale = value.substr(value.find('=') + 1);
669 d_max_stale = parse_time(max_stale.c_str());
670 } else if (value.find("min-fresh") != string::npos) {
671 string min_fresh = value.substr(value.find('=') + 1);
672 d_min_fresh = parse_time(min_fresh.c_str());
673 }
674 }
675 }
676}
677
681
682vector<string> HTTPCache::get_cache_control() const { return d_cache_control; }
683
685
697
698bool HTTPCache::is_url_in_cache(const string &url) {
699 lock_guard<mutex> lock{d_cache_mutex};
700
701 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_read_locked_entry_from_cache_table(url);
702
703 if (entry) {
704 entry->unlock_read_response();
705 return true;
706 }
707 return false;
708}
709
714
/**
 * Test whether a header line mentions one of the hop-by-hop header names.
 * The test is a case-sensitive substring search over the whole line, so a
 * name that appears anywhere in the text counts as a match.
 *
 * @param header The header line to test.
 * @return True if any of the names is found in \c header.
 */
bool is_hop_by_hop_header(const string &header) {
    static const char *const hop_by_hop_names[] = {"Connection",          "Keep-Alive",        "Proxy-Authenticate",
                                                   "Proxy-Authorization", "Transfer-Encoding", "Upgrade"};

    for (const char *name : hop_by_hop_names) {
        if (header.find(name) != string::npos)
            return true;
    }

    return false;
}
720
731
732void HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) {
733 string fname = cachename + CACHE_META;
734 d_open_files.push_back(fname);
735
736 FILE *dest = fopen(fname.c_str(), "w");
737 if (!dest) {
738 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry file.");
739 }
740
741 vector<string>::const_iterator i;
742 for (auto &header : headers) /*i = headers.begin(); i != headers.end(); ++i)*/ {
743 if (!is_hop_by_hop_header(header)) {
744 size_t s = fwrite(header.c_str(), header.size(), 1, dest);
745 if (s != 1) {
746 fclose(dest);
747 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
748 }
749 s = fwrite("\n", 1, 1, dest);
750 if (s != 1) {
751 fclose(dest);
752 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
753 }
754 }
755 }
756
757 int res = fclose(dest);
758 if (res) {
759 DBG(cerr << "HTTPCache::write_metadata - Failed to close " << dest << endl);
760 }
761
762 d_open_files.pop_back();
763}
764
774
775void HTTPCache::read_metadata(const string &cachename, vector<string> &headers) const {
776 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
777 if (!md) {
778 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry meta data file.");
779 }
780
781 const size_t line_buf_len = 1024;
782 char line[line_buf_len];
783 while (!feof(md) && fgets(line, line_buf_len, md)) {
784 line[std::min(line_buf_len, strnlen(line, line_buf_len)) - 1] = '\0'; // erase newline
785 headers.emplace_back(line);
786 }
787
788 int res = fclose(md);
789 if (res) {
790 DBG(cerr << "HTTPCache::read_metadata - Failed to close " << md << endl);
791 }
792}
793
814
815int HTTPCache::write_body(const string &cachename, const FILE *src) {
816 d_open_files.push_back(cachename);
817
818 FILE *dest = fopen(cachename.c_str(), "wb");
819 if (!dest) {
820 throw InternalErr(__FILE__, __LINE__, "Could not open named cache entry file.");
821 }
822
823 // Read and write in 1k blocks; an attempt at doing this efficiently.
824 // 09/30/02 jhrg
825 char line[1024];
826 size_t n;
827 int total = 0;
828 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
829 total += fwrite(line, 1, n, dest);
830 DBG2(sleep(3));
831 }
832
833 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
834 int res = fclose(dest);
835 res = res & unlink(cachename.c_str());
836 if (res) {
837 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " << dest << endl);
838 }
839 throw InternalErr(__FILE__, __LINE__, "I/O error transferring data to the cache.");
840 }
841
842 rewind(const_cast<FILE *>(src));
843
844 int res = fclose(dest);
845 if (res) {
846 DBG(cerr << "HTTPCache::write_body - Failed to close " << dest << endl);
847 }
848
849 d_open_files.pop_back();
850
851 return total;
852}
853
861
862FILE *HTTPCache::open_body(const string &cachename) {
863 DBG(cerr << "cachename: " << cachename << endl);
864
865 FILE *src = fopen(cachename.c_str(), "rb"); // Read only
866 if (!src)
867 throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
868
869 return src;
870}
871
897
898bool HTTPCache::cache_response(const string &url, time_t request_time, const vector<string> &headers,
899 const FILE *body) {
900
901 // If this is not an http or https URL, don't cache.
902 if (url.find("http:") == string::npos && url.find("https:") == string::npos) {
903 return false;
904 }
905
906 lock_guard<mutex> lock{d_cache_mutex};
907 mp_lock_guard write_lock{d_cache_lock_fd,
908 mp_lock_guard::operation::write}; // Blocks until the write lock is acquired.
909
910 // This does nothing if url is not already in the cache. It's
911 // more efficient to do this than to first check and see if the entry
912 // exists. 10/10/02 jhrg
913 d_http_cache_table->remove_entry_from_cache_table(url);
914 auto *entry = new HTTPCacheTable::CacheEntry(url);
915 entry->lock_write_response();
916
917 try {
918 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
919 if (entry->is_no_cache()) {
920 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url << ")" << endl);
921 entry->unlock_write_response();
922 delete entry;
923 return false;
924 }
925
926 // corrected_initial_age, freshness_lifetime, response_time.
927 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
928
929 d_http_cache_table->create_location(entry); // cachename, cache_body_fd
930 // move these write function to cache table
931 entry->set_size(write_body(entry->get_cachename(), body));
932 write_metadata(entry->get_cachename(), headers);
933 d_http_cache_table->add_entry_to_cache_table(entry);
934 entry->unlock_write_response();
935 } catch (const ResponseTooBigErr &e) {
936 // Oops. Bummer. Clean up and exit.
937 DBG(cerr << e.get_error_message() << endl);
938 remove(entry->get_cachename().c_str());
939 remove(string(entry->get_cachename() + CACHE_META).c_str());
940 entry->unlock_write_response();
941 delete entry;
942 return false;
943 }
944
945 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
946 if (startGC())
947 perform_garbage_collection();
948
949 d_http_cache_table->cache_index_write(); // resets new_entries
950 }
951
952 return true;
953}
954
972
973vector<string> HTTPCache::get_conditional_request_headers(const string &url) {
974
975 HTTPCacheTable::CacheEntry *entry = nullptr;
976 vector<string> headers;
977
978 lock_guard<mutex> lock{d_cache_mutex};
979 mp_lock_guard read_lock{d_cache_lock_fd, mp_lock_guard::operation::read}; // Blocks until the lock is acquired.
980
981 try {
982 entry = d_http_cache_table->get_read_locked_entry_from_cache_table(url);
983 if (!entry)
984 throw Error(internal_error, "There is no cache entry for the URL: " + url);
985
986 if (!entry->get_etag().empty())
987 headers.push_back(string("If-None-Match: ") + entry->get_etag());
988
989 if (entry->get_lm() > 0) {
990 time_t lm = entry->get_lm();
991 headers.push_back(string("If-Modified-Since: ") + date_time_str(&lm));
992 } else if (entry->get_max_age() > 0) {
993 time_t max_age = entry->get_max_age();
994 headers.push_back(string("If-Modified-Since: ") + date_time_str(&max_age));
995 } else if (entry->get_expires() > 0) {
996 time_t expires = entry->get_expires();
997 headers.push_back(string("If-Modified-Since: ") + date_time_str(&expires));
998 }
999 entry->unlock_read_response();
1000 } catch (...) {
1001 if (entry) {
1002 entry->unlock_read_response();
1003 }
1004 throw;
1005 }
1006
1007 return headers;
1008}
1009
1012
/**
 * Strict weak ordering for header lines that compares only the header
 * name, i.e., the text before the first ':' (the whole line when there is
 * no ':'). Two lines with the same name are equivalent whatever their
 * values, so a std::set using this comparator keeps one line per name.
 *
 * This no longer derives from std::binary_function, which was deprecated
 * in C++11 and removed in C++17; a comparator does not need it.
 */
struct HeaderLess {
    bool operator()(const string &s1, const string &s2) const {
        // compare() with a count of npos uses the rest of the string, which
        // matches substr(0, npos) without building temporary strings.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1018
1031
1032void HTTPCache::update_response(const string &url, time_t request_time, const vector<string> &headers) {
1033 HTTPCacheTable::CacheEntry *entry = nullptr;
1034
1035 try {
1036 lock_guard<mutex> lock{d_cache_mutex};
1037 mp_lock_guard write_lock{d_cache_lock_fd,
1038 mp_lock_guard::operation::write}; // Blocks until the lock is acquired.
1039
1040 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1041 if (!entry)
1042 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1043
1044 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1045 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1046
1047 // Update corrected_initial_age, freshness_lifetime, response_time.
1048 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1049
1050 // Merge the new headers with those in the persistent store. How:
1051 // Load the new headers into a set, then merge the old headers. Since
1052 // set<> ignores duplicates, old headers with the same name as a new
1053 // header will go into the bit bucket. Define a special compare
1054 // functor to make sure that headers are compared using only their
1055 // name and not their value too.
1056 set<string, HeaderLess> merged_headers;
1057
1058 // Load in the new headers
1059 copy(headers.begin(), headers.end(), inserter(merged_headers, merged_headers.begin()));
1060
1061 // Get the old headers and load them in.
1062 vector<string> old_headers;
1063 read_metadata(entry->get_cachename(), old_headers);
1064 copy(old_headers.begin(), old_headers.end(), inserter(merged_headers, merged_headers.begin()));
1065
1066 // Read the values back out. Use reverse iterators with back_inserter
1067 // to preserve header order. NB: vector<> does not support push_front
1068 // so we can't use front_inserter(). 01/09/03 jhrg
1069 vector<string> result;
1070 copy(merged_headers.rbegin(), merged_headers.rend(), back_inserter(result));
1071
1072 write_metadata(entry->get_cachename(), result);
1073 entry->unlock_write_response();
1074 } catch (...) {
1075 if (entry) {
1076 entry->unlock_read_response();
1077 }
1078 throw;
1079 }
1080}
1081
1092
1093bool HTTPCache::is_url_valid(const string &url) {
1094
1095 bool freshness;
1096 HTTPCacheTable::CacheEntry *entry = nullptr;
1097
1098 try {
1099 if (d_always_validate) {
1100 return false; // force re-validation.
1101 }
1102
1103 lock_guard<mutex> lock{d_cache_mutex};
1104 mp_lock_guard read_lock{d_cache_lock_fd, mp_lock_guard::operation::read}; // Blocks until the lock is acquired.
1105
1106 entry = d_http_cache_table->get_read_locked_entry_from_cache_table(url);
1107 if (!entry)
1108 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1109
1110 // If we supported range requests, we'd need code here to check if
1111 // there was only a partial response in the cache. 10/02/02 jhrg
1112
1113 // In case this entry is of type "must-revalidate" then we consider it
1114 // invalid.
1115 if (entry->get_must_revalidate()) {
1116 entry->unlock_read_response();
1117 return false;
1118 }
1119
1120 time_t resident_time = time(nullptr) - entry->get_response_time();
1121 time_t current_age = entry->get_corrected_initial_age() + resident_time;
1122
1123 // Check that the max-age, max-stale, and min-fresh directives
1124 // given in the request cache control header is followed.
1125 if (d_max_age >= 0 && current_age > d_max_age) {
1126 entry->unlock_read_response();
1127 return false;
1128 }
1129 if (d_min_fresh >= 0 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1130 entry->unlock_read_response();
1131 return false;
1132 }
1133
1134 freshness = (entry->get_freshness_lifetime() + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1135 entry->unlock_read_response();
1136 } catch (...) {
1137 if (entry) {
1138 entry->unlock_read_response();
1139 }
1140 throw;
1141 }
1142
1143 return freshness;
1144}
1145
1174
1175FILE *HTTPCache::get_cached_response(const string &url, vector<string> &headers, string &cacheName) {
1176 FILE *body = nullptr;
1177 HTTPCacheTable::CacheEntry *entry = nullptr;
1178
1179 try {
1180 lock_guard<mutex> lock{d_cache_mutex};
1181 mp_lock_guard read_lock{d_cache_lock_fd, mp_lock_guard::operation::read}; // Blocks until the lock is acquired.
1182
1183 DBG(cerr << "Getting the cached response for " << url << endl);
1184
1185 entry = d_http_cache_table->get_read_locked_entry_from_cache_table(url);
1186 if (!entry) {
1187 return nullptr;
1188 }
1189
1190 cacheName = entry->get_cachename();
1191 read_metadata(entry->get_cachename(), headers);
1192
1193 DBG(cerr << "Headers just read from cache: " << endl);
1194 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1195
1196 body = open_body(entry->get_cachename());
1197
1198 DBG(cerr << "Returning: " << url << " from the cache." << endl);
1199
1200 d_http_cache_table->bind_entry_to_data(entry, body);
1201
1202 // Set 'read_lock' so that it will not unlock the cache when it goes out of scope.
1203 // The client must call release_cached_response() to unlock the cache.
1204 read_lock.release();
1205 } catch (...) {
1206 if (body != nullptr)
1207 fclose(body);
1208 throw;
1209 }
1210
1211 return body;
1212}
1213
1224
1225FILE *HTTPCache::get_cached_response(const string &url, vector<string> &headers) {
1226 string discard_name;
1227 return get_cached_response(url, headers, discard_name);
1228}
1229
1239
1240FILE *HTTPCache::get_cached_response(const string &url) {
1241 string discard_name;
1242 vector<string> discard_headers;
1243 return get_cached_response(url, discard_headers, discard_name);
1244}
1245
1257
1259 lock_guard<mutex> lock{d_cache_mutex};
1260
1261 // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1262 d_http_cache_table->uncouple_entry_from_data(body);
1263 m_unlock_cache(d_cache_lock_fd);
1264}
1265
1277
1279 lock_guard<mutex> lock{d_cache_mutex};
1280 mp_lock_guard write_lock{d_cache_lock_fd, mp_lock_guard::operation::write}; // Blocks until the lock is acquired.
1281
1282 if (d_http_cache_table->is_locked_read_responses())
1283 throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1284
1285 d_http_cache_table->delete_all_entries();
1286}
1287
1288} // namespace libdap
#define internal_error
Internal server error (500)
Definition Error.h:63
const string CACHE_ROOT
Definition HTTPCache.cc:56
const string CACHE_LOCATION
Definition HTTPCache.cc:54
const string CACHE_LOCK
Definition HTTPCache.cc:59
const string CACHE_INDEX
Definition HTTPCache.cc:58
const string CACHE_EMPTY_ETAG
Definition HTTPCache.cc:61
const string CACHE_META
Definition HTTPCache.cc:60
const string DIR_SEPARATOR_CHAR
Definition HTTPCache.cc:63
#define CACHE_GC_PCT
Definition HTTPCache.h:44
#define MIN_CACHE_TOTAL_SIZE
Definition HTTPCache.h:45
#define DUMP_FREQUENCY
Definition HTTPCache.h:37
#define MEGA
Definition HTTPCache.h:41
#define CACHE_FOLDER_PCT
Definition HTTPCache.h:43
A class for error processing.
Definition Error.h:92
std::string get_error_message() const
Definition Error.cc:212
unsigned long get_current_size() const
CacheDisconnectedMode get_cache_disconnected() const
Definition HTTPCache.cc:519
void set_expire_ignored(bool mode)
Definition HTTPCache.cc:529
void set_default_expiration(int exp_time)
Definition HTTPCache.cc:612
bool is_url_valid(const std::string &url)
std::vector< std::string > get_conditional_request_headers(const std::string &url)
Definition HTTPCache.cc:973
std::string get_cache_root() const
Definition HTTPCache.cc:414
void set_cache_disconnected(CacheDisconnectedMode mode)
Definition HTTPCache.cc:511
void release_cached_response(FILE *response)
unsigned long get_max_entry_size() const
Definition HTTPCache.cc:600
void set_cache_enabled(bool mode)
Definition HTTPCache.cc:491
FILE * get_cached_response(const std::string &url, std::vector< std::string > &headers, std::string &cacheName)
unsigned long get_max_size() const
Definition HTTPCache.cc:572
bool cache_response(const std::string &url, time_t request_time, const std::vector< std::string > &headers, const FILE *body)
Definition HTTPCache.cc:898
std::vector< std::string > get_cache_control() const
Definition HTTPCache.cc:682
void set_max_entry_size(unsigned long size)
Definition HTTPCache.cc:582
bool get_always_validate() const
Definition HTTPCache.cc:631
int get_default_expiration() const
Definition HTTPCache.cc:620
void set_always_validate(bool validate)
Definition HTTPCache.cc:626
void update_response(const std::string &url, time_t request_time, const std::vector< std::string > &headers)
void set_max_size(unsigned long size)
Definition HTTPCache.cc:555
bool is_expire_ignored() const
Definition HTTPCache.cc:538
void set_cache_control(const std::vector< std::string > &cc)
Definition HTTPCache.cc:649
virtual ~HTTPCache()
Definition HTTPCache.cc:124
bool is_cache_enabled() const
Definition HTTPCache.cc:499
A class for software fault reporting.
Definition InternalErr.h:61
#define DBGN(x)
Definition debug.h:59
#define DBG(x)
Definition debug.h:58
#define DBG2(x)
Definition debug.h:74
top level DAP object to house generic methods
Definition AISConnect.cc:30
bool is_hop_by_hop_header(const string &header)
Definition HTTPCache.cc:715
string long_to_string(long val, int base)
Definition util.cc:946
bool ends_with(std::string const &value, std::string const &ending)
Definition HTTPCache.cc:440
string date_time_str(time_t *calendar, bool local)
Definition util_mit.cc:260
time_t parse_time(const char *str, bool expand)
Definition util_mit.cc:144