35 #include <sys/types.h> 48 #include "InternalErr.h" 49 #include "ResponseTooBigErr.h" 51 #include "SignalHandler.h" 53 #include "HTTPCacheInterruptHandler.h" 54 #include "HTTPCacheTable.h" 55 #include "HTTPCache.h" 56 #include "HTTPCacheMacros.h" 57 #include "SignalHandlerRegisteredErr.h" 66 HTTPCache *HTTPCache::_instance = 0;
75 static pthread_mutex_t instance_mutex;
76 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
79 #define NO_LM_EXPIRATION 24*3600 // 24 hours 81 #define DUMP_FREQUENCY 10 // Dump index every x loads 83 #define MEGA 0x100000L 84 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 85 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 86 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 87 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 88 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 94 status = INIT(&instance_mutex);
97 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
// Singleton accessor. Runs once_init_routine through pthread_once (raising
// InternalErr if that fails), takes instance_mutex, allocates the HTTPCache
// from cache_root/force and registers delete_instance() with atexit().
// Handlers for SIGINT, SIGPIPE and SIGTERM are then set up through
// SignalHandler; when a previously installed handler (old_eh) comes back it
// is re-registered and the "without superseding an existing one" error is
// reported (presumably thrown - the throw itself is not visible here).
// instance_mutex is released both on the failure path and before returning.
129 HTTPCache::instance(
const string &cache_root,
bool force)
131 int status = pthread_once(&once_block, once_init_routine);
133 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
135 LOCK(&instance_mutex);
137 DBG(cerr <<
"Entering instance(); (" << hex << _instance << dec <<
")" <<
"... ");
141 _instance =
new HTTPCache(cache_root, force);
143 DBG(cerr <<
"New instance: " << _instance <<
", cache root: " 144 << _instance->d_cache_root << endl);
146 atexit(delete_instance);
159 SignalHandler::instance()->register_handler(SIGINT, old_eh);
161 "Could not register event handler for SIGINT without superseding an existing one.");
166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
168 "Could not register event handler for SIGPIPE without superseding an existing one.");
173 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
175 "Could not register event handler for SIGTERM without superseding an existing one.");
181 DBG2(cerr <<
"The constructor threw an Error!" << endl);
182 UNLOCK(&instance_mutex);
186 UNLOCK(&instance_mutex);
187 DBGN(cerr <<
"returning " << hex << _instance << dec << endl);
// Tear-down hook (instance() passes it to atexit()). If a singleton exists it
// is deleted and the static pointer reset to 0; the SIGINT, SIGPIPE and
// SIGTERM handlers returned by SignalHandler::remove_handler() are deleted too.
196 HTTPCache::delete_instance()
198 DBG(cerr <<
"Entering delete_instance()..." << endl);
200 if (HTTPCache::_instance) {
201 DBG(cerr <<
"Deleting the cache: " << HTTPCache::_instance << endl);
202 delete HTTPCache::_instance;
203 HTTPCache::_instance = 0;
206 delete SignalHandler::instance()->remove_handler(SIGINT);
207 delete SignalHandler::instance()->remove_handler(SIGPIPE);
208 delete SignalHandler::instance()->remove_handler(SIGTERM);
211 DBG(cerr <<
"Exiting delete_instance()" << endl);
// Constructor. Starts with the cache disabled, default size limits and no
// cache table; then initialises d_cache_mutex, records the cache root, takes
// the single-user lock (Error if that fails), reads the file-system block
// size of cache_root with stat() (Error if stat fails), creates the
// HTTPCacheTable and finally marks the cache enabled.
228 HTTPCache::HTTPCache(
string cache_root,
bool force) :
229 d_locked_open_file(0),
230 d_cache_enabled(
false),
231 d_cache_protected(
false),
233 d_cache_disconnected(DISCONNECT_NONE),
235 d_expire_ignored(
false),
236 d_always_validate(
false),
237 d_total_size(CACHE_TOTAL_SIZE * MEGA),
// NOTE(review): d_total_size above is in bytes (CACHE_TOTAL_SIZE * MEGA), but
// the next two initialisers divide the bare CACHE_TOTAL_SIZE (20), yielding 2
// instead of a tenth of d_total_size as set_max_size() computes - confirm.
238 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
239 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
240 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
241 d_default_expiration(NO_LM_EXPIRATION),
245 d_http_cache_table(0)
247 DBG(cerr <<
"Entering the constructor for " <<
this <<
"... ");
249 int status = pthread_once(&once_block, once_init_routine);
251 throw InternalErr(__FILE__, __LINE__,
"Could not initialize the HTTP Cache mutex. Exiting.");
253 INIT(&d_cache_mutex);
// The root must be set before the lock is taken: the lock file path is built
// from d_cache_root in get_single_user_lock().
265 set_cache_root(cache_root);
268 if (!get_single_user_lock(force))
269 throw Error(internal_error,
"Could not get single user lock for the cache");
// NOTE(review): stat() is called on the cache_root argument, not on
// d_cache_root as computed by set_cache_root() - confirm this is intended.
279 if (stat(cache_root.c_str(), &s) == 0)
280 block_size = s.st_blksize;
282 throw Error(internal_error,
"Could not set file system block size.");
284 d_http_cache_table =
new HTTPCacheTable(d_cache_root, block_size);
285 d_cache_enabled =
true;
287 DBGN(cerr <<
"exiting" << endl);
// Destructor. Runs garbage collection, writes the cache index, deletes the
// cache table, releases the single-user lock and destroys d_cache_mutex.
302 HTTPCache::~HTTPCache()
304 DBG(cerr <<
"Entering the destructor for " <<
this <<
"... ");
308 perform_garbage_collection();
310 d_http_cache_table->cache_index_write();
320 delete d_http_cache_table;
322 release_single_user_lock();
324 DBGN(cerr <<
"exiting destructor." << endl);
325 DESTROY(&d_cache_mutex);
// True when the table's current size plus d_folder_size is below
// d_total_size minus d_gc_buffer.
337 HTTPCache::stopGC()
const 339 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
// True when the table's current size plus d_folder_size exceeds d_total_size.
349 HTTPCache::startGC()
const 351 DBG(cerr <<
"startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
352 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
// Garbage-collection entry point; emits a debug trace on entry.
// NOTE(review): presumably drives the *_gc() helpers that follow - confirm.
370 HTTPCache::perform_garbage_collection()
372 DBG(cerr <<
"Performing garbage collection" << endl);
// Unless expiration is being ignored (d_expire_ignored), asks the cache table
// to delete its expired entries.
391 HTTPCache::expired_gc()
393 if (!d_expire_ignored) {
394 d_http_cache_table->delete_expired_entries();
421 d_http_cache_table->delete_by_hits(hits);
// Asks the cache table to delete entries by size, passing d_max_entry_size.
431 void HTTPCache::too_big_gc() {
433 d_http_cache_table->delete_by_size(d_max_entry_size);
// Acquires the single-user lock when this object does not hold it yet
// (d_locked_open_file is unset): ensures the cache root exists, probes for an
// existing lock file (d_cache_root + CACHE_LOCK) by opening it for reading,
// then creates the lock file for writing and keeps its FILE* in
// d_locked_open_file.
448 bool HTTPCache::get_single_user_lock(
bool force)
450 if (!d_locked_open_file) {
456 create_cache_root(d_cache_root);
462 DBG(cerr <<
"Failure to create the cache root" << endl);
467 string lock = d_cache_root + CACHE_LOCK;
468 if ((fp = fopen(lock.c_str(),
"r")) != NULL) {
469 int res = fclose(fp);
471 DBG(cerr <<
"Failed to close " << (
void *)fp << endl);
// NOTE(review): presumably the stale lock is removed only when `force` is
// set - the guarding condition is not visible here; confirm.
474 REMOVE(lock.c_str());
479 if ((fp = fopen(lock.c_str(),
"w")) == NULL) {
480 DBG(cerr <<
"Could not open for write access" << endl);
484 d_locked_open_file = fp;
488 DBG(cerr <<
"locked_open_file is true" << endl);
// Releases the single-user lock: if a lock file handle is held it is closed
// and d_locked_open_file cleared; the lock file (d_cache_root + CACHE_LOCK)
// is then removed.
495 HTTPCache::release_single_user_lock()
497 if (d_locked_open_file) {
498 int res = fclose(d_locked_open_file);
500 DBG(cerr <<
"Failed to close " << (
void *)d_locked_open_file << endl) ;
502 d_locked_open_file = 0;
505 string lock = d_cache_root + CACHE_LOCK;
506 REMOVE(lock.c_str());
// get_cache_root(): const accessor (presumably returns d_cache_root - body
// not visible here).
516 HTTPCache::get_cache_root()
// create_cache_root(): creates the directory named by cache_root. Two
// strategies are visible: (1) walk the path one DIR_SEPARATOR_CHAR at a time
// and MKDIR every prefix that stat() cannot find; (2) a single mkdir() of the
// whole path that tolerates EEXIST. Either raises Error when creation fails.
// NOTE(review): which variant is compiled is presumably chosen by
// preprocessor conditionals that are not visible here - confirm.
const 531 HTTPCache::create_cache_root(
const string &cache_root)
// Skip a leading "X:\" drive prefix (3 chars) or a leading separator (1 char).
// NOTE(review): cache_root[1] assumes at least two characters - confirm callers.
534 string::size_type cur = cache_root[1] ==
':' ? 3 : 1;
537 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538 string dir = cache_root.substr(0, cur);
539 struct stat stat_info;
540 if (stat(dir.c_str(), &stat_info) == -1) {
541 DBG2(cerr <<
"Cache....... Creating " << dir << endl);
542 mode_t mask = UMASK(0);
543 if (MKDIR(dir.c_str(), 0777) < 0) {
544 DBG2(cerr <<
"Error: can't create." << endl);
546 throw Error(
string(
"Could not create the directory for the cache. Failed when building path at ") + dir +
string(
"."));
551 DBG2(cerr <<
"Cache....... Found " << dir << endl);
// The umask is cleared so the 0777 mode is applied as given; the previous
// value is kept in `mask` (presumably restored afterwards - not visible).
559 mode_t mask = umask(0);
563 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
565 throw Error(
"Could not create the directory for the cache at '" + cache_root +
"' (" + strerror(errno) +
").");
// Sets d_cache_root, making sure it ends with DIR_SEPARATOR_CHAR. A fallback
// location is looked up in the environment variables DODS_CACHE, TMP and
// TEMP, in that order, and finally CACHE_LOCATION; CACHE_ROOT is appended to
// the fallback. If a cache table already exists it is told about the new root.
// NOTE(review): presumably the fallback applies only when `root` is empty -
// the branch condition is not visible here; confirm.
589 HTTPCache::set_cache_root(
const string &root)
594 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
595 d_cache_root += DIR_SEPARATOR_CHAR;
601 char * cr = (
char *) getenv(
"DODS_CACHE");
602 if (!cr) cr = (
char *) getenv(
"TMP");
603 if (!cr) cr = (
char *) getenv(
"TEMP");
604 if (!cr) cr = (
char*)CACHE_LOCATION;
607 d_cache_root = CACHE_LOCATION;
610 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
611 d_cache_root += DIR_SEPARATOR_CHAR;
613 d_cache_root += CACHE_ROOT;
619 if (d_http_cache_table)
620 d_http_cache_table->set_cache_root(d_cache_root);
// Enables or disables the cache; d_cache_enabled is written while the cache
// interface lock is held.
635 HTTPCache::set_cache_enabled(
bool mode)
637 lock_cache_interface();
639 d_cache_enabled = mode;
641 unlock_cache_interface();
// Returns d_cache_enabled (read without taking the interface lock).
647 HTTPCache::is_cache_enabled()
const 649 DBG2(cerr <<
"In HTTPCache::is_cache_enabled: (" << d_cache_enabled <<
")" 651 return d_cache_enabled;
666 lock_cache_interface();
668 d_cache_disconnected = mode;
670 unlock_cache_interface();
// Returns the current disconnected mode (d_cache_disconnected).
676 HTTPCache::get_cache_disconnected()
const 678 return d_cache_disconnected;
// Sets whether entry expiration is ignored (see expired_gc()); the flag is
// written while the cache interface lock is held.
690 HTTPCache::set_expire_ignored(
bool mode)
692 lock_cache_interface();
694 d_expire_ignored = mode;
696 unlock_cache_interface();
// Returns d_expire_ignored.
703 HTTPCache::is_expire_ignored()
const 705 return d_expire_ignored;
// Sets the total cache size. `size` is multiplied by MEGA; values below
// MIN_CACHE_TOTAL_SIZE are raised to that minimum. d_folder_size and
// d_gc_buffer are recomputed as d_total_size / CACHE_FOLDER_PCT and
// d_total_size / CACHE_GC_PCT. When the limit shrinks and startGC() reports
// the cache is over it, garbage collection runs and the index is rewritten.
// The cache interface lock is taken on entry and released on every visible
// exit path.
724 HTTPCache::set_max_size(
unsigned long size)
726 lock_cache_interface();
729 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
730 MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
731 unsigned long old_size = d_total_size;
732 d_total_size = new_size;
733 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
734 d_gc_buffer = d_total_size / CACHE_GC_PCT;
736 if (new_size < old_size && startGC()) {
737 perform_garbage_collection();
738 d_http_cache_table->cache_index_write();
742 unlock_cache_interface();
743 DBGN(cerr <<
"Unlocking interface." << endl);
747 DBG2(cerr <<
"Cache....... Total cache size: " << d_total_size
748 <<
" with " << d_folder_size
749 <<
" bytes for meta information and folders and at least " 750 << d_gc_buffer <<
" bytes free after every gc" << endl);
752 unlock_cache_interface();
// Returns the total cache size in units of MEGA (d_total_size / MEGA).
758 HTTPCache::get_max_size()
const 760 return d_total_size / MEGA;
// Sets the largest size allowed for a single entry. `size` is multiplied by
// MEGA and is accepted only when the result is non-zero and smaller than
// d_total_size - d_folder_size. When the limit shrinks and startGC() reports
// the cache is over its total size, garbage collection runs and the index is
// rewritten. Runs under the cache interface lock.
772 HTTPCache::set_max_entry_size(
unsigned long size)
774 lock_cache_interface();
777 unsigned long new_size = size * MEGA;
778 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
779 unsigned long old_size = d_max_entry_size;
780 d_max_entry_size = new_size;
781 if (new_size < old_size && startGC()) {
782 perform_garbage_collection();
783 d_http_cache_table->cache_index_write();
788 unlock_cache_interface();
792 DBG2(cerr <<
"Cache...... Max entry cache size is " 793 << d_max_entry_size << endl);
795 unlock_cache_interface();
// Returns the per-entry size limit in units of MEGA (d_max_entry_size / MEGA).
803 HTTPCache::get_max_entry_size()
const 805 return d_max_entry_size / MEGA;
// Stores exp_time in d_default_expiration while holding the cache interface
// lock. The value is later handed to HTTPCacheTable::calculate_time().
819 HTTPCache::set_default_expiration(
const int exp_time)
821 lock_cache_interface();
823 d_default_expiration = exp_time;
825 unlock_cache_interface();
// Returns d_default_expiration.
831 HTTPCache::get_default_expiration()
const 833 return d_default_expiration;
// Sets d_always_validate, which is_url_valid() consults first. Unlike the
// other setters, no interface lock is taken in the visible code.
841 HTTPCache::set_always_validate(
bool validate)
843 d_always_validate = validate;
// Returns d_always_validate.
850 HTTPCache::get_always_validate()
const 852 return d_always_validate;
872 HTTPCache::set_cache_control(
const vector<string> &cc)
874 lock_cache_interface();
877 d_cache_control = cc;
879 vector<string>::const_iterator i;
880 for (i = cc.begin(); i != cc.end(); ++i) {
881 string header = (*i).substr(0, (*i).find(
':'));
882 string value = (*i).substr((*i).find(
": ") + 2);
883 if (header !=
"Cache-Control") {
884 throw InternalErr(__FILE__, __LINE__,
"Expected cache control header not found.");
887 if (value ==
"no-cache" || value ==
"no-store")
888 d_cache_enabled =
false;
889 else if (value.find(
"max-age") != string::npos) {
890 string max_age = value.substr(value.find(
"=" + 1));
893 else if (value ==
"max-stale")
895 else if (value.find(
"max-stale") != string::npos) {
896 string max_stale = value.substr(value.find(
"=" + 1));
899 else if (value.find(
"min-fresh") != string::npos) {
900 string min_fresh = value.substr(value.find(
"=" + 1));
907 unlock_cache_interface();
911 unlock_cache_interface();
// Returns the Cache-Control header lines last stored by set_cache_control().
920 HTTPCache::get_cache_control()
922 return d_cache_control;
// Reports whether `url` has a cache entry: the status is true when the lookup
// produced a non-null entry. The entry's read lock is released afterwards
// (presumably only when an entry was found - the guard is not visible here).
936 HTTPCache::is_url_in_cache(
const string &url)
938 DBG(cerr <<
"Is this url in the cache? (" << url <<
")" << endl);
941 bool status = entry != 0;
943 entry->unlock_read_response();
// True when `header` contains any of the listed hop-by-hop header names.
// NOTE(review): find() matches anywhere in the line (values included), not
// just the header name, and is case-sensitive - confirm that is acceptable.
956 return header.find(
"Connection") != string::npos
957 || header.find(
"Keep-Alive") != string::npos
958 || header.find(
"Proxy-Authenticate") != string::npos
959 || header.find(
"Proxy-Authorization") != string::npos
960 || header.find(
"Transfer-Encoding") != string::npos
961 || header.find(
"Upgrade") != string::npos;
// Writes `headers`, one per line, to the file cachename + CACHE_META.
// The file name is pushed on d_open_files while the file is being written and
// popped once it has been closed. Raises InternalErr when a header or its
// newline cannot be written.
976 HTTPCache::write_metadata(
const string &cachename,
const vector<string> &headers)
978 string fname = cachename + CACHE_META;
979 d_open_files.push_back(fname);
981 FILE *dest = fopen(fname.c_str(),
"w");
984 "Could not open named cache entry file.");
987 vector<string>::const_iterator i;
988 for (i = headers.begin(); i != headers.end(); ++i) {
// NOTE(review): with an item count of 1 and an item size of 0, fwrite()
// returns 0, so an empty header string would look like a write failure -
// confirm empty strings cannot reach this point.
990 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
993 throw InternalErr(__FILE__, __LINE__,
"could not write header: '" + (*i) +
"' " + long_to_string(s));
995 s = fwrite(
"\n", 1, 1, dest);
998 throw InternalErr(__FILE__, __LINE__,
"could not write header: " + long_to_string(s));
1003 int res = fclose(dest);
1005 DBG(cerr <<
"HTTPCache::write_metadata - Failed to close " 1009 d_open_files.pop_back();
// read_metadata(): appends every line of the file cachename + CACHE_META to
// `headers`, reading up to 1024 bytes at a time with fgets().
1023 HTTPCache::read_metadata(
const string &cachename, vector<string> &headers)
1025 FILE *md = fopen(
string(cachename + CACHE_META).c_str(),
"r");
1028 "Could not open named cache entry meta data file.");
1032 while (!feof(md) && fgets(line, 1024, md)) {
// NOTE(review): this overwrites the last character unconditionally. That is
// the newline for normal lines, but a real character is lost when the line
// has no trailing newline or is longer than the buffer, and strlen() == 0
// would index line[-1] - confirm.
1033 line[min(1024, static_cast<int>(strlen(line)))-1] =
'\0';
1034 headers.push_back(
string(line));
1037 int res = fclose(md);
// write_body() (its signature starts on the line below): copies the stream
// `src` into the file `cachename` in 1024-byte chunks, accumulating the
// number of bytes written in `total`. On an I/O error on either stream the
// partial file is closed and unlinked and an error is reported; otherwise
// `src` is rewound. The file name sits on d_open_files while the copy runs.
1039 DBG(cerr <<
"HTTPCache::read_metadata - Failed to close " 1066 HTTPCache::write_body(
const string &cachename,
const FILE *src)
1068 d_open_files.push_back(cachename);
1070 FILE *dest = fopen(cachename.c_str(),
"wb");
1073 "Could not open named cache entry file.");
1081 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1082 total += fwrite(line, 1, n, dest);
1086 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1087 int res = fclose(dest);
// NOTE(review): bitwise AND of the two return codes is non-zero only when
// both fclose() and unlink() fail; `|` looks like the intent - confirm.
1088 res = res & unlink(cachename.c_str());
1090 DBG(cerr <<
"HTTPCache::write_body - Failed to close/unlink " 1094 "I/O error transferring data to the cache.");
1097 rewind(const_cast<FILE *>(src));
1099 int res = fclose(dest);
1101 DBG(cerr <<
"HTTPCache::write_body - Failed to close " 1105 d_open_files.pop_back();
// Opens the cached body file `cachename` for binary reading; InternalErr is
// raised when it cannot be opened.
1119 HTTPCache::open_body(
const string &cachename)
1121 DBG(cerr <<
"cachename: " << cachename << endl);
1123 FILE *src = fopen(cachename.c_str(),
"rb");
1125 throw InternalErr(__FILE__, __LINE__,
"Could not open cache file.");
// Stores a response (headers + body) for `url` in the cache, under the cache
// interface lock. Visible steps: reject URLs that mention neither "http:" nor
// "https:"; drop any existing table entry for the URL; parse the headers into
// a write-locked entry and discard it when it is marked no-cache; compute its
// times; create its on-disk location; write body and metadata; add it to the
// table. If the entry turns out too big its files are removed and the entry
// deleted. After more than DUMP_FREQUENCY new entries, garbage collection
// runs and the index is written.
1156 HTTPCache::cache_response(
const string &url, time_t request_time,
1157 const vector<string> &headers,
const FILE *body)
1159 lock_cache_interface();
1161 DBG(cerr <<
"Caching url: " << url <<
"." << endl);
// NOTE(review): find() matches "http:" / "https:" anywhere in the URL, not
// only as the scheme prefix - confirm that is intended.
1165 if (url.find(
"http:") == string::npos &&
1166 url.find(
"https:") == string::npos) {
1167 unlock_cache_interface();
1174 d_http_cache_table->remove_entry_from_cache_table(url);
1177 entry->lock_write_response();
1180 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1181 if (entry->is_no_cache()) {
1182 DBG(cerr <<
"Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1183 <<
"(" << url <<
")" << endl);
1184 entry->unlock_write_response();
1185 delete entry; entry = 0;
1186 unlock_cache_interface();
1191 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1193 d_http_cache_table->create_location(entry);
1195 entry->set_size(write_body(entry->get_cachename(), body));
1196 write_metadata(entry->get_cachename(), headers);
1197 d_http_cache_table->add_entry_to_cache_table(entry);
1198 entry->unlock_write_response();
1203 REMOVE(entry->get_cachename().c_str());
1204 REMOVE(
string(entry->get_cachename() + CACHE_META).c_str());
1205 DBG(cerr <<
"Too big; deleting HTTPCacheTable::CacheEntry: " << entry <<
"(" << url
1207 entry->unlock_write_response();
1208 delete entry; entry = 0;
1209 unlock_cache_interface();
1213 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1215 perform_garbage_collection();
1217 d_http_cache_table->cache_index_write();
1221 unlock_cache_interface();
1225 unlock_cache_interface();
// Builds the request headers for a conditional GET of `url` from its cache
// entry: "If-None-Match" when the entry has an ETag, plus one
// "If-Modified-Since" taken from the first positive value among the entry's
// last-modified time, max-age and expires. Raises Error when the URL has no
// entry. The entry is read-locked during the call and the cache interface
// lock is held throughout; both are released on the error path as well.
1249 HTTPCache::get_conditional_request_headers(
const string &url)
1251 lock_cache_interface();
1254 vector<string> headers;
1256 DBG(cerr <<
"Getting conditional request headers for " << url << endl);
1259 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1261 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1263 if (entry->get_etag() !=
"")
1264 headers.push_back(
string(
"If-None-Match: ") + entry->get_etag());
1266 if (entry->get_lm() > 0) {
1267 time_t lm = entry->get_lm();
1268 headers.push_back(
string(
"If-Modified-Since: ")
1271 else if (entry->get_max_age() > 0) {
1272 time_t max_age = entry->get_max_age();
1273 headers.push_back(
string(
"If-Modified-Since: ")
1276 else if (entry->get_expires() > 0) {
1277 time_t expires = entry->get_expires();
1278 headers.push_back(
string(
"If-Modified-Since: ")
1281 entry->unlock_read_response();
1282 unlock_cache_interface();
1285 unlock_cache_interface();
1287 entry->unlock_read_response();
/** Strict weak ordering for HTTP header lines that looks only at the header
    name, i.e. the text before the first ':' (or the whole line when there is
    no ':'). Two lines with the same name compare equivalent, which lets a
    <code>set&lt;string, HeaderLess&gt;</code> keep a single line per header.

    This no longer derives from std::binary_function, which was deprecated in
    C++11 and removed in C++17. The nested typedefs it used to supply are
    declared directly so code that relies on them keeps compiling. */
struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    /** @return True when the name part of \c s1 sorts before the name part
        of \c s2. */
    bool operator()(const std::string &s1, const std::string &s2) const
    {
        // compare() clamps an npos count to the end of the string, so this is
        // the same ordering as substr(0, find(':')) < substr(0, find(':'))
        // without building two temporary strings per comparison.
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1319 HTTPCache::update_response(
const string &url, time_t request_time,
1320 const vector<string> &headers)
1322 lock_cache_interface();
1325 DBG(cerr <<
"Updating the response headers for: " << url << endl);
1328 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1330 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1333 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1336 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1344 set<string, HeaderLess> merged_headers;
1347 copy(headers.begin(), headers.end(),
1348 inserter(merged_headers, merged_headers.begin()));
1351 vector<string> old_headers;
1352 read_metadata(entry->get_cachename(), old_headers);
1353 copy(old_headers.begin(), old_headers.end(),
1354 inserter(merged_headers, merged_headers.begin()));
1359 vector<string> result;
1360 copy(merged_headers.rbegin(), merged_headers.rend(),
1361 back_inserter(result));
1363 write_metadata(entry->get_cachename(), result);
1364 entry->unlock_write_response();
1365 unlock_cache_interface();
1369 entry->unlock_read_response();
1371 unlock_cache_interface();
// Decides whether the cached response for `url` may be used without
// revalidation. Visible checks, in order: d_always_validate; the entry's
// must-revalidate flag; the client's max-age (d_max_age) against the entry's
// current age; the client's min-fresh (d_min_fresh); and finally whether the
// freshness lifetime, extended by d_max_stale when that is set, still exceeds
// the current age. Current age = corrected initial age + time resident in the
// cache. Raises Error when the URL has no entry. The entry's read lock and
// the cache interface lock are released on every visible exit path.
1388 HTTPCache::is_url_valid(
const string &url)
1390 lock_cache_interface();
1395 DBG(cerr <<
"Is this URL valid? (" << url <<
")" << endl);
1398 if (d_always_validate) {
1399 unlock_cache_interface();
1403 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1405 throw Error(internal_error,
"There is no cache entry for the URL: " + url);
1412 if (entry->get_must_revalidate()) {
1413 entry->unlock_read_response();
1414 unlock_cache_interface();
1418 time_t resident_time = time(NULL) - entry->get_response_time();
1419 time_t current_age = entry->get_corrected_initial_age() + resident_time;
// Negative d_max_age / d_min_fresh / d_max_stale mean "not set by the client".
1423 if (d_max_age >= 0 && current_age > d_max_age) {
1424 DBG(cerr <<
"Cache....... Max-age validation" << endl);
1425 entry->unlock_read_response();
1426 unlock_cache_interface();
1429 if (d_min_fresh >= 0
1430 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1431 DBG(cerr <<
"Cache....... Min-fresh validation" << endl);
1432 entry->unlock_read_response();
1433 unlock_cache_interface();
1437 freshness = (entry->get_freshness_lifetime()
1438 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1439 entry->unlock_read_response();
1440 unlock_cache_interface();
1444 entry->unlock_read_response();
1446 unlock_cache_interface();
// Looks up the cached response for `url`. On success `cacheName` receives the
// entry's cache file name, `headers` receives the stored metadata lines, the
// body file is opened and the entry is bound to that FILE* in the cache table.
// The cache interface lock is held during the call.
// NOTE(review): the read lock obtained by get_locked_entry_from_cache_table()
// is not released in the visible code - presumably it stays held until
// release_cached_response() is called with the returned FILE*; confirm.
1480 FILE * HTTPCache::get_cached_response(
const string &url,
1481 vector<string> &headers,
string &cacheName) {
1482 lock_cache_interface();
1487 DBG(cerr <<
"Getting the cached response for " << url << endl);
1490 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1492 unlock_cache_interface();
1496 cacheName = entry->get_cachename();
1497 read_metadata(entry->get_cachename(), headers);
1499 DBG(cerr <<
"Headers just read from cache: " << endl);
1500 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr,
"\n")));
1502 body = open_body(entry->get_cachename());
1504 DBG(cerr <<
"Returning: " << url <<
" from the cache." << endl);
1506 d_http_cache_table->bind_entry_to_data(entry, body);
1511 unlock_cache_interface();
1517 unlock_cache_interface();
// Convenience overload: same as the three-argument get_cached_response(),
// with the cache file name discarded.
1534 HTTPCache::get_cached_response(
const string &url, vector<string> &headers)
1536 string discard_name;
1537 return get_cached_response(url, headers, discard_name);
// Convenience overload: same as the three-argument get_cached_response(),
// with both the headers and the cache file name discarded.
1551 HTTPCache::get_cached_response(
const string &url)
1553 string discard_name;
1554 vector<string> discard_headers;
1555 return get_cached_response(url, discard_headers, discard_name);
// Tells the cache that the caller is done with `body`: under the cache
// interface lock, the cache table uncouples the entry from that FILE*.
// The lock is released on both visible exit paths.
1571 HTTPCache::release_cached_response(FILE *body)
1573 lock_cache_interface();
1577 d_http_cache_table->uncouple_entry_from_data(body);
1580 unlock_cache_interface();
1584 unlock_cache_interface();
// Deletes every entry in the cache, under the cache interface lock. Refuses
// with an Error while any response is still read-locked. The lock is released
// on both visible exit paths.
1600 HTTPCache::purge_cache()
1602 lock_cache_interface();
1605 if (d_http_cache_table->is_locked_read_responses())
1606 throw Error(internal_error,
"Attempt to purge the cache with entries in use.");
1608 d_http_cache_table->delete_all_entries();
1611 unlock_cache_interface();
1615 unlock_cache_interface();
time_t parse_time(const char *str, bool expand)
top level DAP object to house generic methods
A class for software fault reporting.
bool is_hop_by_hop_header(const string &header)
string date_time_str(time_t *calendar, bool local)
std::string get_error_message() const
A class for error processing.