libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPCacheTable.cc
Go to the documentation of this file.
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28#include <cerrno>
29#include <cstring>
30
31#include <algorithm>
32#include <iostream>
33#include <iterator>
34#include <set>
35#include <sstream>
36
37#include <sys/stat.h>
38#include <unistd.h> // for stat
39
40#include "Error.h"
41#include "InternalErr.h"
42#include "debug.h"
43#include "util_mit.h" // for parse_time()
44
45#include "HTTPCacheTable.h"
46
47const int CACHE_TABLE_SIZE = 1499;
48
49using namespace std;
50
51namespace libdap {
52
56int get_hash(const string &url) {
57 int hash = 0;
58
59 for (const char *ptr = url.c_str(); *ptr; ptr++)
60 hash = ((hash * 3 + (*(const unsigned char *)ptr)) % CACHE_TABLE_SIZE);
61
62 return hash;
63}
64
65HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size)
66 : d_cache_root(cache_root), d_block_size(block_size) {
67 d_cache_index = cache_root + CACHE_INDEX;
68
69 d_cache_table.resize(CACHE_TABLE_SIZE);
70
71 cache_index_read();
72}
73
75 for (auto &row : d_cache_table) {
76 for (auto &entry : row) {
77 delete entry;
78 entry = nullptr;
79 }
80 }
81}
82
88 if (!t)
89 t = time(nullptr); // nullptr == now
90
91 for (auto &row : d_cache_table) {
92 for (auto &entry : row) {
93 // Remove an entry if it has expired.
94 if (entry && !entry->readers &&
95 (entry->freshness_lifetime < (entry->corrected_initial_age + (t - entry->response_time)))) {
96 DBG(cerr << "Deleting expired cache entry: " << entry->url << endl);
97 remove_cache_entry(entry); // deletes the files in the cache
98 delete entry;
99 entry = nullptr;
100 }
101 }
102 // Remove the null entries from the vector.
103 row.erase(remove(row.begin(), row.end(), nullptr), row.end());
104 }
105}
106
112 for (auto &row : d_cache_table) {
113 for (auto &entry : row) {
114 // Remove an entry if it has not had enough cache hits.
115 if (entry && !entry->readers && entry->hits <= hits) {
116 DBG(cerr << "Deleting cache entry (too few hits): " << entry->url << endl);
117 remove_cache_entry(entry); // deletes the files in the cache
118 delete entry;
119 entry = nullptr;
120 }
121 }
122 // Remove the null entries from the vector.
123 row.erase(remove(row.begin(), row.end(), nullptr), row.end());
124 }
125}
126
131void HTTPCacheTable::delete_by_size(unsigned long size) {
132 for (auto &row : d_cache_table) {
133 for (auto &entry : row) {
134 // Remove an entry if it is too big.
135 if (entry && !entry->readers && entry->size > size) {
136 DBG(cerr << "Deleting cache entry (too few hits): " << entry->url << endl);
137 remove_cache_entry(entry); // deletes the files in the cache
138 delete entry;
139 entry = nullptr;
140 }
141 }
142 // Remove the null entries from the vector.
143 row.erase(remove(row.begin(), row.end(), nullptr), row.end());
144 }
145}
146
151
153
159
160bool HTTPCacheTable::cache_index_delete() {
161 d_new_entries = 0;
162
163 return (remove(d_cache_index.c_str()) == 0);
164}
165
173
174bool HTTPCacheTable::cache_index_read() {
175 FILE *fp = fopen(d_cache_index.c_str(), "r");
176 // If the cache index can't be opened that's OK; start with an empty
177 // cache. 09/05/02 jhrg
178 if (!fp) {
179 return false;
180 }
181
182 char line[1024];
183 while (!feof(fp) && fgets(line, 1024, fp)) {
184 add_entry_to_cache_table(cache_index_parse_line(line));
185 DBG2(cerr << line << endl);
186 }
187
188 int res = fclose(fp);
189 if (res) {
190 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
191 }
192
193 d_new_entries = 0;
194
195 return true;
196}
197
204
205HTTPCacheTable::CacheEntry *HTTPCacheTable::cache_index_parse_line(const char *line) {
206 // Read the line and create the cache object
207 auto entry = new HTTPCacheTable::CacheEntry;
208 istringstream iss(line);
209 iss >> entry->url;
210 iss >> entry->cachename;
211
212 iss >> entry->etag;
213 if (entry->etag == CACHE_EMPTY_ETAG)
214 entry->etag = "";
215
216 iss >> entry->lm;
217 iss >> entry->expires;
218 iss >> entry->size;
219 iss >> entry->range; // range is not used. 10/02/02 jhrg
220
221 iss >> entry->hash;
222 iss >> entry->hits;
223 iss >> entry->freshness_lifetime;
224 iss >> entry->response_time;
225 iss >> entry->corrected_initial_age;
226
227 iss >> entry->must_revalidate;
228
229 return entry;
230}
231
233
234class WriteOneCacheEntry : public unary_function<HTTPCacheTable::CacheEntry *, void> {
235 FILE *d_fp;
236
237public:
238 explicit WriteOneCacheEntry(FILE *fp) : d_fp(fp) {}
239
240 void operator()(const HTTPCacheTable::CacheEntry *e) {
241 if (e &&
242 (fprintf(d_fp, "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n", e->url.c_str(), e->cachename.c_str(),
243 e->etag.empty() ? CACHE_EMPTY_ETAG.c_str() : e->etag.c_str(), (e->lm), (e->expires), e->size,
244 e->range ? '1' : '0', // not used. 10/02/02 jhrg
245 e->hash, e->hits, e->freshness_lifetime, e->response_time, e->corrected_initial_age,
246 e->must_revalidate ? '1' : '0') < 0))
247 throw Error(internal_error, "Cache Index. Error writing cache index\n");
248 }
249};
250
261 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
262
263 // Open the file for writing.
264 FILE *fp = nullptr;
265 if ((fp = fopen(d_cache_index.c_str(), "wb")) == nullptr) {
266 throw Error(string("Cache Index. Can't open `") + d_cache_index + "' for writing");
267 }
268
269 // Walk through the list and write it out. The format is really
270 // simple as we keep it all in ASCII.
271 WriteOneCacheEntry woc(fp);
272 for (const auto &row : d_cache_table) {
273 for (auto &entry : row) {
274 if (entry) {
275 woc(entry);
276 }
277 }
278 }
279
280 /* Done writing */
281 int res = fclose(fp);
282 if (res) {
283 DBG(cerr << "HTTPCache::cache_index_write - Failed to close " << (void *)fp << endl);
284 }
285
286 d_new_entries = 0;
287}
288
290
302
303string HTTPCacheTable::create_hash_directory(int hash) {
304 ostringstream path;
305 path << d_cache_root << hash;
306
307 // Save the mask
308 mode_t mask = umask(S_IRWXO); // 0007
309
310 // Ignore the error if the directory exists (S_IRWXU | S_IRWXG = 0770)
311 errno = 0;
312 if (mkdir(path.str().c_str(), S_IRWXU | S_IRWXG) < 0 && errno != EEXIST) {
313 umask(mask);
314 throw Error(internal_error,
315 "HTTPCacheTable::create_hash_directory: Could not create the directory for the cache at '" +
316 path.str() + "' (" + strerror(errno) + ").");
317 }
318
319 // Restore the mask
320 umask(mask);
321
322 return path.str();
323}
324
338
340 string hash_dir = create_hash_directory(entry->hash);
341 hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
342
343 // mkstemp uses the storage passed to it; must be writable and local.
344 vector<char> templat(hash_dir.size() + 1);
345 strncpy(templat.data(), hash_dir.c_str(), hash_dir.size() + 1);
346
347 // Open truncated for update. NB: mkstemp() returns a file descriptor.
348 // man mkstemp says "... The file is opened with the O_EXCL flag,
349 // guaranteeing that when mkstemp returns successfully we are the only
350 // user." 09/19/02 jhrg
351 int fd = mkstemp(templat.data()); // fd mode is 666 or 600 (Unix)
352 if (fd < 0) {
353 // close(fd); Calling close() when fd is < 0 is a bad idea! jhrg 7/2/15
354 throw Error(internal_error,
355 "The HTTP Cache could not create a file to hold the response; it will not be cached.");
356 }
357
358 entry->cachename = templat.data();
359 close(fd);
360}
361
363static inline unsigned int entry_disk_space(int size, unsigned int block_size) {
364 unsigned int num_of_blocks = (size + block_size) / block_size;
365
366 DBG(cerr << "size: " << size << ", block_size: " << block_size << ", num_of_blocks: " << num_of_blocks << endl);
367
368 return num_of_blocks * block_size;
369}
370
372
374
381 int hash = entry->hash;
382 if (hash > CACHE_TABLE_SIZE - 1 || hash < 0)
383 throw InternalErr(__FILE__, __LINE__, "Hash value too large!");
384
385 d_cache_table[hash].push_back(entry);
386
387 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << ", entry->size: " << entry->size
388 << ", block size: " << d_block_size << endl);
389
390 d_current_size += entry_disk_space(entry->size, d_block_size);
391
392 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
393
395}
396
400HTTPCacheTable::CacheEntry *HTTPCacheTable::get_read_locked_entry_from_cache_table(const string &url) /*const*/
401{
402 return get_read_locked_entry_from_cache_table(get_hash(url), url);
403}
404
414HTTPCacheTable::CacheEntry *HTTPCacheTable::get_read_locked_entry_from_cache_table(int hash,
415 const string &url) /*const*/
416{
417 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
418 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
419 if (!d_cache_table[hash].empty()) {
420 for (auto entry : d_cache_table[hash]) {
421 // Must test entry because perform_garbage_collection may have
422 // removed this entry; the CacheEntry will then be null.
423 // Not so sure that's true now given the use of erase-remove. jhrg 2/17/23
424 if (entry && entry->url == url) {
425 entry->lock_read_response(); // Lock the response
426 return entry;
427 }
428 }
429 }
430 return nullptr;
431}
432
442 int hash = get_hash(url);
443 if (!d_cache_table[hash].empty()) {
444 for (auto entry : d_cache_table[hash]) {
445 if (entry && entry->url == url) {
446 entry->lock_write_response(); // Lock the response
447 return entry;
448 }
449 }
450 }
451
452 return nullptr;
453}
454
462void HTTPCacheTable::remove_cache_entry(const HTTPCacheTable::CacheEntry *entry) {
463 // This should never happen; all calls to this method are protected by
464 // the caller, hence the InternalErr.
465 if (entry->readers)
466 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
467
468 remove(entry->cachename.c_str());
469 remove(string(entry->cachename + CACHE_META).c_str());
470
471 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
472
473 unsigned int eds = entry_disk_space(entry->size, get_block_size());
474 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
475
476 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
477}
478
486 int hash = get_hash(url);
487 auto &row = d_cache_table[hash];
488 for (auto &entry : row) {
489 if (entry && entry->url == url) {
490 entry->lock_write_response();
491 remove_cache_entry(entry);
492 entry->unlock_write_response();
493 delete entry;
494 entry = nullptr;
495 }
496 }
497 // Remove the null entries from the vector.
498 row.erase(remove(row.begin(), row.end(), nullptr), row.end());
499}
500
502 for (auto &row : d_cache_table) {
503 for (auto &entry : row) {
504 if (entry) {
505 remove_cache_entry(entry); // deletes the files in the cache
506 delete entry;
507 entry = nullptr;
508 }
509 }
510 // Remove the null entries from the vector.
511 row.erase(remove(row.begin(), row.end(), nullptr), row.end());
512 }
513
514 cache_index_delete();
515}
516
529
530void HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time) {
531 entry->response_time = time(nullptr);
532 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
533 time_t corrected_received_age = max(apparent_age, entry->age);
534 time_t response_delay = entry->response_time - request_time;
535 entry->corrected_initial_age = corrected_received_age + response_delay;
536
537 // Estimate an expires time using the max-age and expires time. If we
538 // don't have an explicit expires time then set it to 10% of the LM date
539 // (although max 24 h). If no LM date is available then use 24 hours.
540 time_t freshness_lifetime = entry->max_age;
541 if (freshness_lifetime < 0) {
542 if (entry->expires < 0) {
543 if (entry->lm < 0) {
544 freshness_lifetime = default_expiration;
545 } else {
546 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
547 }
548 } else
549 freshness_lifetime = entry->expires - entry->date;
550 }
551
552 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
553
554 DBG2(cerr << "Cache....... Received Age " << entry->age << ", corrected " << entry->corrected_initial_age
555 << ", freshness lifetime " << entry->freshness_lifetime << endl);
556}
557
568
569void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
570 const vector<string> &headers) {
571 for (const auto &line : headers) {
572 // skip a blank header.
573 if (line.empty())
574 continue;
575
576 string::size_type colon = line.find(':');
577
578 // skip a header with no colon in it.
579 if (colon == string::npos)
580 continue;
581
582 string header = line.substr(0, line.find(':'));
583 string value = line.substr(line.find(": ") + 2);
584 DBG2(cerr << "Header: " << header << endl);
585 DBG2(cerr << "Value: " << value << endl);
586
587 if (header == "ETag") {
588 entry->etag = value;
589 } else if (header == "Last-Modified") {
590 entry->lm = parse_time(value.c_str());
591 } else if (header == "Expires") {
592 entry->expires = parse_time(value.c_str());
593 } else if (header == "Date") {
594 entry->date = parse_time(value.c_str());
595 } else if (header == "Age") {
596 entry->age = parse_time(value.c_str());
597 } else if (header == "Content-Length") {
598 unsigned long clength = strtoul(value.c_str(), 0, 0);
599 if (clength > max_entry_size)
600 entry->set_no_cache(true);
601 } else if (header == "Cache-Control") {
602 // Ignored Cache-Control values: public, private, no-transform,
603 // proxy-revalidate, s-max-age. These are used by shared caches.
604 // See section 14.9 of RFC 2612. 10/02/02 jhrg
605 if (value == "no-cache" || value == "no-store")
606 // Note that we *can* store a 'no-store' response in volatile
607 // memory according to RFC 2616 (section 14.9.2) but those
608 // will be rare coming from DAP servers. 10/02/02 jhrg
609 entry->set_no_cache(true);
610 else if (value == "must-revalidate")
611 entry->must_revalidate = true;
612 else if (value.find("max-age") != string::npos) {
613 string max_age = value.substr(value.find('=') + 1);
614 entry->max_age = parse_time(max_age.c_str());
615 }
616 }
617 }
618}
619
621
623 entry->hits++; // Mark hit
624 d_locked_entries[body] = entry; // record lock, see release_cached_r...
625}
626
628 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
629 if (!entry)
630 throw InternalErr("There is no cache entry for the response given.");
631
632 d_locked_entries.erase(body);
633 entry->unlock_read_response();
634
635 if (entry->readers < 0)
636 throw InternalErr("An unlocked entry was released");
637}
638
639bool HTTPCacheTable::is_locked_read_responses() const { return !d_locked_entries.empty(); }
640
641} // namespace libdap
#define internal_error
Internal server error (500)
Definition Error.h:63
const int CACHE_TABLE_SIZE
#define LM_EXPIRATION(t)
const string CACHE_INDEX
Definition HTTPCache.cc:58
const string CACHE_EMPTY_ETAG
Definition HTTPCache.cc:61
const string CACHE_META
Definition HTTPCache.cc:60
A class for error processing.
Definition Error.h:92
void create_location(CacheEntry *entry)
void calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
void delete_expired_entries(time_t time=0)
Delete all the expired entries in the cache.
void bind_entry_to_data(CacheEntry *entry, FILE *body)
unsigned int get_block_size() const
void remove_entry_from_cache_table(const std::string &url)
void delete_by_size(unsigned long size)
Delete all the entries in the cache that are larger than size bytes.
void delete_by_hits(int hits)
Delete all the entries in the cache that have fewer than hits hits.
void add_entry_to_cache_table(CacheEntry *entry)
void set_current_size(unsigned long sz)
unsigned long get_current_size() const
CacheEntry * get_write_locked_entry_from_cache_table(const std::string &url)
void uncouple_entry_from_data(FILE *body)
void parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size, const std::vector< std::string > &headers)
bool is_locked_read_responses() const
A class for software fault reporting.
Definition InternalErr.h:61
#define DBG(x)
Definition debug.h:58
#define DBG2(x)
Definition debug.h:74
top level DAP object to house generic methods
Definition AISConnect.cc:30
int get_hash(const string &url)
time_t parse_time(const char *str, bool expand)
Definition util_mit.cc:144