// Directory that holds the cache. It is handed to opendir() and is the parent
// directory of the cache info file.
124 std::string d_cache_dir;
// Size threshold in bytes; the purge code compares the size recorded in the
// cache info file against this value.
128 unsigned long long d_max_cache_size_in_bytes = 0;
// Byte budget for one purge pass: the removal loop tests whether more than this
// many bytes have already been removed.
131 unsigned long long d_purge_size = 0;
// Descriptor of the open cache info file, or -1 when it is not open. The same
// descriptor is handed to CacheLock, which flock()s it for cache-wide locking.
134 int d_cache_info_fd = -1;
// Name of the cache info file inside d_cache_dir; files_in_cache() compares
// directory entries against it.
136 const std::string CACHE_INFO_FILE_NAME =
"cache_info";
/**
 * @brief Name the kind of lock requested by a flock() operation value.
 *
 * The operation may carry modifier bits such as LOCK_NB (this file passes
 * LOCK_EX | LOCK_NB and LOCK_SH | LOCK_NB to the item-locking code), so the
 * LOCK_EX bit is tested instead of comparing the whole value. A plain equality
 * test would describe a non-blocking exclusive lock as "Shared".
 *
 * @param lock_type A flock() operation, e.g. LOCK_SH, LOCK_EX, optionally OR'ed with LOCK_NB.
 * @return "Exclusive" when the LOCK_EX bit is set, otherwise "Shared".
 */
static std::string get_lock_type_string(int lock_type) {
    return ((lock_type & LOCK_EX) != 0) ? "Exclusive" : "Shared";
}
// Special members of CacheLock. The object cannot be copied (copy construction
// and copy assignment are deleted).
// Default construction supplies no descriptor.
148 CacheLock() =
default;
149 CacheLock(
const CacheLock &) =
delete;
// Remember the descriptor that lock_the_cache() will pass to flock().
150 explicit CacheLock(
int fd) : d_fd(fd) {}
151 CacheLock &operator=(
const CacheLock &) =
delete;
153 if (flock(d_fd, LOCK_UN) < 0)
154 ERROR(
"Could not unlock the FileCache.");
// Apply a flock() operation to the descriptor held by this CacheLock.
//
// lock_type is passed to flock() unchanged (the callers in this file pass LOCK_EX).
// msg is caller-supplied context that is prepended to the error report when
// flock() fails. Callers treat a false return as failure to obtain the lock.
//
// An ERROR is also reported when the object holds no usable descriptor.
// NOTE(review): the guard for that case, the test that chooses between the two
// flock() error reports, and the return statements are on lines not shown here.
163 bool lock_the_cache(
int lock_type,
const std::string &msg =
"")
const {
165 ERROR(
"Call to CacheLock::lock_the_cache with uninitialized lock object.");
168 if (flock(d_fd, lock_type) < 0) {
170 ERROR(msg + get_lock_type_string(lock_type) + get_errno());
172 ERROR(msg + get_errno() );
// Create the file that backs `key`. Callers store the int result in a variable
// named fd and use it as a file descriptor.
//
// The file is opened with O_CREAT | O_EXCL | O_RDWR and mode 0666, so the open
// fails when the file already exists. That case (errno == EEXIST) is logged with
// INFO_LOG; an ERROR report exists for creation failures (presumably the other
// errno values - the branch structure is on lines not shown here).
// NOTE(review): how key_file_name is built from key, and the value returned on
// failure, are not visible in this excerpt.
187 int create_key(
const std::string &key) {
190 if ((fd = open(key_file_name.c_str(), O_CREAT | O_EXCL | O_RDWR, 0666)) < 0) {
191 if (errno == EEXIST) {
192 INFO_LOG(
"Could not create the key/file; it already exists: " + key +
" " + get_errno() );
196 ERROR(
"Error creating key/file: " + key +
" " + get_errno());
// Walk the cache directory (d_cache_dir) with opendir()/readdir().
//
// Entries named ".", ".." or CACHE_INFO_FILE_NAME are singled out by the test
// below (presumably skipped - the statement that follows the test is not shown).
// `files` is the caller's output vector. NOTE(review): whether names are stored
// bare or joined with d_cache_dir cannot be seen here; callers pass the strings
// straight to remove() and stat().
// An ERROR is reported when the directory cannot be opened.
207 bool files_in_cache(std::vector<std::string> &files)
const {
210 const struct dirent *ent;
211 if ((dir = opendir (d_cache_dir.c_str())) !=
nullptr) {
213 while ((ent = readdir (dir)) !=
nullptr) {
217 if (strcmp(ent->d_name,
".") == 0 || strcmp(ent->d_name,
"..") == 0
218 || strcmp(ent->d_name, CACHE_INFO_FILE_NAME.c_str()) == 0)
225 ERROR(
"Could not open the cache directory (" + d_cache_dir +
").");
// Consistency check on the object's state. The visible part tests whether the
// cache info descriptor is negative, i.e. the cache info file is not open.
// NOTE(review): the result of that test and any further checks are not shown here.
233 bool invariant()
const {
234 if (d_cache_info_fd < 0)
// Query the open descriptor `fd` with fstat().
// NOTE(review): the return statements are not in this excerpt; presumably the
// result is the st_size of the file, with some fallback when fstat() fails - confirm.
240 static unsigned long long get_file_size(
int fd) {
242 if (fstat(fd, &sb) != 0)
// Open (creating it if necessary) the cache info file in d_cache_dir and store
// its descriptor in d_cache_info_fd.
//
// An empty d_cache_dir is tested for first (the outcome is on a line not shown).
// The first open uses O_CREAT | O_EXCL: when it succeeds the file is new, and an
// unsigned long long zero is written as the initial recorded size. Otherwise the
// existing file is opened O_RDWR.
// Callers treat a false return as failure.
250 bool open_cache_info() {
251 if (d_cache_dir.empty())
255 if ((d_cache_info_fd = open(
BESUtil::pathConcat(d_cache_dir, CACHE_INFO_FILE_NAME).c_str(), O_RDWR | O_CREAT | O_EXCL, 0666)) >= 0) {
256 unsigned long long size = 0;
257 if (write(d_cache_info_fd, &size,
sizeof(size)) !=
sizeof(size))
260 else if ((d_cache_info_fd = open(
BESUtil::pathConcat(d_cache_dir, CACHE_INFO_FILE_NAME).c_str(), O_RDWR, 0666)) < 0) {
// Read the size recorded in the cache info file.
//
// Requires d_cache_info_fd to be open (not -1). Seeks to the start of the file
// and reads sizeof(unsigned long long) raw bytes into `size`. Each failing step
// is reported with ERROR.
// NOTE(review): the values returned on the error paths are on lines not shown here.
268 unsigned long long get_cache_info_size()
const
270 if (d_cache_info_fd == -1) {
271 ERROR(
"Cache info file not open.");
274 if (lseek(d_cache_info_fd, 0, SEEK_SET) == -1) {
275 ERROR(
"Could not seek to the beginning of the cache info file.");
278 unsigned long long size = 0;
279 if (read(d_cache_info_fd, &size,
sizeof(size)) !=
sizeof(size)) {
280 ERROR(
"Could not read the cache info file.");
// Overwrite the size recorded in the cache info file with `size`.
//
// Requires d_cache_info_fd to be open (not -1). Seeks to the start of the file
// and writes the raw sizeof(unsigned long long) bytes of `size`. Each failing
// step is reported with ERROR; callers treat a false return as failure.
288 bool update_cache_info_size(
unsigned long long size)
const {
289 if (d_cache_info_fd == -1) {
290 ERROR(
"Cache info file not open.");
293 if (lseek(d_cache_info_fd, 0, SEEK_SET) == -1) {
294 ERROR(
"Could not seek to the beginning of the cache info file.");
297 if (write(d_cache_info_fd, &size,
sizeof(size)) !=
sizeof(size)) {
298 ERROR(
"Could not write to the cache info file.");
// Grants FileCacheTest access to non-public members (presumably the unit-test class - confirm).
304 friend class FileCacheTest;
// Map a key to the hexadecimal text of its SHA-256 digest.
//
// The digest is computed over the key.size() bytes of `key`. Each value `b` is
// streamed in hex with width 2 and '0' fill (the loop header that supplies `b`
// is on a line not shown here).
// The key -> hash mapping can be written with INFO_LOG; presumably only when
// log_it is true - the guard is not visible in this excerpt.
// Returns the accumulated hex string.
314 static std::string
hash_key(
const std::string &key,
bool log_it =
false) {
315 unsigned char md[SHA256_DIGEST_LENGTH];
316 SHA256(
reinterpret_cast<const unsigned char *
>(key.c_str()), key.size(), md);
317 std::stringstream hex_stream;
319 hex_stream << std::hex << std::setw(2) << std::setfill(
'0') << (
int)b;
322 INFO_LOG(
":hash_key() " + key +
" -> " + hex_stream.str());
323 return {hex_stream.str()};
// Special members of Item. An Item cannot be copied (copy construction and copy
// assignment are deleted).
332 Item(
const Item &) =
delete;
// Remember the descriptor that lock_the_item() will pass to flock().
333 explicit Item(
int fd) : d_fd(fd) { }
334 Item &operator=(
const Item &) =
delete;
// NOTE(review): only the signature is visible in this excerpt; presumably this
// replaces the descriptor held by the Item (d_fd) - confirm against the body.
345 void set_fd(
int fd) {
// Apply a flock() operation to the descriptor held by this Item.
//
// lock_type is passed to flock() unchanged; callers in this file pass LOCK_EX,
// LOCK_EX | LOCK_NB, or the caller-selected value (default LOCK_SH | LOCK_NB in get()).
// msg is caller-supplied context used in the error report when flock() fails.
// Callers treat a false return as failure to obtain the lock.
//
// An ERROR is also reported when the item holds no usable descriptor.
// NOTE(review): the guard for that case, the test that chooses between the two
// flock() error reports, and the return statements are on lines not shown here.
349 bool lock_the_item(
int lock_type,
const std::string &msg =
"")
const {
351 ERROR(
"Call to Item::lock_the_item() with uninitialized item file descriptor.");
354 if (flock(d_fd, lock_type) < 0) {
356 ERROR(
"Could not get " + get_lock_type_string(lock_type) +
" lock: " + get_errno() );
358 ERROR(msg +
": " + get_errno());
// When FORCE_ACCESS_TIME_UPDATE is enabled, futimes() with a null times argument
// sets the file's access and modification times to the current time.
362#if FORCE_ACCESS_TIME_UPDATE
363 futimes(d_fd,
nullptr);
// An Item bound to the FileCache it belongs to (d_fc). It cannot be copied.
//
// The destructor takes an exclusive lock on the cache (via the cache info
// descriptor) and then adds the current size of the file behind get_fd() to the
// size recorded in the cache info file, reporting an ERROR if that update fails.
// NOTE(review): what happens when the cache lock cannot be taken is on a line not shown here.
376 class PutItem :
public Item {
381 explicit PutItem(FileCache &fc) : d_fc(fc) {}
382 PutItem(
const PutItem &) =
delete;
383 const PutItem &operator=(
const PutItem &) =
delete;
384 ~PutItem()
override {
386 CacheLock lock(d_fc.d_cache_info_fd);
387 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in ~PutItem() for descriptor: " + std::to_string(get_fd())))
389 if (!d_fc.update_cache_info_size(d_fc.get_cache_info_size() + get_file_size(get_fd()))) {
390 ERROR(
"Could not update the cache info file while unlocking a put item: " + get_errno() );
// Special members of FileCache. Default construction leaves the members at their
// in-class initial values; initialize() supplies the real settings. A FileCache
// cannot be copied (copy construction and copy assignment are deleted).
395 FileCache() =
default;
396 FileCache(
const FileCache &) =
delete;
397 FileCache &operator=(
const FileCache &rhs) =
delete;
// Close the cache info file descriptor if it was opened (i.e. it is not -1).
399 virtual ~FileCache() {
400 if (d_cache_info_fd != -1) {
401 close(d_cache_info_fd);
// Configure the cache: record its directory, open the cache info file, and store
// the size settings.
//
// cache_dir   directory that holds the cache; it is checked with stat().
// size        stored in d_max_cache_size_in_bytes; must be >= 0.
// purge_size  stored in d_purge_size; must be >= 0.
//
// Negative size values, a directory that cannot be stat()ed, and a cache info
// file that cannot be opened are each reported with ERROR_LOG.
// NOTE(review): stat() is called twice; a line between the two calls is not
// shown (presumably an attempt to create the directory - confirm). The return
// statements are not visible either.
413 virtual bool initialize(
const std::string &cache_dir,
long long size,
long long purge_size) {
414 if (size < 0 || purge_size < 0) {
415 ERROR_LOG(
"FileCache::initialize() - size and purge_size must be >= 0\n");
420 if (stat(cache_dir.c_str(), &sb) != 0) {
422 if (stat(cache_dir.c_str(), &sb) != 0) {
423 ERROR_LOG(
"FileCache::initialize() - could not stat the cache directory: " + cache_dir);
428 d_cache_dir = cache_dir;
430 if (!open_cache_info()) {
431 ERROR_LOG(
"FileCache::initialize() - could not open the cache info file: " + cache_dir);
// The negative case was rejected above, so these conversions keep the value.
435 d_max_cache_size_in_bytes = (
unsigned long long)size;
436 d_purge_size = (
unsigned long long)purge_size;
// Add a new cache entry for `key` whose contents are copied from the file `file_name`.
//
// Steps visible here, in order: take an exclusive lock on the cache; create the
// key's file with create_key(); take an exclusive lock on that new item; open the
// source file read-only; copy it with read()/write() through a buffer sized
// min(MEGABYTE, source file size); finally add the new file's size to the size
// recorded in the cache info file.
// Failures to open the source or to write the destination are reported with ERROR.
// NOTE(review): the return statements and descriptor cleanup are on lines not shown here.
450 bool put(
const std::string &key,
const std::string &file_name) {
452 CacheLock lock(d_cache_info_fd);
453 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in put(1) for: " + key))
457 int fd = create_key(key);
465 if (!fdl.lock_the_item(LOCK_EX,
"locking the just created key/file in put(1): " + key))
470 if ((fd2 = open(file_name.c_str(), O_RDONLY)) < 0) {
471 ERROR(
"Error reading from source file: " + file_name +
" " + get_errno());
479 std::vector<char> buf(std::min(MEGABYTE, get_file_size(fd2)));
481 while ((n = read(fd2, buf.data(), buf.size())) > 0) {
482 if (write(fd, buf.data(), n) != n) {
483 ERROR(
"Error writing to destination file: " + key +
" " + get_errno());
489 if (!update_cache_info_size(get_cache_info_size() + get_file_size(fd)))
// Add a new cache entry for `key` whose contents are the bytes of `data`.
//
// Steps visible here, in order: take an exclusive lock on the cache; create the
// key's file with create_key(); take an exclusive lock on that new item; write
// data.size() bytes in a single write() call (a short write is reported with
// ERROR); finally add the new file's size to the size recorded in the cache
// info file.
// NOTE(review): the return statements are on lines not shown here.
496 bool put_data(
const std::string &key,
const std::string &data) {
498 CacheLock lock(d_cache_info_fd);
499 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in put_data() for: " + key))
503 int fd = create_key(key);
511 if (!fdl.lock_the_item(LOCK_EX,
"locking the just created key/file in put_data(): " + key))
517 if (write(fd, data.c_str(), data.size()) != data.size()) {
518 ERROR(
"Error writing to data to cache file: " + key +
" " + get_errno());
523 if (!update_cache_info_size(get_cache_info_size() + get_file_size(fd)))
546 CacheLock lock(d_cache_info_fd);
547 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in put(2) for: " + key))
551 int fd = create_key(key);
559 if (!item.lock_the_item(LOCK_EX,
"locking the just created key/file in put(2): " + key))
// Open the cache entry for `key` and lock it through `item`.
//
// key        identifies the entry; the file name derived from it is opened O_RDONLY.
// item       caller-owned Item used to take the lock on the entry.
// lock_type  flock() operation for the item lock; defaults to a shared,
//            non-blocking lock (LOCK_SH | LOCK_NB).
//
// The cache itself is locked exclusively for the duration of the call. A failure
// to open the entry is reported with ERROR.
// NOTE(review): how key_file_name is built, how the descriptor is handed to
// `item`, and the return statements are on lines not shown here.
574 bool get(
const std::string &key,
Item &item,
int lock_type = LOCK_SH | LOCK_NB) {
576 CacheLock lock(d_cache_info_fd);
577 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in get() for: " + key))
582 int fd = open(key_file_name.c_str(), O_RDONLY, 0666);
587 ERROR(
"Error opening the cache item in get for: " + key +
" " + get_errno());
593 if (!item.lock_the_item(lock_type,
"locking the item in get() for: " + key))
// Remove the cache entry for `key`.
//
// key        identifies the entry; the file name derived from it is opened O_WRONLY.
// lock_type  flock() operation for the item lock; defaults to an exclusive,
//            non-blocking lock (LOCK_EX | LOCK_NB).
//
// Steps visible here, in order: take an exclusive lock on the cache; open the
// entry; lock it with lock_type; read its size; remove() the file; subtract that
// size from the size recorded in the cache info file.
// NOTE(review): the subtraction is on unsigned values, so it would wrap if the
// recorded size were smaller than the file size - confirm that cannot happen.
// The return statements are on lines not shown here.
611 bool del(
const std::string &key,
int lock_type = LOCK_EX | LOCK_NB) {
613 CacheLock lock(d_cache_info_fd);
614 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in del()."))
618 int fd = open(key_file_name.c_str(), O_WRONLY, 0666);
620 ERROR(
"Error opening the cache item in del() for: " + key +
" " + get_errno());
625 if (!item.lock_the_item(lock_type,
"locking the cache item in del() for: " + key))
// The size is captured before the file is removed so the recorded total can be adjusted.
628 auto file_size = get_file_size(fd);
630 if (remove(key_file_name.c_str()) != 0) {
631 ERROR(
"Error removing " + key +
" from cache directory (" + d_cache_dir +
") - " + get_errno());
635 if (!update_cache_info_size(get_cache_info_size() - file_size))
648 CacheLock lock(d_cache_info_fd);
649 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in clear()."))
652 std::vector<std::string> files;
653 if (!files_in_cache(files)) {
657 for (
const auto &file: files) {
658 if (remove(file.c_str()) != 0) {
659 ERROR(
"Error removing " + file +
" from cache directory (" + d_cache_dir +
") - " + get_errno());
680 CacheLock lock(d_cache_info_fd);
681 if (!lock.lock_the_cache(LOCK_EX,
"locking the cache in purge()."))
684 uint64_t ci_size = get_cache_info_size();
685 if (ci_size < d_max_cache_size_in_bytes)
691 item_info(std::string name, off_t size) :d_name(std::move(name)), d_size(size) {}
695 std::multimap<unsigned long, struct item_info, std::less<>> items;
696 uint64_t total_size = 0;
698 std::vector<std::string> files;
699 if (!files_in_cache(files))
702 for (
const auto &file: files) {
705 if (stat(file.c_str(), &sb) < 0) {
706 ERROR(
"Error getting info on " + file +
" in purge() - " + get_errno());
710 items.insert(std::pair<unsigned long, item_info>(sb.st_atime, item_info(file, sb.st_size)));
711 total_size += sb.st_size;
714 if (ci_size != total_size) {
715 ERROR(
"Error cache_info and the measured size of items differ by " + std::to_string(total_size) +
" bytes.");
719 uint64_t removed_bytes = 0;
720 for (
const auto &item: items) {
721 if (removed_bytes > d_purge_size)
726 int fd = open(item.second.d_name.c_str(), O_WRONLY, 0666);
728 ERROR(
"Error opening the cache item in purge() for: " + item.second.d_name +
" " + get_errno());
732 if (!item_lock.lock_the_item(LOCK_EX | LOCK_NB,
"locking the cache item in purge() for: " + item.second.d_name))
735 if (remove(item.second.d_name.c_str()) != 0) {
736 ERROR(
"Error removing " + item.second.d_name +
" from cache directory in purge() - " + get_errno());
741 removed_bytes += item.second.d_size;
746 if (!update_cache_info_size(ci_size - removed_bytes)) {
747 ERROR(
"Error updating the cache_info size in purge() - " + get_errno());
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.