bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
BESDapFunctionResponseCache.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of Hyrax, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2016 OPeNDAP, Inc.
7// Author: Nathan David Potter <ndp@opendap.org>
8// James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#include "config.h"
27
28//#define DODS_DEBUG
29
30#include <cstdio>
31#include <unistd.h>
32#include <sys/stat.h>
33
34#include <iostream>
35#include <string>
36#include <fstream>
37#include <sstream>
38
39#ifdef HAVE_TR1_FUNCTIONAL
40#include <tr1/functional>
41#endif
42
43#include <libdap/DDS.h>
44#include <libdap/ConstraintEvaluator.h>
45#include <libdap/DDXParserSAX2.h>
46
47#include <libdap/XDRStreamMarshaller.h>
48#include <libdap/XDRStreamUnMarshaller.h>
49#include <libdap/XDRFileUnMarshaller.h>
50
51#include <libdap/D4StreamMarshaller.h>
52#include <libdap/D4StreamUnMarshaller.h>
53
54#include <libdap/Sequence.h> // We have to special-case these; see read_data_ddx()
55
56#include <libdap/debug.h>
57#include <libdap/mime_util.h> // for last_modified_time() and rfc_822_date()
58#include <libdap/util.h>
59
60#include "CacheTypeFactory.h"
61#include "CacheMarshaller.h"
62#include "CacheUnMarshaller.h"
63
64#include "BESDapFunctionResponseCache.h"
65#include "BESDapResponseBuilder.h"
66#include "BESInternalError.h"
67
68#include "BESUtil.h"
69#include "TheBESKeys.h"
70#include "BESLog.h"
71#include "BESDebug.h"
72
73#define DEBUG_KEY "response_cache"
74
75#ifdef HAVE_TR1_FUNCTIONAL
76#define HASH_OBJ std::tr1::hash
77#else
78#define HASH_OBJ std::hash
79#endif
80
81using namespace std;
82using namespace libdap;
83
84const string DATA_MARK = "--DATA:";
85
86// If the size of the constraint is larger than this value, don't cache the response.
87const unsigned int max_cacheable_ce_len = 4096;
88const unsigned int max_collisions = 50; // It's hard to believe this could happen
89
90const unsigned int default_cache_size = 20; // 20 GB
91const string default_cache_prefix = "rc";
92const string default_cache_dir = ""; // I'm making the default empty so that no key == no caching. jhrg 9.26.16
93
94const string BESDapFunctionResponseCache::PATH_KEY = "DAP.FunctionResponseCache.path";
95const string BESDapFunctionResponseCache::PREFIX_KEY = "DAP.FunctionResponseCache.prefix";
96const string BESDapFunctionResponseCache::SIZE_KEY = "DAP.FunctionResponseCache.size";
97
98BESDapFunctionResponseCache *BESDapFunctionResponseCache::d_instance = 0;
99bool BESDapFunctionResponseCache::d_enabled = true;
100
101unsigned long BESDapFunctionResponseCache::get_cache_size_from_config()
102{
103 bool found;
104 string size;
105 unsigned long size_in_megabytes = default_cache_size;
106 TheBESKeys::TheKeys()->get_value(SIZE_KEY, size, found);
107 if (found) {
108 BESDEBUG(DEBUG_KEY,
109 "BESDapFunctionResponseCache::getCacheSizeFromConfig(): Located BES key " << SIZE_KEY<< "=" << size << endl);
110 istringstream iss(size);
111 iss >> size_in_megabytes;
112 }
113
114 return size_in_megabytes;
115}
116
117string BESDapFunctionResponseCache::get_cache_prefix_from_config()
118{
119 bool found;
120 string prefix = default_cache_prefix;
121 TheBESKeys::TheKeys()->get_value(PREFIX_KEY, prefix, found);
122 if (found) {
123 BESDEBUG(DEBUG_KEY,
124 "BESDapFunctionResponseCache::getCachePrefixFromConfig(): Located BES key " << PREFIX_KEY<< "=" << prefix << endl);
125 prefix = BESUtil::lowercase(prefix);
126 }
127
128 return prefix;
129}
130
131// If the cache directory is the empty string, the cache is turned off.
132string BESDapFunctionResponseCache::get_cache_dir_from_config()
133{
134 bool found;
135
136 string cacheDir = default_cache_dir;
137 TheBESKeys::TheKeys()->get_value(PATH_KEY, cacheDir, found);
138 if (found) {
139 BESDEBUG(DEBUG_KEY,
140 "BESDapFunctionResponseCache::getCacheDirFromConfig(): Located BES key " << PATH_KEY<< "=" << cacheDir << endl);
141 }
142
143 return cacheDir;
144}
145
164BESDapFunctionResponseCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
165{
166 if (d_enabled && d_instance == 0) {
167 if (!cache_dir.empty() && dir_exists(cache_dir)) {
168 d_instance = new BESDapFunctionResponseCache(cache_dir, prefix, size);
169 d_enabled = d_instance->cache_enabled();
170 if(!d_enabled){
171 delete d_instance;
172 d_instance = NULL;
173 BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
174 "Cache is DISABLED"<< endl);
175 }
176 else {
177 #ifdef HAVE_ATEXIT
178 atexit(delete_instance);
179 #endif
180 BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
181 "Cache is ENABLED"<< endl);
182 }
183 }
184 }
185
186 BESDEBUG(DEBUG_KEY,
187 "BESDapFunctionResponseCache::get_instance(dir,prefix,size) - d_instance: " << d_instance << endl);
188
189 return d_instance;
190}
191
193BESDapFunctionResponseCache::get_instance()
194{
195 if (d_enabled && d_instance == 0) {
196 string cache_dir = get_cache_dir_from_config();
197 if (!cache_dir.empty() && dir_exists(cache_dir)) {
198 d_instance = new BESDapFunctionResponseCache(get_cache_dir_from_config(), get_cache_prefix_from_config(),
199 get_cache_size_from_config());
200 d_enabled = d_instance->cache_enabled();
201 if(!d_enabled){
202 delete d_instance;
203 d_instance = NULL;
204 BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
205 "Cache is DISABLED"<< endl);
206 }
207 else {
208 #ifdef HAVE_ATEXIT
209 atexit(delete_instance);
210 #endif
211 BESDEBUG("cache", "BESDapFunctionResponseCache::"<<__func__ << "() - " <<
212 "Cache is ENABLED"<< endl);
213 }
214 }
215 }
216
217 BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::get_instance() - d_instance: " << (void *) d_instance << endl);
218
219 return d_instance;
220}
222
232bool BESDapFunctionResponseCache::is_valid(const string &cache_file_name, const string &dataset)
233{
234 // If the cached response is zero bytes in size, it's not valid. This is true
235 // because a DAP data object, even if it has no data still has a metadata part.
236 // jhrg 10/20/15
237
238 off_t entry_size = 0;
239 time_t entry_time = 0;
240 struct stat buf;
241 if (stat(cache_file_name.c_str(), &buf) == 0) {
242 entry_size = buf.st_size;
243 entry_time = buf.st_mtime;
244 }
245 else {
246 return false;
247 }
248
249 if (entry_size == 0) return false;
250
251 time_t dataset_time = entry_time;
252 if (stat(dataset.c_str(), &buf) == 0) {
253 dataset_time = buf.st_mtime;
254 }
255
256 // Trick: if the d_dataset is not a file, stat() returns error and
257 // the times stay equal and the code uses the cache entry.
258
259 // TODO Fix this so that the code can get a LMT from the correct handler.
260 if (dataset_time > entry_time) return false;
261
262 return true;
263}
264
265string BESDapFunctionResponseCache::get_resource_id(DDS *dds, const string &constraint)
266{
267 return dds->filename() + "#" + constraint;
268}
269
270bool BESDapFunctionResponseCache::can_be_cached(DDS *dds, const string &constraint)
271{
272 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " constraint + dds->filename() length: "
273 << constraint.size() + dds->filename().size() << endl);
274
275 return (constraint.size() + dds->filename().size() <= max_cacheable_ce_len);
276}
277
285string BESDapFunctionResponseCache::get_hash_basename(const string &resource_id)
286{
287 // Get a hash function for strings
288 HASH_OBJ<string> str_hash;
289 size_t hashValue = str_hash(resource_id);
290 stringstream hashed_id;
291 hashed_id << hashValue;
292 string cache_file_name = get_cache_directory();
293 cache_file_name.append("/").append(get_cache_file_prefix()).append(hashed_id.str());
294
295 return cache_file_name;
296}
297
319DDS *
320BESDapFunctionResponseCache::get_or_cache_dataset(DDS *dds, const string &constraint)
321{
322 // Build the response_id. Since the response content is a function of both the dataset AND the constraint,
323 // glue them together to get a unique id for the response.
324 string resourceId = dds->filename() + "#" + constraint;
325
326 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resourceId: '" << resourceId << "'" << endl);
327
328 // Get a hash function for strings
329 HASH_OBJ<string> str_hash;
330
331 // Use the hash function to hash the resourceId.
332 size_t hashValue = str_hash(resourceId);
333 stringstream hashed_id;
334 hashed_id << hashValue;
335
336 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " hashed_id: '" << hashed_id.str() << "'" << endl);
337
338 // Use the parent class's get_cache_file_name() method and its associated machinery to get the file system path for the cache file.
339 // We store it in a variable called basename because the value is later extended as part of the collision avoidance code.
340 string cache_file_name = BESFileLockingCache::get_cache_file_name(hashed_id.str(), false);
341
342 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cache_file_name: '" << cache_file_name << "'" << endl);
343
344 // Does the cached dataset exist? if yes, ret_dds points to it. If no,
345 // cache_file_name is updated to be the correct name for write_dataset_
346 // to_cache().
347 DDS *ret_dds = 0;
348 if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
349 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file: " << cache_file_name << endl);
350 ret_dds->filename(dds->filename());
351 }
352 else if ((ret_dds = write_dataset_to_cache(dds, resourceId, constraint, cache_file_name))) {
353 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data written to cache file: " << cache_file_name << endl);
354 }
355 // get_read_lock() returns immediately if the file does not exist,
356 // but blocks waiting to get a shared lock if the file does exist.
357 else if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
358 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file (2nd try): " << cache_file_name << endl);
359 ret_dds->filename(dds->filename());
360 }
361
362 BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Used cache_file_name: " << cache_file_name << " for resource ID: " << resourceId << endl);
363
364 return ret_dds;
365}
366
383DDS *
384BESDapFunctionResponseCache::load_from_cache(const string &resource_id, string &cache_file_name)
385{
386 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resource_id: " << resource_id << endl);
387
388 DDS *cached_dds = 0; // nullptr
389
390 unsigned long suffix_counter = 0;
391 bool keep_looking = true;
392 do {
393 if (suffix_counter > max_collisions) {
394 stringstream ss;
395 ss << "Cache error! There are " << suffix_counter << " hash collisions for the resource '" << resource_id
396 << "' And that is a bad bad thing.";
397 throw BESInternalError(ss.str(), __FILE__, __LINE__);
398 }
399
400 // Build cache_file_name and cache_id_file_name from baseName
401 stringstream cfname;
402 cfname << cache_file_name << "_" << suffix_counter++;
403
404 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " candidate cache_file_name: " << cfname.str() << endl);
405
406 int fd; // unused
407 if (!get_read_lock(cfname.str(), fd)) {
408 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " !get_read_lock(cfname.str(), fd): " << fd << endl);
409 // If get_read_lock() returns false, that means the cache file doesn't exist.
410 // Set keep_looking to false and exit the loop.
411 keep_looking = false;
412 // Set the cache file name to the current value of cfname.str() - this is
413 // the name that does not exist and should be used by write_dataset_to_cache()
414 cache_file_name = cfname.str();
415 }
416 else {
417 // If get_read_lock() returns true, the cache file exists; look and see if
418 // it's the correct one. If so, cached_dds will be true and we exit.
419
420 // Read the first line from the cache file and see if it matches the resource id
421 ifstream cache_file_istream(cfname.str().c_str());
422 char line[max_cacheable_ce_len];
423 cache_file_istream.getline(line, max_cacheable_ce_len);
424 string cached_resource_id;
425 cached_resource_id.assign(line);
426
427 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cached_resource_id: " << cached_resource_id << endl);
428
429 if (cached_resource_id.compare(resource_id) == 0) {
430 // WooHoo Cache Hit!
431 BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::load_from_cache() - Cache Hit!" << endl);
432
433 // non-null value value for cached_dds will exit the loop
434 cached_dds = read_cached_data(cache_file_istream);
435 }
436
437 unlock_and_close(cfname.str());
438 }
439 } while (!cached_dds && keep_looking);
440
441 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Cache " << (cached_dds!=0?"HIT":"MISS") << " for: " << cache_file_name << endl);
442
443 return cached_dds;
444}
445
450DDS *
451BESDapFunctionResponseCache::read_cached_data(istream &cached_data)
452{
453 // Build a CachedSequence; all other types are as BaseTypeFactory builds
454 CacheTypeFactory factory;
455 DDS *fdds = new DDS(&factory);
456
457 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - BEGIN" << endl);
458
459 // Parse the DDX; throw an exception on error.
460 DDXParser ddx_parser(fdds->get_factory());
461
462 // Parse the DDX, reading up to and including the next boundary.
463 // Return the CID for the matching data part
464 string data_cid; // Not used. jhrg 5/5/16
465 try {
466 ddx_parser.intern_stream(cached_data, fdds, data_cid, DATA_MARK);
467 }
468 catch (Error &e) { // Catch the libdap::Error and throw BESInternalError
469 throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
470 }
471
472 CacheUnMarshaller um(cached_data);
473
474 for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
475 (*i)->deserialize(um, fdds);
476 }
477
478 // mark everything as read. And 'to send.' That is, make sure that when a response
479 // is retrieved from the cache, all of the variables are marked as 'to be sent.'
480 for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
481 (*i)->set_read_p(true);
482 (*i)->set_send_p(true);
483
484 // For Sequences, deserialize() will update the 'current row number,' which
485 // is the correct behavior but which will also confuse serialize(). Reset the
486 // current row number here so serialize() can start working from row 0. jhrg 5/13/16
487 // Note: Now uses the recursive version of reset_row_number. jhrg 5/16/16
488 if ((*i)->type() == dods_sequence_c) {
489 static_cast<Sequence*>(*i)->reset_row_number(true);
490 }
491 }
492
493 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - END." << endl);
494
495 fdds->set_factory(0); // Make sure there is no left-over cruft in the returned DDS
496
497 return fdds;
498}
499
515DDS *
516BESDapFunctionResponseCache::write_dataset_to_cache(DDS *dds, const string &resource_id, const string &func_ce,
517 const string &cache_file_name)
518{
519 BESDEBUG(DEBUG_KEY, __FUNCTION__ << " BEGIN " << resource_id << ": "
520 << func_ce << ": " << cache_file_name << endl);
521
522 DDS *fdds = 0; // will hold the return value
523
524 int fd;
525 if (create_and_lock(cache_file_name, fd)) {
526 // If here, the cache_file_name could not be locked for read access;
527 // try to build it. First make an empty files and get an exclusive lock on them.
528 BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Caching " << resource_id << ", func_ce: " << func_ce << endl);
529
530 // Get an output stream directed at the locked cache file
531 ofstream cache_file_ostream(cache_file_name.c_str(), ios::out|ios::app|ios::binary);
532 if (!cache_file_ostream.is_open())
533 throw BESInternalError("Could not open '" + cache_file_name + "' to write cached response.", __FILE__, __LINE__);
534
535 try {
536 // Write the resource_id to the first line of the cache file
537 cache_file_ostream << resource_id << endl;
538
539 // Evaluate the function
540 ConstraintEvaluator func_eval;
541 func_eval.parse_constraint(func_ce, *dds);
542 fdds = func_eval.eval_function_clauses(*dds);
543
544 fdds->print_xml_writer(cache_file_ostream, true, "");
545
546 cache_file_ostream << DATA_MARK << endl;
547
548 // Define the scope of the StreamMarshaller because for some types it will use
549 // a child thread to send data and it's dtor will wait for that thread to complete.
550 // We want that before we close the output stream (cache_file_stream) jhrg 5/6/16
551 {
552 ConstraintEvaluator new_ce;
553 CacheMarshaller m(cache_file_ostream);
554
555 for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
556 if ((*i)->send_p()) {
557 (*i)->serialize(new_ce, *fdds, m, false);
558 }
559 }
560 }
561
562 // Change the exclusive locks on the new file to a shared lock. This keeps
563 // other processes from purging the new file and ensures that the reading
564 // process can use it.
566
567 // Now update the total cache size info and purge if needed. The new file's
568 // name is passed into the purge method because this process cannot detect its
569 // own lock on the file.
570 unsigned long long size = update_cache_info(cache_file_name);
571 if (cache_too_big(size)) update_and_purge(cache_file_name);
572
573 unlock_and_close(cache_file_name);
574 }
575 catch (...) {
576 // Bummer. There was a problem doing The Stuff. Now we gotta clean up.
577 cache_file_ostream.close();
578 this->purge_file(cache_file_name);
579 unlock_and_close(cache_file_name);
580 throw;
581 }
582 }
583
584 return fdds;
585}
586
Cache the results from server functions.
virtual libdap::DDS * get_or_cache_dataset(libdap::DDS *dds, const std::string &constraint)
Return a DDS loaded with data that can be serialized back to a client.
virtual void unlock_and_close(const std::string &target)
std::string get_cache_directory() const
virtual unsigned long long update_cache_info(const std::string &target)
Update the cache info file to include 'target'.
virtual bool create_and_lock(const std::string &target, int &fd)
Create a file in the cache and lock it for write access.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
virtual bool get_read_lock(const std::string &target, int &fd)
Get a read-only lock on the file if it exists.
static bool dir_exists(const std::string &dir)
std::string get_cache_file_prefix() const
virtual void purge_file(const std::string &file)
Purge a single file from the cache.
virtual bool cache_too_big(unsigned long long current_size) const
Look at the cache size and see if it is too big.
virtual void update_and_purge(const std::string &new_file)
Purge files from the cache.
virtual std::string get_cache_file_name(const std::string &src, bool mangle=true)
exception thrown if internal error encountered
static std::string lowercase(const std::string &s)
Definition BESUtil.cc:257
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85