bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
EffectiveUrlCache.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES http package, part of the Hyrax data server.
4
5// Copyright (c) 2020 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24// Authors:
25// ndp Nathan Potter <ndp@opendap.org>
26
27#include "config.h"
28
29#include <mutex>
30
31#include <sstream>
32#include <string>
33
34#include "TheBESKeys.h"
35#include "BESDebug.h"
36#include "BESStopWatch.h"
37#include "BESUtil.h"
38#include "CurlUtils.h"
39#include "HttpError.h"
40#include "HttpNames.h"
41#include "EffectiveUrl.h"
42#include "EffectiveUrlCache.h"
43
44using namespace std;
45
46constexpr auto MODULE = "euc";
47constexpr auto MODULE_TIMER = "euc:timer";
48constexpr auto MODULE_DUMPER = "euc:dump";
49
50#define prolog std::string("EffectiveUrlCache::").append(__func__).append("() - ")
51
52namespace http {
53
59shared_ptr <EffectiveUrl> EffectiveUrlCache::get_cached_eurl(string const &url_key) {
60 shared_ptr<EffectiveUrl> effective_url(nullptr);
61 auto it = d_effective_urls.find(url_key);
62 if (it != d_effective_urls.end()) {
63 effective_url = (*it).second;
64 }
65 return effective_url;
66}
67
75shared_ptr <EffectiveUrl> EffectiveUrlCache::get_effective_url(shared_ptr <url> source_url) {
76
77 BESDEBUG(MODULE, prolog << "BEGIN url: " << source_url->str() << endl);
78 BESDEBUG(MODULE_DUMPER, prolog << "dump: " << endl << dump() << endl);
79
80 // Lock access to the cache, the d_effective_urls map. Released when the lock goes out of scope.
81 std::lock_guard<std::mutex> lock_me(d_cache_lock_mutex);
82
83 if (!is_enabled()) {
84 BESDEBUG(MODULE, prolog << "CACHE IS DISABLED." << endl);
85 return shared_ptr<EffectiveUrl>(new EffectiveUrl(source_url));
86 }
87
88 // if it's not an HTTP url there is nothing to cache.
89 if (source_url->str().find(HTTP_PROTOCOL) != 0 && source_url->str().find(HTTPS_PROTOCOL) != 0) {
90 BESDEBUG(MODULE, prolog << "END Not an HTTP request, SKIPPING." << endl);
91 return shared_ptr<EffectiveUrl>(new EffectiveUrl(source_url));
92 }
93
94 if (!d_skip_regex)
95 set_skip_regex();
96
97 if (d_skip_regex) {
98 size_t match_length = 0;
99 match_length = d_skip_regex->match(source_url->str().c_str(), (int) source_url->str().size());
100 if (match_length == source_url->str().size()) {
101 BESDEBUG(MODULE, prolog << "END Candidate url matches the "
102 "no_redirects_regex_pattern [" << d_skip_regex->pattern() <<
103 "][match_length=" << match_length << "] SKIPPING." << endl);
104 return shared_ptr<EffectiveUrl>(new EffectiveUrl(source_url));
105 }
106 BESDEBUG(MODULE, prolog << "Candidate url: '" << source_url->str()
107 << "' does NOT match the skip_regex pattern [" << d_skip_regex->pattern() << "]"
108 << endl);
109 } else {
110 BESDEBUG(MODULE, prolog << "The cache_effective_urls_skip_regex() was NOT SET " << endl);
111 }
112
113 shared_ptr<EffectiveUrl> effective_url = get_cached_eurl(source_url->str());
114 bool retrieve_and_cache = !effective_url || effective_url->is_expired();
115
116 // It not found or expired, (re)load.
117 if (retrieve_and_cache) {
118 BESDEBUG(MODULE, prolog << "Acquiring effective URL for " << source_url->str() << endl);
119 {
120 BES_STOPWATCH_START(MODULE_TIMER, prolog + "Retrieve and cache effective url for source url: " + source_url->str());
121 try {
122 // This code throws an HttpError exception if there is a problem.
123 effective_url = curl::get_redirect_url(source_url);
124 }
125 catch (http::HttpError &http_error) {
126 string err_msg = prolog + "Hyrax encountered a Service Chaining Error while "
127 "attempting to retrieve a redirect URL.\n"
128 "This is most likely problem with TEA, the AWS URL "
129 "signing service.\n" + http_error.get_message();
130 http_error.set_message(err_msg);
131 throw;
132 }
133 }
134 BESDEBUG(MODULE, prolog << " source_url: " << source_url->str() << " ("
135 << (source_url->is_trusted() ? "" : "NOT ") << "trusted)" << endl);
136 BESDEBUG(MODULE, prolog << "effective_url: " << effective_url->dump() << " ("
137 << (source_url->is_trusted() ? "" : "NOT ") << "trusted)" << endl);
138
139 d_effective_urls[source_url->str()] = effective_url;
140
141 BESDEBUG(MODULE, prolog << "Updated record for " << source_url->str() << " cache size: "
142 << d_effective_urls.size() << endl);
143
144 // Since we don't want there to be a concurrency issue when we release the lock, we don't
145 // return the instance of shared_ptr<EffectiveUrl> that we placed in the cache. Rather
146 // we make a clone and return that. It will have its own lifecycle independent of
147 // the instance we placed in the cache - it can be modified and the one in the cache
148 // is unchanged. Trusted state was established from source_url when effective_url was
149 // created in curl::retrieve_effective_url()
150 effective_url = make_shared<EffectiveUrl>(effective_url);
151 } else {
152 // Here we have a !expired instance of a shared_ptr<EffectiveUrl> retrieved from the cache.
153 // Now we need to make a copy to return, inheriting trust from the requesting URL.
154 effective_url = make_shared<EffectiveUrl>(effective_url, source_url->is_trusted());
155 }
156
157 BESDEBUG(MODULE_DUMPER, prolog << "dump: " << endl << dump() << endl);
158 BESDEBUG(MODULE, prolog << "END" << endl);
159
160 return effective_url;
161}
162
166bool EffectiveUrlCache::is_enabled() {
167 // The first time here, the value of d_enabled is -1. Once we check for it in TheBESKeys
168 // The value will be 0 (false) or 1 (true) and TheBESKeys will not be checked again.
169 if (d_enabled < 0) {
170 string value = TheBESKeys::TheKeys()->read_string_key(HTTP_CACHE_EFFECTIVE_URLS_KEY, "false");
171 d_enabled = BESUtil::lowercase(value) == "true";
172 }
173 BESDEBUG(MODULE, prolog << "d_enabled: " << (d_enabled ? "true" : "false") << endl);
174 return d_enabled;
175}
176
177void EffectiveUrlCache::set_skip_regex() {
178 if (!d_skip_regex) {
179 string pattern = TheBESKeys::TheKeys()->read_string_key(HTTP_CACHE_EFFECTIVE_URLS_SKIP_REGEX_KEY, "");
180 if (!pattern.empty()) {
181 d_skip_regex.reset(new BESRegex(pattern.c_str()));
182 }
183 BESDEBUG(MODULE, prolog << "d_skip_regex: "
184 << (d_skip_regex ? d_skip_regex->pattern() : "Value has not been set.") << endl);
185 }
186}
187
192void EffectiveUrlCache::dump(ostream &strm) const {
193 strm << BESIndent::LMarg << prolog << "(this: " << (void *) this << ")" << endl;
194 BESIndent::Indent();
195 strm << BESIndent::LMarg << "d_skip_regex: " << (d_skip_regex ? d_skip_regex->pattern() : "WAS NOT SET") << endl;
196 if (!d_effective_urls.empty()) {
197 strm << BESIndent::LMarg << "effective url list:" << endl;
198 BESIndent::Indent();
199 for (auto const &i: d_effective_urls) {
200 strm << BESIndent::LMarg << i.first << " --> " << i.second->str();
201 }
202 BESIndent::UnIndent();
203 } else {
204 strm << BESIndent::LMarg << "effective url list: EMPTY" << endl;
205 }
206 BESIndent::UnIndent();
207}
208
209} // namespace http
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
void set_message(const std::string &msg)
set the error message for this exception
Definition BESError.h:108
static std::string lowercase(const std::string &s)
Definition BESUtil.cc:257
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
static std::string read_string_key(const std::string &key, const std::string &default_value)
Read a string-valued key from the bes.conf file.
void dump(std::ostream &strm) const override
dumps information about this object
std::shared_ptr< EffectiveUrl > get_effective_url(std::shared_ptr< url > source_url)
utility class for the HTTP catalog module
Definition TheBESKeys.h:51