bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
AllowedHosts.cc
1// AllowedHosts.cc
2
3// -*- mode: c++; c-basic-offset:4 -*-
4
5// This file is part of the OPeNDAP Back-End Server (BES)
6// and creates the allowed hosts list: the set of systems that may be
7// accessed by the server as part of its routine operation.
8
9// Copyright (c) 2018 OPeNDAP, Inc.
10// Author: Nathan D. Potter <ndp@opendap.org>
11//
12// This library is free software; you can redistribute it and/or
13// modify it under the terms of the GNU Lesser General Public
14// License as published by the Free Software Foundation; either
15// version 2.1 of the License, or (at your option) any later version.
16//
17// This library is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20// Lesser General Public License for more details.
21//
22// You should have received a copy of the GNU Lesser General Public
23// License along with this library; if not, write to the Free Software
24// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25//
26// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
27
28#include "config.h"
29
30#include <sstream>
31
32#include "BESUtil.h"
33#include "BESCatalogList.h"
34#include "BESCatalogUtils.h"
35#include "BESRegex.h"
36#include "TheBESKeys.h"
37#include "BESDebug.h"
38
39#include "HttpNames.h"
40#include "url_impl.h"
41
42#include "AllowedHosts.h"
43
44using namespace std;
45
46#define MODULE "ah"
47#define prolog string("AllowedHosts::").append(__func__).append("() - ")
48
49namespace http {
50
51AllowedHosts::AllowedHosts() {
52 bool found;
53 // No exceptions in a constructor. Check d_allowed_hosts when it's used. jhrg 2/20.25
54 TheBESKeys::TheKeys()->get_values(ALLOWED_HOSTS_BES_KEY, d_allowed_hosts, found);
55}
56
58AllowedHosts::theHosts() {
59 static AllowedHosts instance;
60 return &instance;
61}
62
73bool AllowedHosts::is_allowed(const shared_ptr<http::url> &candidate_url) {
74 string error_msg;
75 return is_allowed(*candidate_url, error_msg);
76}
77
78bool AllowedHosts::is_allowed(const shared_ptr<http::url> &candidate_url, std::string &why_not) {
79 return AllowedHosts::is_allowed(*candidate_url, why_not);
80}
81
82bool AllowedHosts::is_allowed(const http::url &candidate_url) {
83 string error_msg;
84 return is_allowed(candidate_url, error_msg);
85}
86
87// Change this so that it does not throw an exception for the last case. OR, it always throws.
88// jhrg 11/22/24 Done jhrg 2/20/25
89bool AllowedHosts::is_allowed(const http::url &candidate_url, std::string &why_not) {
90 BESDEBUG(MODULE, prolog << "BEGIN candidate_url: " << candidate_url.str() << endl);
91
92 if (d_allowed_hosts.empty()) {
93 throw BESInternalError("The allowed hosts key, '" + string(ALLOWED_HOSTS_BES_KEY)
94 + "' has not been configured.", __FILE__, __LINE__);
95 }
96
97 bool isAllowed = false;
98
99 // Special case: This allows any file: URL to pass if the URL starts with the default
100 // catalog's path.
101 if (candidate_url.protocol() == FILE_PROTOCOL) {
102 // Ensure that the file path starts with the catalog root dir.
103 // We know that when a file URL is parsed by http::url it stores everything in after the "file://" mark in
104 // the path, as there is no hostname.
105 string file_path = candidate_url.path();
106 BESDEBUG(MODULE, prolog << " file_path: '" << file_path <<
107 "' (length: " << file_path.size() << " size: " << file_path.size() << ")" <<endl);
108 // Get the BES Catalog
109 BESCatalogList *bcl = BESCatalogList::TheCatalogList();
110 string default_catalog_name = bcl->default_catalog_name();
111 BESDEBUG(MODULE, prolog << "Searching for catalog named: '" << default_catalog_name << "'" << endl);
112 BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
113 if (!bcat) {
114 string error_msg = "INTERNAL_ERROR: Unable to locate default catalog. Check BES configuration.";
115 throw BESInternalError(error_msg, __FILE__, __LINE__);
116 }
117
118 string catalog_root = bcat->get_root();
119 BESDEBUG(MODULE, prolog << "catalog_root: '" << catalog_root <<
120 "' (length: " << catalog_root.size() << " size: " << catalog_root.size() << ")" << endl);
121
122 string relative_path;
123 if (file_path[0] == '/') {
124 if (file_path.size() < catalog_root.size()) {
125 // Upward traversal is not allowed (specified resource path is shorter than data root path)
126 why_not = "Path is out of scope from configuration.";
127 isAllowed = false;
128 }
129 else {
130 size_t ret = file_path.find(catalog_root);
131 BESDEBUG(MODULE, prolog << "file_path.find(catalog_root): " << ret << endl);
132 isAllowed = (ret == 0);
133 relative_path = file_path.substr(catalog_root.size());
134 BESDEBUG(MODULE, prolog << "relative_path: " << relative_path << endl);
135 BESDEBUG(MODULE, prolog << "isAllowed: " << (isAllowed?"true":"false") << endl);
136 }
137 }
138 else {
139 BESDEBUG(MODULE, prolog << "Relative path detected");
140 relative_path = file_path;
141 isAllowed = true;
142 }
143
144 // string::find() returns 0 if the submitted path begins with the catalog root.
145 // And since we are just looking at the catalog.root as a prefix of the resource
146 // name we only allow access to the resource for an exact match.
147 if (isAllowed) {
148 // If we stop adding a '/' to file_path values that don't begin with one
149 // then we need to detect the use of the relative path here
150 bool follow_sym_links = bcat->get_catalog_utils()->follow_sym_links();
151 try {
152 BESUtil::check_path(relative_path, catalog_root, follow_sym_links);
153 }
154 catch (const BESError &e) {
155 why_not = e.get_message();
156 isAllowed = false;
157 }
158 }
159 }
160 else if (candidate_url.protocol() == HTTPS_PROTOCOL || candidate_url.protocol() == HTTP_PROTOCOL) {
161
162 isAllowed = candidate_url.is_trusted() || check(candidate_url.str());
163 why_not = "The URL is not on the AllowedHosts list and is not a known trusted URL";
164 }
165 else {
166 why_not = "The URL utilizes an unsupported protocol:" + candidate_url.protocol();
167 isAllowed = false;
168 }
169
170 return isAllowed;
171}
172
173bool AllowedHosts::check(const std::string &url) const {
174 bool isAllowed=false;
175 for (auto const &a_regex_pattern: d_allowed_hosts) {
176 BESRegex reg_expr(a_regex_pattern.c_str());
177 int match_result = reg_expr.match(url.c_str(), (int)url.size());
178 if (match_result >= 0) {
179 if (match_result == static_cast<int>(url.size())) {
180 BESDEBUG(MODULE, prolog << "FULL MATCH. pattern: " << a_regex_pattern << " url: " << url << endl);
181 isAllowed = true;;
182 } else {
183 BESDEBUG(MODULE, prolog << "No Match. pattern: " << a_regex_pattern << " url: " << url << endl);
184 }
185 }
186 }
187
188 return isAllowed;
189}
190
191} // namespace http
virtual std::string default_catalog_name() const
The name of the default catalog.
virtual std::string get_root() const =0
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition BESUtil.cc:385
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Can a given URL be dereferenced given the BES's configuration?
Parse a URL into the protocol, host, path and query parts.
Definition url_impl.h:44
utility class for the HTTP catalog module
Definition TheBESKeys.h:51