bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
HttpdCatalog.cc
1// HttpdCatalog.cc
2// -*- mode: c++; c-basic-offset:4 -*-
3//
4// This file is part of BES httpd_catalog_module
5//
6// Copyright (c) 2018 OPeNDAP, Inc.
7// Author: Nathan Potter <ndp@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24// Please read the full copyright statement in the file COPYRIGHT_URI.
25//
26
27#include "config.h"
28
29#include <sys/types.h>
30#include <sys/stat.h>
31#include <dirent.h>
32
33#include <cstring>
34#include <cerrno>
35
36#include <sstream>
37#include <cassert>
38
39#include <memory>
40#include <algorithm>
41#include <map>
42
43#include <BESUtil.h>
44#include <BESCatalogUtils.h>
45#include <BESCatalogEntry.h>
46
47#include <CatalogNode.h>
48#include <CatalogItem.h>
49
50#include <BESInfo.h>
51#include <BESContainerStorageList.h>
52#include <BESFileContainerStorage.h>
53#include <BESLog.h>
54
55#include <BESInternalError.h>
56#include <BESForbiddenError.h>
57#include <BESNotFoundError.h>
58#include <BESSyntaxUserError.h>
59
60#include <TheBESKeys.h>
61#include <BESDebug.h>
62
63#include "HttpdCatalogNames.h"
64#include "HttpdCatalog.h"
65#include "HttpdDirScraper.h"
66
67using namespace bes;
68using namespace std;
69
70#define prolog std::string("HttpdCatalog::").append(__func__).append("() - ")
71
72namespace httpd_catalog {
73
84HttpdCatalog::HttpdCatalog(const string &catalog_name) :
85 BESCatalog(catalog_name) {
86 bool found = false;
87 vector<string> httpd_catalogs;
88 TheBESKeys::TheKeys()->get_values(HTTPD_CATALOG_COLLECTIONS, httpd_catalogs,
89 found);
90 if (!found) {
91 throw BESInternalError(
92 string("The httpd_catalog module must define at least one catalog name using the key; '")
93 + HTTPD_CATALOG_COLLECTIONS + "'", __FILE__, __LINE__);
94 }
95
97 for (it = httpd_catalogs.begin(); it != httpd_catalogs.end(); it++) {
98 string catalog_entry = *it;
99 int index = catalog_entry.find(":");
100 if (index > 0) {
101 string name = catalog_entry.substr(0, index);
102 string url = catalog_entry.substr(index + 1);
103 BESDEBUG(MODULE,
104 prolog << "name: '" << name << "' url: " << url << endl);
105 d_httpd_catalogs.insert(pair<string, string>(name, url));
106 } else {
107 throw BESInternalError(
108 string("The configuration entry for the ")
109 + HTTPD_CATALOG_COLLECTIONS
110 + " was incorrectly formatted. entry: "
111 + catalog_entry, __FILE__, __LINE__);
112 }
113 }
114
115#if 0
116 string default_type_match_key = "BES.Catalog.catalog.TypeMatch";
117 string catalog_type_match_key = "BES.Catalog."+catalog_name+".TypeMatch";
118 string type_match_key = catalog_type_match_key;
119 vector<string> type_match_v;
120 TheBESKeys::TheKeys()->get_values(type_match_key, type_match_v, found);
121 if(!found) {
122 type_match_key = default_type_match_key;
123 TheBESKeys::TheKeys()->get_values(type_match_key, type_match_v, found);
124 if(!found) {
125 throw BESInternalError("ERROR: Failed to located either the '"+catalog_type_match_key+
126 "' or the '"+default_type_match_key+"' BES keys." , __FILE__, __LINE__);
127 }
128 }
129
130 for(it=type_match_v.begin(); it!=type_match_v.end(); it++) {
131 string typeMatch_entry = *it;
132 int index = typeMatch_entry.find(":");
133 if(index>0) {
134 string name = typeMatch_entry.substr(0,index);
135 string regex = typeMatch_entry.substr(index+1);
136 BESDEBUG(MODULE, prolog << "name: '" << name << "' regex: " << regex << endl);
137 d_typematch.insert( pair<string,string>(name,regex));
138 }
139 else {
140 throw BESInternalError(string("The configuration entry for the ") + type_match_key +
141 " was incorrectly formatted. entry: "+typeMatch_entry, __FILE__,__LINE__);
142 }
143 }
144#endif
145
146}
147
157HttpdCatalog::get_node(const string &ppath) const
158{
159 string time_now = BESUtil::get_time(0,false);
160 bes::CatalogNode *node;
161
162 string path = ppath;
163 while(path.size()>0 && path[0]=='/')
164 path = path.substr(1);
165
166 if(path.empty() || path=="/"){
167 node = new CatalogNode("/");
168 node->set_lmt(time_now);
169 node->set_catalog_name(HTTPD_CATALOG_NAME);
170 map<string, string>::const_iterator it = d_httpd_catalogs.begin();
171
172 while(it!=d_httpd_catalogs.end()){
173 CatalogItem *collection = new CatalogItem();
174 collection->set_name(it->first);
175 collection->set_type(CatalogItem::node);
176 node->add_node(collection);
177 it++;
178 }
179 }
180 else {
181 string remote_target_url = path_to_access_url(path);
182
183 HttpdDirScraper hds;
184 node = hds.get_node(remote_target_url,path);
185 node->set_lmt(time_now);
186 node->set_catalog_name(HTTPD_CATALOG_NAME);
187
188 }
189
190 return node;
191}
192
201string HttpdCatalog::path_to_access_url(const string &p) const
202{
203 // If the path starts with a slash, remove that
204 string path = (p.find('/') == 0) ? p.substr(1): p;
205 string::size_type i = path.find('/');
206 string collection = path.substr(0, i); // if 'i == string::npos', substr() returns all of 'path'
207
208 map<string,string>::const_iterator it = d_httpd_catalogs.find(collection);
209 if (it == d_httpd_catalogs.end())
210 throw BESNotFoundError("The httpd_catalog does not contain a collection named '" + collection + "'", __FILE__, __LINE__);
211
212 // passing string::npos to the first arg of substr() causes an out-of-range exception
213 string remote_relative_path = (i == string::npos) ? "": path.substr(i);
214
215 if(remote_relative_path == "/" || remote_relative_path.empty())
216 return it->second;
217 else
218 return BESUtil::pathConcat(it->second, remote_relative_path);
219
220#if 0
221 vector<string> path_elements = BESUtil::split(path);
222 BESDEBUG(MODULE, prolog << "path_elements.size(): " << path_elements.size() << " path: '" << path << "'"<< endl);
223
224 string collection = path_elements[0];
225 BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_httpd_catalogs.size(): "
226 << d_httpd_catalogs.size() << endl);
227
228 map<string,string>::const_iterator it = d_httpd_catalogs.find(collection);
229 if(it == d_httpd_catalogs.end()) {
230 throw BESNotFoundError("The httpd_catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
231 }
232 BESDEBUG(MODULE, prolog << "The httpd_catalog collection " << collection << " is valid." << endl);
233
234 string url = it->second;
235
236 string remote_relative_path = path.substr(collection.size()+1); // Broken here. HK-313 jhrg 1/24/19
237 BESDEBUG(MODULE, prolog << "remote_relative_path: " << remote_relative_path << endl);
238
239 string access_url;
240 if(remote_relative_path == "/" || remote_relative_path.empty())
241 access_url = url;
242 else
243 access_url = BESUtil::pathConcat(url,remote_relative_path);
244
245 BESDEBUG(MODULE, prolog << "remote_target_url: " << access_url << endl);
246
247 return access_url;
248#endif
249
250}
251
259void HttpdCatalog::dump(ostream &strm) const
260{
261 strm << BESIndent::LMarg << prolog << "(" << (void *) this << ")" << endl;
262 BESIndent::Indent();
263
264 strm << BESIndent::LMarg << "catalog utilities: " << endl;
265 BESIndent::Indent();
266 get_catalog_utils()->dump(strm);
267 BESIndent::UnIndent();
268 BESIndent::UnIndent();
269}
270
271} // namespace httpd_catalog
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty values when skip_empty is true.
Definition BESUtil.cc:1068
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:754
static std::string get_time(bool use_local_time=false)
Definition BESUtil.cc:1017
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
void set_type(item_type t)
Set the type for this item.
void set_name(const std::string &n)
Set the name of the item.
void set_lmt(std::string lmt)
Set the LMT for this node.
Definition CatalogNode.h:92
virtual bes::CatalogNode * get_node(const std::string &path) const
Produces the bes::CatalogNode for the string ppath.
HttpdCatalog(const std::string &catalog_name=HTTPD_CATALOG_NAME)
A catalog based on scraping the directory pages produced by httpd.
virtual void dump(std::ostream &strm) const
dumps information about this object
virtual std::string path_to_access_url(const std::string &path) const
Takes a path which begins with the name of an HttpdCatalog collection and returns the associated access URL.
This class builds a BES CatalogNode response from an httpd-generated directory page.
STL iterator class.
STL iterator class.