bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
BESCatalogUtils.cc
1// BESCatalogUtils.cc
2
3// This file is part of bes, A C++ back-end server implementation framework
4// for the OPeNDAP Data Access Protocol.
5
6// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact University Corporation for Atmospheric Research at
24// 3080 Center Green Drive, Boulder, CO 80301
25
26// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27// Please read the full copyright statement in the file COPYRIGHT_UCAR.
28//
29// Authors:
30// pwest Patrick West <pwest@ucar.edu>
31// jgarcia Jose Garcia <jgarcia@ucar.edu>
32
33#include "config.h"
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <dirent.h>
38
39#include <cerrno>
40#include <iostream>
41#include <sstream>
42#include <list>
43#include <cstring>
44
45#include "BESCatalogUtils.h"
46#include "BESCatalogList.h"
47#include "TheBESKeys.h"
48#include "BESInternalError.h"
49#include "BESSyntaxUserError.h"
50#include "BESNotFoundError.h"
51#include "BESRegex.h"
52#include "BESUtil.h"
53#include "BESInfo.h"
54#include "BESContainerStorageList.h"
55#include "BESContainerStorage.h"
56#include "BESCatalogEntry.h"
57
58using namespace std;
59
83BESCatalogUtils::BESCatalogUtils(const string &n, bool strict) :
84 d_name(n), d_follow_syms(false)
85{
86 string key = "BES.Catalog." + n + ".RootDirectory";
87 bool found = false;
88 TheBESKeys::TheKeys()->get_value(key, d_root_dir, found);
89 if (strict && (!found || d_root_dir == "")) {
90 string s = key + " not defined in BES configuration file";
91 throw BESSyntaxUserError(s, __FILE__, __LINE__);
92 }
93
94 if(d_root_dir != "UNUSED"){
95 // TODO access() or stat() would test for existence faster. jhrg 2.25.18
96 DIR *dip = opendir(d_root_dir.c_str());
97 if (dip == NULL) {
98 string serr = "BESCatalogDirectory - root directory " + d_root_dir + " does not exist";
99 throw BESNotFoundError(serr, __FILE__, __LINE__);
100 }
101 closedir(dip);
102 }
103
104 found = false;
105 key = (string) "BES.Catalog." + n + ".Exclude";
106 vector<string> vals;
107 TheBESKeys::TheKeys()->get_values(key, vals, found);
108 vector<string>::iterator ei = vals.begin();
109 vector<string>::iterator ee = vals.end();
110 for (; ei != ee; ei++) {
111 string e_str = (*ei);
112 if (!e_str.empty() && e_str != ";") BESUtil::explode(';', e_str, d_exclude);
113 }
114
115 key = (string) "BES.Catalog." + n + ".Include";
116 vals.clear();
117 TheBESKeys::TheKeys()->get_values(key, vals, found);
118 vector<string>::iterator ii = vals.begin();
119 vector<string>::iterator ie = vals.end();
120 for (; ii != ie; ii++) {
121 string i_str = (*ii);
122 if (!i_str.empty() && i_str != ";") BESUtil::explode(';', i_str, d_include);
123 }
124
125 key = "BES.Catalog." + n + ".TypeMatch";
126 list<string> match_list;
127 vals.clear();
128 TheBESKeys::TheKeys()->get_values(key, vals, found);
129 if (strict && (!found || vals.size() == 0)) {
130 string s = key + " not defined in key file";
131 throw BESInternalError(s, __FILE__, __LINE__);
132 }
133 vector<string>::iterator vi = vals.begin();
134 vector<string>::iterator ve = vals.end();
135 for (; vi != ve; vi++) {
136 BESUtil::explode(';', (*vi), match_list);
137 }
138
139 list<string>::iterator mli = match_list.begin();
140 list<string>::iterator mle = match_list.end();
141 for (; mli != mle; mli++) {
142 if (!((*mli).empty()) && *(mli) != ";") {
143 list<string> amatch;
144 BESUtil::explode(':', (*mli), amatch);
145 if (amatch.size() != 2) {
146 string s = (string) "Catalog type match malformed, " + "looking for type:regexp;[type:regexp;]";
147 throw BESInternalError(s, __FILE__, __LINE__);
148 }
149 list<string>::iterator ami = amatch.begin();
150 handler_regex newval;
151 newval.handler = (*ami);
152 ami++;
153 newval.regex = (*ami);
154 d_match_list.push_back(newval);
155 }
156 }
157
158 key = (string) "BES.Catalog." + n + ".FollowSymLinks";
159 string s_str;
160 TheBESKeys::TheKeys()->get_value(key, s_str, found);
161 s_str = BESUtil::lowercase(s_str);
162 if (s_str == "yes" || s_str == "on" || s_str == "true") {
163 d_follow_syms = true;
164 }
165}
166
178bool BESCatalogUtils::include(const string &inQuestion) const
179{
180 bool toInclude = false;
181
182 // First check the file against the include list. If the file should be
183 // included then check the exclude list to see if there are exceptions
184 // to the include list.
185 if (d_include.size() == 0) {
186 toInclude = true;
187 }
188 else {
189 list<string>::const_iterator i_iter = d_include.begin();
190 list<string>::const_iterator i_end = d_include.end();
191 for (; i_iter != i_end; i_iter++) {
192 string reg = *i_iter;
193 if (!reg.empty()) {
194 try {
195 // must match exactly, meaning result is = to length of string
196 // in question
197 BESRegex reg_expr(reg.c_str());
198 if (reg_expr.match(inQuestion.c_str(), inQuestion.size())
199 == static_cast<int>(inQuestion.size())) {
200 toInclude = true;
201 }
202 }
203 catch (BESError &e) {
204 string serr = (string) "Unable to get catalog information, "
205 + "malformed Catalog Include parameter " + "in bes configuration file around " + reg + ": "
206 + e.get_message();
207 throw BESInternalError(serr, __FILE__, __LINE__);
208 }
209 }
210 }
211 }
212
213 if (toInclude == true) {
214 if (exclude(inQuestion)) {
215 toInclude = false;
216 }
217 }
218
219 return toInclude;
220}
221
229bool BESCatalogUtils::exclude(const string &inQuestion) const
230{
231 list<string>::const_iterator e_iter = d_exclude.begin();
232 list<string>::const_iterator e_end = d_exclude.end();
233 for (; e_iter != e_end; e_iter++) {
234 string reg = *e_iter;
235 if (!reg.empty()) {
236 try {
237 BESRegex reg_expr(reg.c_str());
238 if (reg_expr.match(inQuestion.c_str(), inQuestion.size()) == static_cast<int>(inQuestion.size())) {
239 return true;
240 }
241 }
242 catch (BESError &e) {
243 string serr = (string) "Unable to get catalog information, " + "malformed Catalog Exclude parameter "
244 + "in bes configuration file around " + reg + ": " + e.get_message();
245 throw BESInternalError(serr, __FILE__, __LINE__);
246 }
247 }
248 }
249 return false;
250}
251
257BESCatalogUtils::match_citer BESCatalogUtils::match_list_begin() const
258{
259 return d_match_list.begin();
260}
261
267BESCatalogUtils::match_citer BESCatalogUtils::match_list_end() const
268{
269 return d_match_list.end();
270}
271
282unsigned int BESCatalogUtils::get_entries(DIR *dip, const string &fullnode, const string &use_node,
283 BESCatalogEntry *entry, bool dirs_only)
284{
285 unsigned int cnt = 0;
286
287 struct stat cbuf;
288 int statret = stat(fullnode.c_str(), &cbuf);
289 if (statret != 0) {
290 if (errno == ENOENT) { // ENOENT means that the path or part of the path does not exist
291 char *s_err = strerror(errno);
292 throw BESNotFoundError((s_err) ? string(s_err) : string("Node ") + use_node + " does not exist", __FILE__,
293 __LINE__);
294 }
295 // any other error means that access is denied for some reason
296 else {
297 char *s_err = strerror(errno);
298 throw BESNotFoundError((s_err) ? string(s_err) : string("Access denied for node ") + use_node, __FILE__,
299 __LINE__);
300 }
301 }
302
303 struct dirent *dit;
304 while ((dit = readdir(dip)) != NULL) {
305 string dirEntry = dit->d_name;
306 if (dirEntry == "." || dirEntry == "..") {
307 continue;
308 }
309
310 string fullPath = fullnode + "/" + dirEntry;
311
312 // Skip this dir entry if it is a sym link and follow links is false
313 if (follow_sym_links() == false) {
314 struct stat lbuf;
315 (void) lstat(fullPath.c_str(), &lbuf);
316 if (S_ISLNK(lbuf.st_mode))
317 continue;
318 }
319
320 // look at the mode and determine if this is a
321 // directory or a regular file. If it is not
322 // accessible, the stat fails, is not a directory
323 // or regular file, then simply do not include it.
324 struct stat buf;
325 statret = stat(fullPath.c_str(), &buf);
326 if (statret == 0 && S_ISDIR(buf.st_mode)) {
327 if (exclude(dirEntry) == false) {
328 BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
329
330 bes_add_stat_info(curr_entry, buf);
331
332 entry->add_entry(curr_entry);
333
334 // we don't go further than this, so we need
335 // to add a blank node here so that we know
336 // it's a node (collection)
337 BESCatalogEntry *blank_entry = new BESCatalogEntry(".blank", entry->get_catalog());
338 curr_entry->add_entry(blank_entry);
339 }
340 }
341 else if (statret == 0 && S_ISREG(buf.st_mode)) {
342 if (!dirs_only && include(dirEntry)) {
343 BESCatalogEntry *curr_entry = new BESCatalogEntry(dirEntry, entry->get_catalog());
344 bes_add_stat_info(curr_entry, buf);
345
346 list<string> services;
347 // TODO use the d_utils object? jhrg 2.26.18
348 isData(fullPath, d_name, services);
349 curr_entry->set_service_list(services);
350
351 bes_add_stat_info(curr_entry, buf);
352
353 entry->add_entry(curr_entry);
354 }
355 }
356 } // end of the while loop
357
358 // TODO this always return zero. FIXME jhrg 2.26.18
359 return cnt;
360}
361
362void BESCatalogUtils::display_entry(BESCatalogEntry *entry, BESInfo *info)
363{
364 string defcatname = BESCatalogList::TheCatalogList()->default_catalog_name();
365
366 // start with the external entry
367 map<string, string, std::less<>> props;
368 if (entry->get_catalog() == defcatname) {
369 props["name"] = entry->get_name();
370 }
371 else {
372 string name = entry->get_catalog() + "/";
373 if (entry->get_name() != "/") {
374 name = name + entry->get_name();
375 }
376 props["name"] = name;
377 }
378 props["catalog"] = entry->get_catalog();
379 props["size"] = entry->get_size();
380 props["lastModified"] = entry->get_mod_date() + "T" + entry->get_mod_time();
381 if (entry->is_collection()) {
382 props["node"] = "true";
383 ostringstream strm;
384 strm << entry->get_count();
385 props["count"] = strm.str();
386 }
387 else {
388 props["node"] = "false";
389 }
390 info->begin_tag("dataset", &props);
391
392 list<string> services = entry->get_service_list();
393 if (services.size()) {
394 list<string>::const_iterator si = services.begin();
395 list<string>::const_iterator se = services.end();
396 for (; si != se; si++) {
397 info->add_tag("serviceRef", (*si));
398 }
399 }
400}
401
416std::string
417BESCatalogUtils::get_handler_name(const std::string &item) const
418{
419 for (auto i = match_list_begin(), e = match_list_end(); i != e; ++i) {
420 BESRegex expr((*i).regex.c_str());
421 if (expr.match(item.c_str(), item.size()) == (int)item.size()) {
422 return (*i).handler;
423 }
424 }
425
426 return "";
427}
428
441bool
442BESCatalogUtils::is_data(const std::string &item) const
443{
444 for (auto i = match_list_begin(), e = match_list_end(); i != e; ++i) {
445 BESRegex expr((*i).regex.c_str());
446 if (expr.match(item.c_str(), item.size()) == (int)item.size()) {
447 return true;
448 }
449 }
450 return false;
451}
452
460void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, const string &fullnode)
461{
462 struct stat cbuf;
463 int statret = stat(fullnode.c_str(), &cbuf);
464 if (statret == 0) {
465 bes_add_stat_info(entry, cbuf);
466 }
467}
468
469void BESCatalogUtils::bes_add_stat_info(BESCatalogEntry *entry, struct stat &buf)
470{
471 off_t sz = buf.st_size;
472 entry->set_size(sz);
473
474 // %T = %H:%M:%S
475 // %F = %Y-%m-%d
476 time_t mod = buf.st_mtime;
477 struct tm stm{};
478 gmtime_r(&mod, &stm);
479 char mdate[64];
480 strftime(mdate, 64, "%Y-%m-%d", &stm);
481 char mtime[64];
482 strftime(mtime, 64, "%T", &stm);
483
484 ostringstream sdt;
485 sdt << mdate;
486 entry->set_mod_date(sdt.str());
487
488 ostringstream stt;
489 stt << mtime;
490 entry->set_mod_time(stt.str());
491}
492
493bool BESCatalogUtils::isData(const string &inQuestion, const string &catalog, list<string> &services)
494{
495 BESContainerStorage *store = BESContainerStorageList::TheList()->find_persistence(catalog);
496 if (!store) return false;
497
498 return store->isData(inQuestion, services);
499}
500
501void BESCatalogUtils::dump(ostream &strm) const
502{
503 strm << BESIndent::LMarg << "BESCatalogUtils::dump - (" << (void *) this << ")" << endl;
504 BESIndent::Indent();
505
506 strm << BESIndent::LMarg << "root directory: " << d_root_dir << endl;
507
508 if (d_include.size()) {
509 strm << BESIndent::LMarg << "include list:" << endl;
510 BESIndent::Indent();
511 list<string>::const_iterator i_iter = d_include.begin();
512 list<string>::const_iterator i_end = d_include.end();
513 for (; i_iter != i_end; i_iter++) {
514 if (!(*i_iter).empty()) {
515 strm << BESIndent::LMarg << *i_iter << endl;
516 }
517 }
518 BESIndent::UnIndent();
519 }
520 else {
521 strm << BESIndent::LMarg << "include list: empty" << endl;
522 }
523
524 if (d_exclude.size()) {
525 strm << BESIndent::LMarg << "exclude list:" << endl;
526 BESIndent::Indent();
527 list<string>::const_iterator e_iter = d_exclude.begin();
528 list<string>::const_iterator e_end = d_exclude.end();
529 for (; e_iter != e_end; e_iter++) {
530 if (!(*e_iter).empty()) {
531 strm << BESIndent::LMarg << *e_iter << endl;
532 }
533 }
534 BESIndent::UnIndent();
535 }
536 else {
537 strm << BESIndent::LMarg << "exclude list: empty" << endl;
538 }
539
540 if (d_match_list.size()) {
541 strm << BESIndent::LMarg << "type matches:" << endl;
542 BESIndent::Indent();
543 BESCatalogUtils::match_citer i = d_match_list.begin();
544 BESCatalogUtils::match_citer ie = d_match_list.end();
545 for (; i != ie; i++) {
546 handler_regex match = (*i);
547 strm << BESIndent::LMarg << match.handler << " : " << match.regex << endl;
548 }
549 BESIndent::UnIndent();
550 }
551 else {
552 strm << BESIndent::LMarg << " type matches: empty" << endl;
553 }
554
555 if (d_follow_syms) {
556 strm << BESIndent::LMarg << " follow symbolic links: on" << endl;
557 }
558 else {
559 strm << BESIndent::LMarg << " follow symbolic links: off" << endl;
560 }
561
562 BESIndent::UnIndent();
563}
564
// Disabled code (compiled out by the #if 0 below). It looks up the
// BESCatalogUtils instance stored under cat_name in the _instances map,
// creates and stores a new one when the lookup yields a null pointer, and
// returns the instance.
565#if 0
567BESCatalogUtils::Utils(const string &cat_name)
568{
569 BESCatalogUtils *utils = BESCatalogUtils::_instances[cat_name];
570 if (!utils) {
571 utils = new BESCatalogUtils(cat_name);
572 BESCatalogUtils::_instances[cat_name] = utils;
573 }
574 return utils;
575}
576#endif
577
578
// Disabled code (compiled out by the #if 0 below). It walks the _instances
// map and deletes every BESCatalogUtils pointer stored in it; the map entries
// themselves are not erased.
579#if 0
580// Added 12/24/12
581void BESCatalogUtils::delete_all_catalogs()
582{
583 map<string, BESCatalogUtils*>::iterator i = BESCatalogUtils::_instances.begin();
584 map<string, BESCatalogUtils*>::iterator e = BESCatalogUtils::_instances.end();
585 while (i != e) {
586 delete (*i++).second;
587 }
588}
589
590#endif
591
virtual bool exclude(const std::string &inQuestion) const
Should this file/directory be excluded in the catalog?
virtual unsigned int get_entries(DIR *dip, const std::string &fullnode, const std::string &use_node, BESCatalogEntry *entry, bool dirs_only)
virtual bool include(const std::string &inQuestion) const
Should this file/directory be included in the catalog?
std::string get_handler_name(const std::string &item) const
Find the name of the handler that will process the given item.
bool is_data(const std::string &item) const
Is there a handler that can process this item?
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
virtual bool isData(const std::string &inQuestion, std::list< std::string > &provides)=0
determine if the given container is data and what services are available for it
Base exception class for the BES with basic string message.
Definition BESError.h:66
std::string get_message() const
get the error message for this exception
Definition BESError.h:132
informational response object
Definition BESInfo.h:63
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition BESRegex.h:89
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition BESRegex.cc:70
error thrown if there is a user syntax error in the request or any other user error
static void explode(char delim, const std::string &str, std::list< std::string > &values)
Definition BESUtil.cc:543
static std::string lowercase(const std::string &s)
Definition BESUtil.cc:257
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
STL iterator class.
STL iterator class.
STL iterator class.