bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
BESCatalogDirectory.cc
1// BESCatalogDirectory.cc
2
3// This file is part of bes, A C++ back-end server implementation framework
4// for the OPeNDAP Data Access Protocol.
5
6// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact University Corporation for Atmospheric Research at
24// 3080 Center Green Drive, Boulder, CO 80301
25
26// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27// Please read the full copyright statement in the file COPYRIGHT_UCAR.
28//
29// Authors:
30// pwest Patrick West <pwest@ucar.edu>
31// jgarcia Jose Garcia <jgarcia@ucar.edu>
32
33#include "config.h"
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <dirent.h>
38
39#include <cstring>
40#include <cerrno>
41
42#include <sstream>
43#include <cassert>
44
45#include <memory>
46#include <algorithm>
47
48#include "BESUtil.h"
49#include "BESCatalogDirectory.h"
50#include "BESCatalogUtils.h"
51#include "BESCatalogEntry.h"
52
53#include "CatalogNode.h"
54#include "CatalogItem.h"
55
56#include "BESInfo.h"
57#include "BESContainerStorageList.h"
58#include "BESFileContainerStorage.h"
59#include "BESLog.h"
60
61#include "BESInternalError.h"
62#include "BESForbiddenError.h"
63#include "BESNotFoundError.h"
64
65#include "BESDebug.h"
66
67using namespace bes;
68using namespace std;
69
70#define MODULE "bes"
71#define PROLOG "BESCatalogDirectory::" << __func__ << "() - "
72
86 BESCatalog(name)
87{
88#if 0
89 get_catalog_utils() = BESCatalogUtils::Utils(name);
90#endif
91}
92
93BESCatalogDirectory::~BESCatalogDirectory()
94{
95}
96
109{
110 string use_node = node;
111 // use_node should only end in '/' if that's the only character in which
112 // case there's no need to call find()
113 if (!node.empty() && node != "/") {
114 string::size_type pos = use_node.find_last_not_of("/");
115 use_node = use_node.substr(0, pos + 1);
116 }
117
118 // This takes care of bizarre cases like "///" where use_node would be
119 // empty after the substring call.
120 if (use_node.empty()) use_node = "/";
121
122 string rootdir = get_catalog_utils()->get_root_dir();
123 string fullnode = rootdir;
124 if (!use_node.empty()) {
125 // TODO It's hard to know just what this code is supposed to do, but I
126 // think the following can be an error. Above, if use_node is empty(), the use_node becomes
127 // "/" and then it's not empty() and fullnode becomes "<stuff>//" but we just
128 // jumped through all kinds of hoops to make sure there was either zero
129 // or one trailing slash. jhrg 2.26.18
130 fullnode = fullnode + "/" + use_node;
131 }
132
133 string basename;
134 string::size_type slash = fullnode.rfind("/");
135 if (slash != string::npos) {
136 basename = fullnode.substr(slash + 1, fullnode.size() - slash);
137 }
138 else {
139 basename = fullnode;
140 }
141
142 // fullnode is the full pathname of the node, including the 'root' pathanme
143 // basename is the last component of fullnode
144
145 BESDEBUG(MODULE,
146 "BESCatalogDirectory::show_catalog: " << "use_node = " << use_node << endl << "rootdir = " << rootdir << endl << "fullnode = " << fullnode << endl << "basename = " << basename << endl);
147
148 // This will throw the appropriate exception (Forbidden or Not Found).
149 // Checks to make sure the different elements of the path are not
150 // symbolic links if follow_sym_links is set to false, and checks to
151 // make sure have permission to access node and the node exists.
152 // TODO Move up; this can be done once use_node is set. jhrg 2.26.18
153 BESUtil::check_path(use_node, rootdir, get_catalog_utils()->follow_sym_links());
154
155 // If null is passed in, then return the new entry, else add the new entry to the
156 // existing Entry object. jhrg 2.26.18
157 BESCatalogEntry *myentry = new BESCatalogEntry(use_node, get_catalog_name());
158 if (entry) {
159 // if an entry was passed, then add this one to it
160 entry->add_entry(myentry);
161 }
162 else {
163 // else we want to return the new entry created
164 entry = myentry;
165 }
166
167 // Is this node a directory?
168 // TODO use stat() instead. jhrg 2.26.18
169 DIR *dip = opendir(fullnode.c_str());
170 if (dip != nullptr) {
171 try {
172 // The node is a directory
173
174 // if the directory requested is in the exclude list then we won't
175 // let the user see it.
176 if (get_catalog_utils()->exclude(basename)) {
177 string error = "You do not have permission to view the node " + use_node;
178 throw BESForbiddenError(error, __FILE__, __LINE__);
179 }
180
181 // Now that we are ready to start building the response data we
182 // cancel any pending timeout alarm according to the configuration.
184
185 bool dirs_only = false;
186 // TODO This is the only place in the code where get_entries() is called
187 // jhrg 2.26.18
188 get_catalog_utils()->get_entries(dip, fullnode, use_node, myentry, dirs_only);
189 }
190 catch (... /*BESError &e */) {
191 closedir(dip);
192 throw /* e */;
193 }
194 closedir(dip);
195
196 // TODO This is the only place this method is called. replace the static method
197 // with an object call (i.e., get_catalog_utils())? jhrg 2.26.18
198 BESCatalogUtils::bes_add_stat_info(myentry, fullnode);
199 }
200 else {
201 // if the node is not in the include list then the requester does
202 // not have access to that node
203 if (get_catalog_utils()->include(basename)) {
204 struct stat buf;
205 int statret = 0;
206 if (get_catalog_utils()->follow_sym_links() == false) {
207 /*statret =*/(void) lstat(fullnode.c_str(), &buf);
208 if (S_ISLNK(buf.st_mode)) {
209 string error = "You do not have permission to access node " + use_node;
210 throw BESForbiddenError(error, __FILE__, __LINE__);
211 }
212 }
213 statret = stat(fullnode.c_str(), &buf);
214 if (statret == 0 && S_ISREG(buf.st_mode)) {
215 BESCatalogUtils::bes_add_stat_info(myentry, fullnode);
216
217 list<string> services;
218 BESCatalogUtils::isData(node, get_catalog_name(), services);
219 myentry->set_service_list(services);
220 }
221 else if (statret == 0) {
222 string error = "You do not have permission to access " + use_node;
223 throw BESForbiddenError(error, __FILE__, __LINE__);
224 }
225 else {
226 // ENOENT means that the path or part of the path does not
227 // exist
228 if (errno == ENOENT) {
229 string error = "Node " + use_node + " does not exist";
230 char *s_err = strerror(errno);
231 if (s_err) {
232 error = s_err;
233 }
234 throw BESNotFoundError(error, __FILE__, __LINE__);
235 }
236 // any other error means that access is denied for some reason
237 else {
238 string error = "Access denied for node " + use_node;
239 char *s_err = strerror(errno);
240 if (s_err) {
241 error = error + s_err;
242 }
243 throw BESNotFoundError(error, __FILE__, __LINE__);
244 }
245 }
246 }
247 else {
248 string error = "You do not have permission to access " + use_node;
249 throw BESForbiddenError(error, __FILE__, __LINE__);
250 }
251 }
252
253 return entry;
254}
255
261string
266
275static string get_time(time_t the_time, bool use_local_time = false)
276{
277 char buf[sizeof "YYYY-MM-DDTHH:MM:SSzone"];
278 int status = 0;
279
280 // From StackOverflow:
281 // This will work too, if your compiler doesn't support %F or %T:
282 // strftime(buf, sizeof buf, "%Y-%m-%dT%H:%M:%S%Z", gmtime(&now));
283 //
284 // Apologies for the twisted logic - UTC is the default. Override to
285 // local time using BES.LogTimeLocal=yes in bes.conf. jhrg 11/15/17
286 struct tm result{};
287 if (!use_local_time) {
288 gmtime_r(&the_time, &result);
289 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
290 }
291 else {
292 localtime_r(&the_time, &result);
293 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
294 }
295
296 if (!status)
297 ERROR_LOG("Error getting last modified time time for a leaf item in BESCatalogDirectory.");
298
299 return buf;
300}
301
305CatalogItem *BESCatalogDirectory::make_item(string path_prefix, string item) const
306{
307 if (item == "." || item == "..")
308 return 0;
309
310 string item_path = BESUtil::assemblePath(path_prefix,item);
311 BESDEBUG(MODULE, PROLOG << "Processing POSIX entry: " << item_path << endl);
312
313 bool include_item = get_catalog_utils()->include(item);
314 bool exclude_item = get_catalog_utils()->exclude(item);
315
316 BESDEBUG(MODULE, PROLOG << "catalog: " << this->get_catalog_name() << endl);
317 BESDEBUG(MODULE, PROLOG << "include_item: " << (include_item?"true":"false") << endl);
318 BESDEBUG(MODULE, PROLOG << "exclude_item: " << (exclude_item?"true":"false") << endl);
319
320 // TODO add a test in configure for the readdir macro(s) DT_REG, DT_LNK
321 // and DT_DIR and use those, if present, to dßetermine if the name is a
322 // link, directory or regular file. These are not present on all systems.
323 // Also, since we need mtime, these are not a huge time saver. But if we
324 // decide not to use the mtime, using these macros could save lots of system
325 // calls. jhrg 3/9/18
326
327 // Skip this dir entry if it is a sym link and follow links is false
328 if (get_catalog_utils()->follow_sym_links() == false) {
329 struct stat lbuf;
330 (void) lstat(item_path.c_str(), &lbuf);
331 if (S_ISLNK(lbuf.st_mode))
332 return 0;
333 }
334 // Is this a directory or a file? Should it be excluded or included?
335 struct stat buf;
336 int statret = stat(item_path.c_str(), &buf);
337 if (statret == 0 && S_ISDIR(buf.st_mode) && !exclude_item) {
338 BESDEBUG(MODULE, PROLOG << item_path << " is NODE" << endl);
339 return new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node);
340 }
341 else if (statret == 0 && S_ISREG(buf.st_mode) && include_item) {
342 BESDEBUG(MODULE, PROLOG << item_path << " is LEAF" << endl);
343 return new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
344 get_catalog_utils()->is_data(item), CatalogItem::leaf);
345 }
346
347 // This is the error case; it only is run when the item_path is neither a
348 // directory nor a regular file.
349 stringstream msg;
350 if(exclude_item || !include_item){
351 msg << "Excluded the item '" << item_path << "' from the catalog '" <<
352 get_catalog_name() << "' node listing." << endl;
353 }
354 else {
355 msg << "Unable to create CatalogItem for '" << item_path << "' from the catalog '" <<
356 get_catalog_name() << ",' SKIPPING." << endl;
357 }
358 BESDEBUG(MODULE, PROLOG << msg.str());
359 VERBOSE(msg.str());
360
361 return 0;
362}
363
364// path must start with a '/'. By this class it will be interpreted as a
365// starting at the CatalogDirectory instance's root directory. It may either
366// end in a '/' or not.
367//
368// If it is not a directory - that is an error. (return null or throw?)
369//
370// Item names are relative
371
395BESCatalogDirectory::get_node(const string &path) const
396{
397 if (path[0] != '/')
398 throw BESInternalError("The path sent to BESCatalogDirectory::get_node() must start with a slash (/)", __FILE__, __LINE__);
399
400 string rootdir = get_catalog_utils()->get_root_dir();
401
402 // This will throw the appropriate exception (Forbidden or Not Found).
403 // Checks to make sure the different elements of the path are not
404 // symbolic links if follow_sym_links is set to false, and checks to
405 // make sure have permission to access node and the node exists.
406 // TODO Make BESUtil::check_path() return the stat struct so we don't have to stat again here.
407 BESUtil::check_path(path, rootdir, get_catalog_utils()->follow_sym_links());
408 string fullpath = BESUtil::assemblePath(rootdir, path);
409 struct stat full_path_stat_buf;
410 int stat_result = stat(fullpath.c_str(), &full_path_stat_buf);
411 if(stat_result){
412 throw BESForbiddenError(
413 string("Unable to 'stat' the path '") + fullpath + "' errno says: " + std::strerror(errno),
414 __FILE__, __LINE__);
415 }
416
417 CatalogNode *node = new CatalogNode(path);
418 if(S_ISREG(full_path_stat_buf.st_mode)){
419 BESDEBUG(MODULE, PROLOG << "The requested node '"+fullpath+"' is actually a leaf. Wut do?" << endl);
420
421 CatalogItem *item = make_item(rootdir, path);
422 if(item){
423 node->set_leaf(item);
424 }
425 else {
426 string msg(__func__);
427 msg += "() - Failed to build CatalogItem for "+ path + " BESCatlogDirectory::make_item() returned NULL.",
428 throw BESInternalError(msg,__FILE__, __LINE__);
429 }
430
431 BESDEBUG(MODULE, PROLOG << "Actually, I'm a LEAF (" << (void*)item << ")" << endl);
432 return node;
433 }
434 else if(S_ISDIR(full_path_stat_buf.st_mode)){
435 BESDEBUG(MODULE, PROLOG << "Processing directory node: "<< fullpath << endl);
436 DIR *dip = 0;
437 try {
438 // The node is a directory
439 // Based on other code (show_catalogs()), use BESCatalogUtils::exclude() on
440 // a directory, but BESCatalogUtils::include() on a file.
441 if (get_catalog_utils()->exclude(path))
442 throw BESForbiddenError(
443 string("The path '") + path + "' is not included in the catalog '" + get_catalog_name() + "'.",
444 __FILE__, __LINE__);
445
446 node->set_catalog_name(get_catalog_name());
447 node->set_lmt(get_time(full_path_stat_buf.st_mtime));
448
449 dip = opendir(fullpath.c_str());
450 if(dip == NULL){
451 // That went well...
452 // We need to return this "node", and at this point it is empty.
453 // Which is probably enough, so we do nothing more.
454 BESDEBUG(MODULE, PROLOG << "Unable to open '" << fullpath << "' SKIPPING (errno: " << std::strerror(errno) << ")"<< endl);
455 }
456 else {
457 // otherwise we grind through the node contents...
458 struct dirent *dit;
459 while ((dit = readdir(dip)) != NULL) {
460 CatalogItem * item = make_item(fullpath, dit->d_name);
461 if(item){
462 if(item->get_type() == CatalogItem::node){
463 node->add_node(item);
464 }
465 else {
466 node->add_leaf(item);
467 }
468 }
469 }
470 closedir(dip);
471 }
472
474 sort(node->nodes_begin(), node->nodes_end(), ordering);
475 sort(node->leaves_begin(), node->leaves_end(), ordering);
476
477 return node;
478 }
479 catch (...) {
480 closedir(dip);
481 throw;
482 }
483 }
484 throw BESInternalError(
485 "A BESCatalogDirectory can only return nodes for directories and regular files. The path '" + path
486 + "' is not a directory or a regular file for BESCatalog '" + get_catalog_name() + "'.", __FILE__, __LINE__);
487}
488
489#if 0
490// path must start with a '/'. By this class it will be interpreted as a
491// starting at the CatalogDirectory instance's root directory. It may either
492// end in a '/' or not.
493//
494// If it is not a directory - that is an error. (return null or throw?)
495//
496// Item names are relative
497
514BESCatalogDirectory::get_node(const string &path) const
515{
516 if (path[0] != '/') throw BESInternalError("The path sent to BESCatalogDirectory::get_node() must start with a slash (/)", __FILE__, __LINE__);
517
518 string rootdir = get_catalog_utils()->get_root_dir();
519
520 // This will throw the appropriate exception (Forbidden or Not Found).
521 // Checks to make sure the different elements of the path are not
522 // symbolic links if follow_sym_links is set to false, and checks to
523 // make sure have permission to access node and the node exists.
524 BESUtil::check_path(path, rootdir, get_catalog_utils()->follow_sym_links());
525
526 string fullpath = rootdir + path;
527
528 DIR *dip = opendir(fullpath.c_str());
529 if (!dip)
530 throw BESInternalError(
531 "A BESCatalogDirectory can only return nodes for directory. The path '" + path
532 + "' is not a directory for BESCatalog '" + get_catalog_name() + "'.", __FILE__, __LINE__);
533
534 try {
535 // The node is a directory
536
537 // Based on other code (show_catalogs()), use BESCatalogUtils::exclude() on
538 // a directory, but BESCatalogUtils::include() on a file.
539 if (get_catalog_utils()->exclude(path))
540 throw BESForbiddenError(
541 string("The path '") + path + "' is not included in the catalog '" + get_catalog_name() + "'.",
542 __FILE__, __LINE__);
543
544 CatalogNode *node = new CatalogNode(path);
545
546 node->set_catalog_name(get_catalog_name());
547 struct stat buf;
548 int statret = stat(fullpath.c_str(), &buf);
549 if (statret == 0 /* && S_ISDIR(buf.st_mode) */)
550 node->set_lmt(get_time(buf.st_mtime));
551
552 struct dirent *dit;
553 while ((dit = readdir(dip)) != NULL) {
554 string item = dit->d_name;
555 if (item == "." || item == "..") continue;
556
557 string item_path = fullpath + "/" + item;
558
559 // TODO add a test in configure for the readdir macro(s) DT_REG, DT_LNK
560 // and DT_DIR and use those, if present, to determine if the name is a
561 // link, directory or regular file. These are not present on all systems.
562 // Also, since we need mtime, these are not a huge time saver. But if we
563 // decide not to use the mtime, using these macros could save lots of system
564 // calls. jhrg 3/9/18
565
566 // Skip this dir entry if it is a sym link and follow links is false
567 if (get_catalog_utils()->follow_sym_links() == false) {
568 struct stat lbuf;
569 (void) lstat(item_path.c_str(), &lbuf);
570 if (S_ISLNK(lbuf.st_mode)) continue;
571 }
572
573 // Is this a directory or a file? Should it be excluded or included?
574 statret = stat(item_path.c_str(), &buf);
575 if (statret == 0 && S_ISDIR(buf.st_mode) && !get_catalog_utils()->exclude(item)) {
576#if 0
577 // Add a new node; set the size to zero.
578 node->add_item(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
579#endif
580 node->add_node(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
581 }
582 else if (statret == 0 && S_ISREG(buf.st_mode) && get_catalog_utils()->include(item)) {
583#if 0
584 // Add a new leaf.
585 node->add_item(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
586 get_catalog_utils()->is_data(item), CatalogItem::leaf));
587#endif
588 node->add_leaf(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
589 get_catalog_utils()->is_data(item), CatalogItem::leaf));
590 }
591 else {
592 VERBOSE("Excluded the item '" << item_path << "' from the catalog '" << get_catalog_name() << "' node listing.");
593 }
594 } // end of the while loop
595
596 closedir(dip);
597
598 CatalogItem::CatalogItemAscending ordering;
599
600 sort(node->nodes_begin(), node->nodes_end(), ordering);
601 sort(node->leaves_begin(), node->leaves_end(), ordering);
602
603 return node;
604 }
605 catch (...) {
606 closedir(dip);
607 throw;
608 }
609}
610#endif
611
631void BESCatalogDirectory::get_site_map(const string &prefix, const string &node_suffix, const string &leaf_suffix,
632 ostream &out, const string &path) const
633{
634 unique_ptr<CatalogNode> node(get_node(path));
635
636#if ITEMS
637 for (CatalogNode::item_citer i = node->items_begin(), e = node->items_end(); i != e; ++i) {
638 if ((*i)->get_type() == CatalogItem::leaf && (*i)->is_data()) {
639 out << prefix << path << (*i)->get_name() << leaf_suffix << endl;
640 }
641 else if ((*i)->get_type() == CatalogItem::node) {
642 get_site_map(prefix, leaf_suffix, out, path + (*i)->get_name() + "/");
643 }
644 }
645#endif
646
647 if (!node_suffix.empty())
648 out << prefix << path << node_suffix << endl;
649
650 // Depth-first node traversal. Assume the nodes and leaves are sorted
651 for (CatalogNode::item_citer i = node->nodes_begin(), e = node->nodes_end(); i != e; ++i) {
652 assert((*i)->get_type() == CatalogItem::node);
653 get_site_map(prefix, node_suffix, leaf_suffix, out, path + (*i)->get_name() + "/");
654 }
655
656 // For leaves, only write the data items
657 for (CatalogNode::item_citer i = node->leaves_begin(), e = node->leaves_end(); i != e; ++i) {
658 assert((*i)->get_type() == CatalogItem::leaf);
659 if ((*i)->is_data() && !leaf_suffix.empty())
660 out << prefix << path << (*i)->get_name() << leaf_suffix << endl;
661 }
662}
663
671void BESCatalogDirectory::dump(ostream &strm) const
672{
673 strm << BESIndent::LMarg << "BESCatalogDirectory::dump - (" << (void *) this << ")" << endl;
674 BESIndent::Indent();
675
676 strm << BESIndent::LMarg << "catalog utilities: " << endl;
677 BESIndent::Indent();
678 get_catalog_utils()->dump(strm);
679 BESIndent::UnIndent();
680 BESIndent::UnIndent();
681}
682
virtual bes::CatalogNode * get_node(const std::string &path) const
Get a CatalogNode for the given path in the current catalog.
virtual std::string get_root() const
Get the root directory for the catalog.
BESCatalogDirectory(const std::string &name)
A catalog for POSIX file systems.
virtual BESCatalogEntry * show_catalog(const std::string &container, BESCatalogEntry *entry)
Get the CatalogEntry for the given node.
virtual void get_site_map(const std::string &prefix, const std::string &node_suffix, const std::string &leaf_suffix, std::ostream &out, const std::string &path="/") const
Write the site map for this catalog to the stream.
virtual void dump(std::ostream &strm) const
dumps information about this object
const std::string & get_root_dir() const
Get the root directory of the catalog.
virtual bool exclude(const std::string &inQuestion) const
Should this file/directory be excluded in the catalog?
virtual unsigned int get_entries(DIR *dip, const std::string &fullnode, const std::string &use_node, BESCatalogEntry *entry, bool dirs_only)
virtual bool include(const std::string &inQuestion) const
Should this file/directory be included in the catalog?
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition BESCatalog.h:102
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static void conditional_timeout_cancel()
Checks if the timeout alarm should be canceled based on the value of the BES key BES....
Definition BESUtil.cc:898
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition BESUtil.cc:385
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:804
item_type get_type() const
Get the type of this item (unknown, node or leaf)