bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
DirectoryUtil.cc
1
2// This file is part of the "NcML Module" project, a BES module designed
3// to allow NcML files to be used as a wrapper to add
4// AIS to existing datasets of any format.
5//
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: Michael Johnson <m.johnson@opendap.org>
8//
9// For more information, please also see the main website: http://opendap.org/
10//
11// This library is free software; you can redistribute it and/or
12// modify it under the terms of the GNU Lesser General Public
13// License as published by the Free Software Foundation; either
14// version 2.1 of the License, or (at your option) any later version.
15//
16// This library is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19// Lesser General Public License for more details.
20//
21// You should have received a copy of the GNU Lesser General Public
22// License along with this library; if not, write to the Free Software
23// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24//
25// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26//
27// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29
30#include "config.h"
31#include "DirectoryUtil.h"
32
33#include <cstring>
34#include <cerrno>
35#include <sstream>
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <dirent.h>
39
40#include "BESRegex.h"
41
42// bes
43#include "BESDebug.h"
44#include "BESForbiddenError.h"
45#include "BESInternalError.h"
46#include "TheBESKeys.h"
47#include "BESNotFoundError.h"
48#include "BESUtil.h"
49
50using std::string;
51using std::vector;
52using std::endl;
53
54namespace agg_util {
/**
 * Scoped owner of a DIR* stream obtained from opendir().
 *
 * The constructor attempts to open the directory and the destructor calls
 * closedir() on the handle when the open succeeded.  Callers test fail()
 * right after construction; when it is true they can read errno to learn
 * why opendir() failed.
 *
 * Copying is disabled: a copy would hold the same DIR* and both
 * destructors would call closedir() on it.
 */
struct DirWrapper {
public:

    /**
     * Try to open a directory stream.
     * @param fullDirPath path handed verbatim to opendir().
     */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // If the open fails _pDir stays null; the caller can check errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the stream if one was opened. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true when no directory stream is held. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the raw stream (null if the open failed); this object keeps ownership. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // Closed by the destructor when non-null.
    DIR* _pDir;
    // The path that was passed to the constructor.
    std::string _fullPath;

private:
    // Declared but intentionally never defined, so that the handle cannot
    // end up owned (and closed) by two objects.
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
92
94FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
95 _path(path), _basename(basename), _fullPath("") // start empty, cached later
96 , _isDir(isDir), _modTime(modTime)
97{
100}
101
102FileInfo::~FileInfo()
103{
104}
105
106const std::string&
108{
109 return _path;
110}
111
112const std::string&
113FileInfo::basename() const
114{
115 return _basename;
116}
117
118bool FileInfo::isDir() const
119{
120 return _isDir;
121}
122
123time_t FileInfo::modTime() const
124{
125 return _modTime;
126}
127
129{
130 // we'll just use UTC for the output...
131 struct tm* pTM = gmtime(&_modTime);
132 char buf[128];
133 // this should be "Year-Month-Day Hour:Minute:Second"
134 strftime(buf, 128, "%F %T", pTM);
135 return string(buf);
136}
137
138const std::string&
140{
141 if (_fullPath.empty()) {
142 _fullPath = _path + "/" + _basename;
143 }
144 return _fullPath;
145}
146
147std::string FileInfo::toString() const
148{
149 return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
150 + getModTimeAsString() + "\""
151 " }";
152}
153
155
156const string DirectoryUtil::_sDebugChannel = "agg_util";
157
158DirectoryUtil::DirectoryUtil() :
159 _rootDir("/"), _suffix("") // we start with no filter
160 , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
161{
162 // this can throw, but the class is completely constructed by this point.
163 setRootDir("/");
164}
165
166DirectoryUtil::~DirectoryUtil()
167{
168 clearRegExp();
169}
170
172const std::string&
174{
175 return _rootDir;
176}
177
183void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
184 bool /*allowSymLinks=false*/)
185{
186 if (!allowRelativePaths && hasRelativePath(origRootDir)) {
187 throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
188 __LINE__);
189 }
190
191 // Get the root without trailing slash, we'll add it.
192 _rootDir = origRootDir;
193 removeTrailingSlashes(_rootDir);
194 // If empty here, that means the actual filesystem root.
195
196 // Use the BESUtil to test the path
197 // Since it assumes root is valid and strips preceding "/",
198 // we use "/" as the root path and the root path as the path
199 // to validate the root. This will throw if invalid.
200 BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
201
202 // We should be good if we get here.
203}
204
205void DirectoryUtil::setFilterSuffix(const std::string& suffix)
206{
207 _suffix = suffix;
208}
209
210void DirectoryUtil::setFilterRegExp(const std::string& regexp)
211{
212 clearRegExp(); // avoid leaks
213 if (!regexp.empty()) {
214 _pRegExp = new BESRegex(regexp.c_str());
215 }
216}
217
219{
220 delete _pRegExp;
221 _pRegExp = 0;
222}
223
225{
226 _newestModTime = newestModTime;
227 _filteringModTimes = true;
228}
229
230void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
231 std::vector<FileInfo>* pDirectories)
232{
233 string pathToUse(path);
234 removePrecedingSlashes(pathToUse);
235 pathToUse = getRootDir() + "/" + pathToUse;
236 BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
237
238 // RAII, will closedir no matter how we leave function, including a throw
239 DirWrapper pDir(pathToUse);
240 if (pDir.fail()) {
241 throwErrorForOpendirFail(pathToUse);
242 }
243
244 // Go through each entry and see if it's a directory or regular file and
245 // add it to the list.
246 struct dirent* pDirEnt = 0;
247 while ((pDirEnt = readdir(pDir.get())) != 0) {
248 string entryName = pDirEnt->d_name;
249 // Exclude ".", ".." and any dotfile dirs like ".svn".
250 if (!entryName.empty() && entryName[0] == '.') {
251 continue;
252 }
253
254 // Figure out if it's a regular file or directory
255 string pathToEntry = pathToUse + "/" + entryName;
256 struct stat statBuf;
257 int statResult = stat(pathToEntry.c_str(), &statBuf);
258 if (statResult != 0) {
259 // If we can't stat the file for some reason, then ignore it
260 continue;
261 }
262
263 // Use the passed in path for the entry since we
264 // want to make the locations be relative to the root
265 // for loading later.
266 if (pDirectories && S_ISDIR(statBuf.st_mode)) {
267 pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
268 }
269 else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
270 FileInfo theFile(path, entryName, false, statBuf.st_mtime);
271 // match against the relative passed in path, not root full path
272 if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
273 pRegularFiles->push_back(theFile);
274 }
275 }
276 }
277}
278
279void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
280 std::vector<FileInfo>* pDirectories)
281{
282 // Remove trailing slash to make it canonical
283 string canonicalPath = path;
284 removeTrailingSlashes(canonicalPath);
285
286 // We use our own local vector of directories in order to recurse,
287 // then add them to the end of pDirectories if it exists.
288
289 // First, get the current path's listing
290 vector<FileInfo> dirs;
291 dirs.reserve(16); // might as well start with a "few" to avoid grows.
292
293 // Keep adding them to the user specified regular file list if desired,
294 // but keep track of dirs ourself.
295 getListingForPath(canonicalPath, pRegularFiles, &dirs);
296
297 // If the caller wanted directories, append them all to the return
298 if (pDirectories) {
299 pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
300 }
301
302 // Finally, recurse on each directory in dirs
303 for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
304 string subPath = canonicalPath + "/" + it->basename();
305 BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
306 // Pass down the caller's accumulated vector's to be filled in.
307 getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
308 }
309
310}
311
312void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
313{
314 // call the other one, not accumulated the directories, only recursing into them.
315 getListingForPathRecursive(path, &rRegularFiles, 0);
316}
317
318void DirectoryUtil::throwErrorForOpendirFail(const string& fullPath)
319{
320 switch (errno) {
321 case EACCES: {
322 string msg = "Permission denied for some directory in path=\"" + fullPath + "\"";
323 throw BESForbiddenError(msg, __FILE__, __LINE__);
324 }
325 break;
326
327 case ELOOP: {
328 string msg = "A symlink loop was detected in path=\"" + fullPath + "\"";
329 throw BESNotFoundError(msg, __FILE__, __LINE__); // closest I can figure...
330 }
331 break;
332
333 case ENAMETOOLONG: {
334 string msg = "A name in the path was too long. path=\"" + fullPath + "\"";
335 throw BESNotFoundError(msg, __FILE__, __LINE__);
336 }
337 break;
338
339 case ENOENT: {
340 string msg = "Some part of the path was not found. path=\"" + fullPath + "\"";
341 throw BESNotFoundError(msg, __FILE__, __LINE__);
342 }
343 break;
344
345 case ENOTDIR: {
346 string msg = "Some part of the path was not a directory. path=\"" + fullPath + "\"";
347 throw BESNotFoundError(msg, __FILE__, __LINE__);
348 }
349 break;
350
351 case ENFILE: {
352 string msg = "Internal Error: Too many files are currently open!";
353 throw BESInternalError(msg, __FILE__, __LINE__);
354 }
355 break;
356
357 default: {
358 string msg = "An unknown errno was found after opendir() was called on path=\"" + fullPath + "\"";
359 throw BESInternalError(msg, __FILE__, __LINE__);
360 }
361 }
362}
363
364bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
365{
366 bool matches = true;
367 // Do the suffix first since it's fast
368 if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
369 matches = false;
370 }
371
372 // Suffix matches and we have a regexp, check that
373 if (matches && _pRegExp) {
374 // match the full string, -1 on fail, num chars matching otherwise
375 int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
376 matches = (numCharsMatching > 0); // TODO do we want to match the size()?
377 }
378
379 if (matches && _filteringModTimes) {
380 matches = (modTime < _newestModTime);
381 }
382
383 return matches;
384}
385
386bool DirectoryUtil::hasRelativePath(const std::string& path)
387{
388 return (path.find("..") != string::npos);
389}
390
392{
393 if (!path.empty()) {
394 string::size_type pos = path.find_last_not_of("/");
395 if (pos != string::npos) {
396 path = path.substr(0, pos + 1);
397 }
398 }
399}
400
402{
403 if (!path.empty()) {
404 string::size_type pos = path.find_first_not_of("/");
405 path = path.substr(pos, string::npos);
406 }
407}
408
409void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
410{
411 std::ostringstream oss;
412 printFileInfoList(oss, listing);
413 BESDEBUG(_sDebugChannel, oss.str() << endl);
414}
415
416void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
417{
418 for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
419 os << it->toString() << endl;
420 }
421}
422
424{
425 bool found;
426 string rootDir;
427 TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
428 if (!found) {
429 TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
430 }
431 if (!found) {
432 rootDir = "/";
433 }
434 return rootDir;
435}
436
437bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
438{
439 // see if the last suffix.size() characters match.
440 bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
441 return matches;
442}
443}
error thrown if the BES is not allowed to access the resource requested
error thrown if the resource requested cannot be found
Regular expression matching.
Definition BESRegex.h:89
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition BESUtil.cc:385
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
static std::string getBESRootDir()
void setFilterRegExp(const std::string &regexp)
static void removePrecedingSlashes(std::string &path)
static bool hasRelativePath(const std::string &path)
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
void setFilterSuffix(const std::string &suffix)
static void removeTrailingSlashes(std::string &path)
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void setFilterModTimeOlderThan(time_t newestModTime)
const std::string & getRootDir() const
std::string getModTimeAsString() const
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
const std::string & path() const
const std::string & getFullPath() const
STL iterator class.
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...