bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
BESUtil.cc
1// BESUtil.cc
2
3// This file is part of bes, A C++ back-end server implementation framework
4// for the OPeNDAP Data Access Protocol.
5
6// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact University Corporation for Atmospheric Research at
24// 3080 Center Green Drive, Boulder, CO 80301
25
26// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27// Please read the full copyright statement in the file COPYRIGHT_UCAR.
28//
29// Authors:
30// pwest Patrick West <pwest@ucar.edu>
31// jgarcia Jose Garcia <jgarcia@ucar.edu>
32
33#include "config.h"
34
35#include <sys/types.h>
36#include <sys/stat.h>
37#include <sys/resource.h>
38
39#include <fcntl.h>
40
41#if HAVE_UNISTD_H
42#include <unistd.h>
43#endif
44
45#include <thread> // std::this_thread::sleep_for
46#include <chrono> // std::chrono::seconds
47#include <string> // std::string, std::stol
48#include <cstdio>
49#include <cerrno>
50#include <cstring>
51#include <cstdlib>
52#include <ctime>
53#include <cassert>
54#include <vector>
55#include <list>
56#include <sstream>
57#include <iostream>
58#include <algorithm>
59#include <iomanip>
60
61#include <uuid/uuid.h>
62
63#include "TheBESKeys.h"
64#include "BESUtil.h"
65#include "BESDebug.h"
66#include "BESForbiddenError.h"
67#include "BESNotFoundError.h"
68#include "BESInternalError.h"
69#include "BESLog.h"
70#include "BESCatalogList.h"
71
72#include "BESInternalFatalError.h"
73#include "RequestServiceTimer.h"
74
75using namespace std;
76
77#define CRLF "\r\n"
78
79#define MODULE "util"
80#define prolog string("BESUtil::").append(__func__).append("() - ")
81
82const string BES_KEY_TIMEOUT_CANCEL = "BES.CancelTimeoutOnSend";
83
88long
90{
91 struct rusage usage;
92 if (getrusage(RUSAGE_SELF, &usage) == 0) { // getrusage() successful?
93#ifdef __APPLE__
94 // get the max size (man page says it is in bytes). This function returns the
95 // size in KB like Linux. jhrg 3/29/22
96 return usage.ru_maxrss / 1024;
97#else
98 return usage.ru_maxrss; // get the max size (man page says it is in kilobytes)
99#endif
100 }
101 else {
102 return 0;
103 }
104}
105
114{
115 if (!value.empty() && value.back() == '/')
116 value.pop_back(); // requires C++-11
117 // value.erase(value.end () -1);
118}
119
127{
128 if (!value.empty() && value[0] == '"')
129 value.erase(0, 1);
130 if (!value.empty() && value.back() == '"')
131 value.pop_back(); // requires C++-11
132 // value.erase(value.end () -1);
133}
134
139void BESUtil::set_mime_text(ostream &strm)
140{
141 strm << "HTTP/1.0 200 OK" << CRLF;
142 strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
143
144 const time_t t = time(0);
145 strm << "Date: " << rfc822_date(t).c_str() << CRLF;
146 strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
147
148 strm << "Content-Type: text/plain" << CRLF;
149 // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
150 strm << "Content-Description: unknown" << CRLF;
151 strm << CRLF;
152}
153
158void BESUtil::set_mime_html(ostream &strm)
159{
160 strm << "HTTP/1.0 200 OK" << CRLF;
161 strm << "XBES-Server: " << PACKAGE_STRING << CRLF;
162
163 const time_t t = time(0);
164 strm << "Date: " << rfc822_date(t).c_str() << CRLF;
165 strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF;
166
167 strm << "Content-type: text/html" << CRLF;
168 // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
169 strm << "Content-Description: unknown" << CRLF;
170 strm << CRLF;
171}
172
173// Return a MIME rfc-822 date. The grammar for this is:
174// date-time = [ day "," ] date time ; dd mm yy
175// ; hh:mm:ss zzz
176//
177// day = "Mon" / "Tue" / "Wed" / "Thu"
178// / "Fri" / "Sat" / "Sun"
179//
180// date = 1*2DIGIT month 2DIGIT ; day month year
181// ; e.g. 20 Jun 82
182// NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
183//
184// month = "Jan" / "Feb" / "Mar" / "Apr"
185// / "May" / "Jun" / "Jul" / "Aug"
186// / "Sep" / "Oct" / "Nov" / "Dec"
187//
188// time = hour zone ; ANSI and Military
189//
190// hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]
191// ; 00:00:00 - 23:59:59
192//
193// zone = "UT" / "GMT" ; Universal Time
194// ; North American : UT
195// / "EST" / "EDT" ; Eastern: - 5/ - 4
196// / "CST" / "CDT" ; Central: - 6/ - 5
197// / "MST" / "MDT" ; Mountain: - 7/ - 6
198// / "PST" / "PDT" ; Pacific: - 8/ - 7
199// / 1ALPHA ; Military: Z = UT;
200// ; A:-1; (J not used)
201// ; M:-12; N:+1; Y:+12
202// / ( ("+" / "-") 4DIGIT ) ; Local differential
203// ; hours+min. (HHMM)
204
205static const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
206static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
207
217string BESUtil::rfc822_date(const time_t t)
218{
219 struct tm stm{};
220 gmtime_r(&t, &stm);
221 char d[256];
222
223 snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm.tm_wday], stm.tm_mday,
224 months[stm.tm_mon], 1900 + stm.tm_year, stm.tm_hour, stm.tm_min, stm.tm_sec);
225 d[255] = '\0';
226 return {d};
227}
228
229string BESUtil::unhexstring(const string& s)
230{
231 int val;
232 istringstream ss(s);
233 ss >> std::hex >> val;
234 char tmp_str[2];
235 tmp_str[0] = static_cast<char>(val);
236 tmp_str[1] = '\0';
237 return {tmp_str};
238}
239
240// I modified this to mirror the version in libdap. The change allows several
241// escape sequences to be listed in 'except'. jhrg 2/18/09
242string BESUtil::www2id(const string &in, const string &escape, const string &except)
243{
244 string::size_type i = 0;
245 string res = in;
246 while ((i = res.find_first_of(escape, i)) != string::npos) {
247 if (except.find(res.substr(i, 3)) != string::npos) {
248 i += 3;
249 continue;
250 }
251 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
252 }
253
254 return res;
255}
256
257string BESUtil::lowercase(const string &s)
258{
259 string return_string = s;
260 for (int j = 0; j < static_cast<int>(return_string.size()); j++) {
261 return_string[j] = (char) tolower(return_string[j]);
262 }
263
264 return return_string;
265}
266
267string BESUtil::unescape(const string &s)
268{
269 bool done = false;
270 string::size_type index = 0;
271 /* string::size_type new_index = 0 ; */
272 string new_str;
273 while (!done) {
274 string::size_type bs = s.find('\\', index);
275 if (bs == string::npos) {
276 new_str += s.substr(index, s.size() - index);
277 done = true;
278 }
279 else {
280 new_str += s.substr(index, bs - index);
281 new_str += s[bs + 1];
282 index = bs + 2;
283 }
284 }
285
286 return new_str;
287}
288
294static void throw_access_error(const string &pathname, long error_number)
295{
296 switch(error_number) {
297 case ENOENT:
298 case ENOTDIR: {
299 string message = string("Failed to locate '").append(pathname).append("'\n");
300 INFO_LOG(message);
301 throw BESNotFoundError(message, __FILE__, __LINE__);
302 }
303
304 default: {
305 string message = string("Not allowed to access '").append(pathname).append("'\n");
306 INFO_LOG(message);
307 throw BESForbiddenError(message, __FILE__, __LINE__);
308 }
309 }
310}
311
318bool pathname_contains_symlink(const string &path, int search_limit)
319{
320 // This kludge to remove a trailing '/' is needed because lstat and readlinkat fail
321 // to detect a dir symlink when the dir name ends in '/'. On OSX readlinkat (and readlink)
322 // does detect embedded links, but not on Linux. The lstat() service doesn't detect
323 // embedded links anywhere. jhrg 1/3/22
324 string pathname = path;
325 if (!pathname.empty() && pathname.back() == '/') {
326 pathname.pop_back();
327 }
328
329 bool is_link = false;
330 size_t pos;
331 int i = 0; // used with search_limit
332 do {
333 // test pathname
334 struct stat buf;
335 int status = lstat(pathname.c_str(), &buf);
336 if (status == 0) {
337 is_link = S_ISLNK(buf.st_mode);
338 }
339 else {
340 string msg = "Could not resolve path when testing for symbolic links: ";
341 msg.append(strerror(errno));
342 BESDEBUG(MODULE, prolog << msg << endl);
343 throw BESInternalError(msg, __FILE__, __LINE__);
344 }
345
346 // remove the last part of pathname, including the trailing '/'
347 pos = pathname.find_last_of('/');
348 if (pos != string::npos) // find_last_of returns npos if the char is not found
349 pathname.erase(pos);
350 } while (++i < search_limit && !is_link && pos != string::npos && !pathname.empty());
351
352 return is_link;
353
354#if 0
355 // ssize_t readlink(const char *restrict pathname, char *restrict buf, size_t bufsiz);
356 // readlinkat (or readlink) can be used to detect sym links in a path or to get the path
357 // to the linked file. Here we used it to test for sym links. 1/3/22 jhrg
358 ssize_t len = readlinkat(AT_FDCWD, pathname.c_str(), nullptr, 0);
359 if (len == -1) {
360 // either errno is EINVAL meaning this is not a link or there's really an error
361 switch (errno) {
362 case EINVAL:
363 return false;
364 default:
365 string msg = "Could not resolve path when testing for symbolic links: ";
366 msg.append(strerror(errno));
367 throw BESInternalError(msg, __FILE__, __LINE__);
368 }
369 }
370
371 return true; // If readlinkat() does not return -1, it's a symlink
372#endif
373}
374
385void BESUtil::check_path(const string &path, const string &root, bool follow_sym_links) {
386 // if nothing is passed in path, then the path checks out since root is assumed to be valid.
387 if (path == "") return;
388
389 if (path.find("..") != string::npos) {
390 throw_access_error(path, EACCES); // use the code for 'access would be denied'
391 }
392
393 // Check if the combination of root + path exists on this machine. If so, check if it
394 // has symbolic links. Return BESNotFoundError if it does not exist and BESForbiddenError
395 // if it does exist but contains symbolic links and follow_sym_links is false. jhrg 12/30/21
396
397 string pathname = root;
398
399 if (pathname.back() != '/' && path.front() != '/')
400 pathname.append("/");
401
402 pathname.append(path);
403 if (access(pathname.c_str(), R_OK) != 0) {
404 throw_access_error(pathname, errno);
405 }
406
407 if (follow_sym_links == false) {
408 auto n = count(path.begin(), path.end(), '/');
409 // using 'n' for the search_limit may not be optimal (when path ends in '/', an extra
410 // component may be searched) but it's better than testing for a trailing '/' on every call.
411 if (pathname_contains_symlink(pathname, n)) {
412 throw_access_error(pathname, EACCES); // use the code for 'access would be denied'
413 }
414 }
415}
416
417char *
418BESUtil::fastpidconverter(char *buf, int base)
419{
420 return fastpidconverter(getpid(), buf, base);
421}
422
423char *
424BESUtil::fastpidconverter(long val, /* value to be converted */
425char *buf, /* output string */
426int base) /* conversion base */
427{
428 ldiv_t r; /* result of val / base */
429
430 if (base > 36 || base < 2) /* no conversion if wrong base */
431 {
432 *buf = '\0';
433 return buf;
434 }
435 if (val < 0) *buf++ = '-';
436 r = ldiv(labs(val), base);
437
438 /* output digits of val/base first */
439
440 if (r.quot > 0) buf = fastpidconverter(r.quot, buf, base);
441 /* output last digit */
442
443 *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int) r.rem];
444 *buf = '\0';
445 return buf;
446}
447
449{
450 if (!key.empty()) {
451 string::size_type first = key.find_first_not_of(" \t\n\r");
452 string::size_type last = key.find_last_not_of(" \t\n\r");
453 if (first == string::npos)
454 key = "";
455 else {
456 string::size_type num = last - first + 1;
457 string new_key = key.substr(first, num);
458 key = new_key;
459 }
460 }
461}
462
463string BESUtil::entity(char c)
464{
465 switch (c) {
466 case '>':
467 return "&gt;";
468 case '<':
469 return "&lt;";
470 case '&':
471 return "&amp;";
472 case '\'':
473 return "&apos;";
474 case '\"':
475 return "&quot;";
476 default:
477 return string(1, c); // is this proper default, just the char?
478 }
479}
480
487string BESUtil::id2xml(string in, const string &not_allowed)
488{
489 string::size_type i = 0;
490
491 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
492 in.replace(i, 1, entity(in[i]));
493 i++;
494 }
495
496 return in;
497}
498
504string BESUtil::xml2id(string in)
505{
506 string::size_type i = 0;
507
508 while ((i = in.find("&gt;", i)) != string::npos)
509 in.replace(i, 4, ">");
510
511 i = 0;
512 while ((i = in.find("&lt;", i)) != string::npos)
513 in.replace(i, 4, "<");
514
515 i = 0;
516 while ((i = in.find("&amp;", i)) != string::npos)
517 in.replace(i, 5, "&");
518
519 i = 0;
520 while ((i = in.find("&apos;", i)) != string::npos)
521 in.replace(i, 6, "'");
522
523 i = 0;
524 while ((i = in.find("&quot;", i)) != string::npos)
525 in.replace(i, 6, "\"");
526
527 return in;
528}
529
543void BESUtil::explode(char delim, const string &str, list<string> &values)
544{
545 std::string::size_type start = 0;
546 std::string::size_type qstart = 0;
547 std::string::size_type adelim = 0;
548 std::string::size_type aquote = 0;
549 bool done = false;
550 while (!done) {
551 string aval;
552 if (str[start] == '"') {
553 bool endquote = false;
554 qstart = start + 1;
555 while (!endquote) {
556 aquote = str.find('"', qstart);
557 if (aquote == string::npos) {
558 string currval = str.substr(start, str.size() - start);
559 string err = "BESUtil::explode - No end quote after value " + currval;
560 throw BESInternalError(err, __FILE__, __LINE__);
561 }
562 // could be an escaped escape character and an escaped
563 // quote, or an escaped escape character and a quote
564 if (str[aquote - 1] == '\\') {
565 if (str[aquote - 2] == '\\') {
566 endquote = true;
567 qstart = aquote + 1;
568 }
569 else {
570 qstart = aquote + 1;
571 }
572 }
573 else {
574 endquote = true;
575 qstart = aquote + 1;
576 }
577 }
578 if (str[qstart] != delim && qstart != str.size()) {
579 string currval = str.substr(start, qstart - start);
580 string err = "BESUtil::explode - No delim after end quote " + currval;
581 throw BESInternalError(err, __FILE__, __LINE__);
582 }
583 if (qstart == str.size()) {
584 adelim = string::npos;
585 }
586 else {
587 adelim = qstart;
588 }
589 }
590 else {
591 adelim = str.find(delim, start);
592 }
593 if (adelim == string::npos) {
594 aval = str.substr(start, str.size() - start);
595 done = true;
596 }
597 else {
598 aval = str.substr(start, adelim - start);
599 }
600
601 values.push_back(aval);
602 start = adelim + 1;
603 if (start == str.size()) {
604 values.push_back("");
605 done = true;
606 }
607 }
608}
609
620string BESUtil::implode(const list<string> &values, char delim)
621{
622 string result;
623 list<string>::const_iterator i = values.begin();
624 list<string>::const_iterator e = values.end();
625 bool first = true;
626 string::size_type d; // = string::npos ;
627 for (; i != e; i++) {
628 if (!first) result += delim;
629 d = (*i).find(delim);
630 if (d != string::npos && (*i)[0] != '"') {
631 string err = (string) "BESUtil::implode - delimiter exists in value " + (*i);
632 throw BESInternalError(err, __FILE__, __LINE__);
633 }
634 //d = string::npos ;
635 result += (*i);
636 first = false;
637 }
638 return result;
639}
640
660void BESUtil::url_explode(const string &url_str, BESUtil::url &url_parts)
661{
662 string rest;
663
664 string::size_type colon = url_str.find(":");
665 if (colon == string::npos) {
666 string err = "BESUtil::url_explode: missing colon for protocol";
667 throw BESInternalError(err, __FILE__, __LINE__);
668 }
669
670 url_parts.protocol = url_str.substr(0, colon);
671
672 if (url_str.substr(colon, 3) != "://") {
673 string err = "BESUtil::url_explode: no :// in the URL";
674 throw BESInternalError(err, __FILE__, __LINE__);
675 }
676
677 colon += 3;
678 rest = url_str.substr(colon);
679
680 string::size_type slash = rest.find("/");
681 if (slash == string::npos) slash = rest.size();
682
683 string::size_type at = rest.find("@");
684 if ((at != string::npos) && (at < slash)) {
685 // everything before the @ is username:password
686 string up = rest.substr(0, at);
687 colon = up.find(":");
688 if (colon != string::npos) {
689 url_parts.uname = up.substr(0, colon);
690 url_parts.psswd = up.substr(colon + 1);
691 }
692 else {
693 url_parts.uname = up;
694 }
695 // everything after the @ is domain/path
696 rest = rest.substr(at + 1);
697 }
698 slash = rest.find("/");
699 if (slash == string::npos) slash = rest.size();
700 colon = rest.find(":");
701 if ((colon != string::npos) && (colon < slash)) {
702 // everything before the colon is the domain
703 url_parts.domain = rest.substr(0, colon);
704 // everything after the folon is port/path
705 rest = rest.substr(colon + 1);
706 slash = rest.find("/");
707 if (slash != string::npos) {
708 url_parts.port = rest.substr(0, slash);
709 url_parts.path = rest.substr(slash + 1);
710 }
711 else {
712 url_parts.port = rest;
713 url_parts.path = "";
714 }
715 }
716 else {
717 slash = rest.find("/");
718 if (slash != string::npos) {
719 url_parts.domain = rest.substr(0, slash);
720 url_parts.path = rest.substr(slash + 1);
721 }
722 else {
723 url_parts.domain = rest;
724 }
725 }
726}
727
728string BESUtil::url_create(BESUtil::url &url_parts)
729{
730 string url = url_parts.protocol + "://";
731 if (!url_parts.uname.empty()) {
732 url += url_parts.uname;
733 if (!url_parts.psswd.empty()) url += ":" + url_parts.psswd;
734 url += "@";
735 }
736 url += url_parts.domain;
737 if (!url_parts.port.empty()) url += ":" + url_parts.port;
738 if (!url_parts.path.empty()) url += "/" + url_parts.path;
739
740 return url;
741}
742
743
754string BESUtil::pathConcat(const string &firstPart, const string &secondPart, char separator)
755{
756 string first = firstPart;
757 string second = secondPart;
758 string sep(1,separator);
759
760 // make sure there are not multiple slashes at the end of the first part...
761 // Note that this removes all the slashes. jhrg 9/27/16
762 while (!first.empty() && *first.rbegin() == separator) {
763 // C++-11 first.pop_back();
764 first = first.substr(0, first.size() - 1);
765 }
766 // make sure second part does not BEGIN with a slash
767 while (!second.empty() && second[0] == separator) {
768 // erase is faster? second = second.substr(1);
769 second.erase(0, 1);
770 }
771 string newPath;
772 if (first.empty()) {
773 newPath = second;
774 }
775 else if (second.empty()) {
776 newPath = first;
777 }
778 else {
779 newPath = first.append(sep).append(second);
780 }
781 return newPath;
782}
783
804string BESUtil::assemblePath(const string &firstPart, const string &secondPart, bool leadingSlash, bool trailingSlash)
805{
806 BESDEBUG(MODULE, prolog << "firstPart: '" << firstPart << "'" << endl);
807 BESDEBUG(MODULE, prolog << "secondPart: '" << secondPart << "'" << endl);
808
809 string newPath = BESUtil::pathConcat(firstPart, secondPart);
810 if (leadingSlash) {
811 if (newPath.empty()) {
812 newPath = "/";
813 }
814 else if (newPath.front() != '/') {
815 newPath = "/" + newPath;
816 }
817 }
818
819 if (trailingSlash) {
820 if (newPath.empty() || newPath.back() != '/') {
821 newPath.append("/");
822 }
823 }
824 else {
825 while (!newPath.empty() && newPath.back() == '/')
826 newPath.erase(newPath.size()-1);
827 }
828
829 BESDEBUG(MODULE, prolog << "newPath: " << newPath << endl);
830 return newPath;
831}
832
837bool BESUtil::endsWith(string const &fullString, string const &ending)
838{
839 if (fullString.size() >= ending.size()) {
840 return (0 == fullString.compare(fullString.size() - ending.size(), ending.size(), ending));
841 }
842 else {
843 return false;
844 }
845}
846
869{
870 if (RequestServiceTimer::TheTimer()->is_expired()) {
871 stringstream msg;
872 msg << "The submitted request took too long to service.";
873 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
874 }
875}
876
899{
900 const string false_str = "false";
901 const string no_str = "no";
902
903 bool cancel_timeout_on_send = true;
904 bool found = false;
905 string value;
906
907 TheBESKeys::TheKeys()->get_value(BES_KEY_TIMEOUT_CANCEL, value, found);
908 if (found) {
909 value = BESUtil::lowercase(value);
910 if ( value == false_str || value == no_str) cancel_timeout_on_send = false;
911 }
912 BESDEBUG(MODULE, __func__ << "() - cancel_timeout_on_send: " << (cancel_timeout_on_send ? "true" : "false") << endl);
913 if (cancel_timeout_on_send) {
915 alarm(0);
916 }
917}
918
924unsigned int BESUtil::replace_all(string &s, string find_this, string replace_with_this)
925{
926 unsigned int replace_count = 0;
927 size_t pos = s.find(find_this);
928 while (pos != string::npos) {
929 // Replace current matching substring
930 s.replace(pos, find_this.size(), replace_with_this);
931 // Get the next occurrence from current position
932 pos = s.find(find_this, pos + replace_with_this.size());
933 replace_count++;
934 }
935 return replace_count;
936}
937
949string BESUtil::normalize_path(const string &raw_path, bool leading_separator, bool trailing_separator, const string separator /* = "/" */)
950{
951 if (separator.size() != 1)
952 throw BESInternalError("Path separators must be a single character. The string '" + separator + "' does not qualify.", __FILE__, __LINE__);
953 char separator_char = separator[0];
954 string double_separator;
955 double_separator = double_separator.append(separator).append(separator);
956
957 string path(raw_path);
958
959 replace_all(path, double_separator, separator);
960
961 if (path.empty()) {
962 path = separator;
963 }
964 if (path == separator) {
965 return path;
966 }
967 if (leading_separator) {
968 if (path[0] != separator_char) {
969 path = string(separator).append(path);
970 }
971 }
972 else {
973 if (path[0] == separator_char) {
974 path = path.substr(1);
975 }
976 }
977 if (trailing_separator) {
978 if (*path.rbegin() != separator_char) {
979 path = path.append(separator);
980 }
981 }
982 else {
983 if (*path.rbegin() == separator_char) {
984 path = path.substr(0, path.size() - 1);
985 }
986 }
987 return path;
988}
989
995void BESUtil::tokenize(const string& str, vector<string>& tokens, const string& delimiters /* = "/" */)
996{
997 // Skip delimiters at beginning.
998 string::size_type lastPos = str.find_first_not_of(delimiters, 0);
999 // Find first "non-delimiter".
1000 string::size_type pos = str.find_first_of(delimiters, lastPos);
1001 while (string::npos != pos || string::npos != lastPos) {
1002 // Found a token, add it to the vector.
1003 tokens.push_back(str.substr(lastPos, pos - lastPos));
1004 // Skip delimiters. Note the "not_of"
1005 lastPos = str.find_first_not_of(delimiters, pos);
1006 // Find next "non-delimiter"
1007 pos = str.find_first_of(delimiters, lastPos);
1008 }
1009}
1010
1017string BESUtil::get_time(bool use_local_time)
1018{
1019 return get_time(time(0), use_local_time);
1020}
1021
1029string BESUtil::get_time(time_t the_time, bool use_local_time)
1030{
1031 char buf[sizeof "YYYY-MM-DDTHH:MM:SS zones"];
1032 int status = 0;
1033
1034 // From StackOverflow:
1035 // This will work too, if your compiler doesn't support %F or %T:
1036 // strftime(buf, sizeof buf, "%Y-%m-%dT%H:%M:%S%Z", gmtime(&now));
1037 //
1038 // UTC is the default. Override to local time based on the
1039 // passed parameter 'use_local_time'
1040 struct tm result{};
1041 if (!use_local_time) {
1042 gmtime_r(&the_time, &result);
1043 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
1044 }
1045 else {
1046 localtime_r(&the_time, &result);
1047 status = strftime(buf, sizeof buf, "%FT%T%Z", &result);
1048 }
1049
1050 if (!status) {
1051 ERROR_LOG(prolog + "Error formatting time value!");
1052 return "date-format-error";
1053 }
1054
1055 return buf;
1056}
1057
1068vector<string> BESUtil::split(const string &s, char delim /* '/' */, bool skip_empty /* true */)
1069{
1070 stringstream ss(s);
1071 string item;
1072 vector<string> tokens;
1073
1074 while (getline(ss, item, delim)) {
1075
1076 if (item.empty() && skip_empty)
1077 continue;
1078
1079 tokens.push_back(item);
1080 }
1081
1082 return tokens;
1083}
1084
1085BESCatalog *BESUtil::separateCatalogFromPath(std::string &ppath)
1086{
1087 BESCatalog *catalog = 0; // pointer to a singleton; do not delete
1088 vector<string> path_tokens;
1089
1090 // BESUtil::normalize_path() removes duplicate separators and adds leading and trailing separators as directed.
1091 string path = BESUtil::normalize_path(ppath, false, false);
1092 BESDEBUG(MODULE, prolog << "Normalized path: " << path << endl);
1093
1094 // Because we may need to alter the container/file/resource name by removing
1095 // a catalog name from the first node in the path we use "use_container" to store
1096 // the altered container path.
1097 string use_container = ppath;
1098
1099 // Breaks path into tokens
1100 BESUtil::tokenize(path, path_tokens);
1101 if (!path_tokens.empty()) {
1102 BESDEBUG(MODULE, "First path token: " << path_tokens[0] << endl);
1103 catalog = BESCatalogList::TheCatalogList()->find_catalog(path_tokens[0]);
1104 if (catalog) {
1105 BESDEBUG(MODULE, prolog << "Located catalog " << catalog->get_catalog_name() << " from path component" << endl);
1106 // Since the catalog name is in the path we
1107 // need to drop it this should leave container
1108 // with a leading
1109 ppath = BESUtil::normalize_path(path.substr(path_tokens[0].size()), true, false);
1110 BESDEBUG(MODULE, prolog << "Modified container/path value to: " << use_container << endl);
1111 }
1112 }
1113
1114 return catalog;
1115}
1116
/**
 * @brief Append a description of a stream's state flags to a message.
 *
 * Writes the results of good(), eof(), fail() and bad(), each as
 * " {ios.NAME()=true|false}".
 *
 * @param ios_ref The stream whose state is reported.
 * @param msg The message the description is appended to.
 */
void ios_state_msg(std::ios &ios_ref, std::stringstream &msg) {
    const auto as_text = [](bool flag) { return flag ? "true" : "false"; };

    msg << " {ios.good()=" << as_text(ios_ref.good()) << "}"
        << " {ios.eof()=" << as_text(ios_ref.eof()) << "}"
        << " {ios.fail()=" << as_text(ios_ref.fail()) << "}"
        << " {ios.bad()=" << as_text(ios_ref.bad()) << "}";
}
1123
1124// size of the buffer used to read from the temporary file built on disk and
1125// send data to the client over the network connection (socket/stream)
1126#define OUTPUT_FILE_BLOCK_SIZE 4096
1127
1136uint64_t BESUtil::file_to_stream(const std::string &file_name, std::ostream &o_strm, uint64_t read_start_position)
1137{
1138#ifndef NDEBUG
1139 stringstream msg;
1140 msg << prolog << "Using ostream: " << (void *) &o_strm << " cout: " << (void *) &cout << endl;
1141 BESDEBUG(MODULE, msg.str());
1142 INFO_LOG( msg.str());
1143#endif
1144
1145 vector<char> rbuffer(OUTPUT_FILE_BLOCK_SIZE);
1146 std::ifstream i_stream(file_name, std::ios_base::in | std::ios_base::binary); // Use binary mode so we can
1147
1148 // good() returns true if !(eofbit || badbit || failbit)
1149 if(!i_stream.good()){
1150 stringstream msg;
1151 msg << prolog << "Failed to open file " << file_name;
1152 ios_state_msg(i_stream, msg);
1153 BESDEBUG(MODULE, msg.str() << endl);
1154 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1155 }
1156
1157 // good() returns true if !(eofbit || badbit || failbit)
1158 if(!o_strm.good()){
1159 stringstream msg;
1160 msg << prolog << "Problem with ostream. " << file_name;
1161 ios_state_msg(i_stream, msg);
1162 BESDEBUG(MODULE, msg.str() << endl);
1163 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1164 }
1165 // this is where we advance to the last byte that was read
1166 i_stream.seekg(read_start_position);
1167
1168 //vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
1169 // This is where the file is copied.
1170 uint64_t tcount = 0;
1171 while (i_stream.good() && o_strm.good()){
1172 i_stream.read(rbuffer.data(), OUTPUT_FILE_BLOCK_SIZE); // Read at most n bytes into
1173 o_strm.write(rbuffer.data(), i_stream.gcount()); // buf, then write the buf to
1174 tcount += i_stream.gcount();
1175 }
1176 o_strm.flush();
1177
1178 // fail() is true if failbit || badbit got set, but does not consider eofbit
1179 if(i_stream.fail() && !i_stream.eof()){
1180 stringstream msg;
1181 msg << prolog << "There was an ifstream error when reading from: " << file_name;
1182 ios_state_msg(i_stream, msg);
1183 msg << " last_lap: " << i_stream.gcount() << " bytes";
1184 msg << " total_read: " << tcount << " bytes";
1185 BESDEBUG(MODULE, msg.str() << endl);
1186 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1187 }
1188
1189 // If we're not at the eof of the input stream then we have failed.
1190 if (!i_stream.eof()){
1191 stringstream msg;
1192 msg << prolog << "Failed to reach EOF on source file: " << file_name;
1193 ios_state_msg(i_stream, msg);
1194 msg << " last_lap: " << i_stream.gcount() << " bytes";
1195 msg << " total_read: " << tcount << " bytes";
1196 BESDEBUG(MODULE, msg.str() << endl);
1197 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1198 }
1199
1200 // And if something went wrong on the output stream we have failed.
1201 if(!o_strm.good()){
1202 stringstream msg;
1203 msg << prolog << "There was an ostream error during transmit. Transmitted " << tcount << " bytes.";
1204 ios_state_msg(o_strm, msg);
1205 auto crntpos = o_strm.tellp();
1206 msg << " current_position: " << crntpos << endl;
1207 BESDEBUG(MODULE, msg.str());
1208 ERROR_LOG(msg.str());
1209 }
1210
1211#ifndef NDEBUG
1212 msg.str("");
1213 msg << prolog << "Sent "<< tcount << " bytes from file '" << file_name<< "'. " << endl;
1214 BESDEBUG(MODULE,msg.str());
1215 INFO_LOG(msg.str());
1216#endif
1217
1218 return tcount;
1219}
1220
1222bool BESUtil::is_directory(const string &p) {
1223 struct stat st{};
1224 if (stat(p.c_str(), &st) == 0) {
1225 return S_ISDIR(st.st_mode);
1226 }
1227 return false;
1228}
1229
1235string BESUtil::get_dir_name(const string &p) {
1236 size_t pos = p.find_last_of('/');
1237 if (pos == string::npos) {
1238 return ".";
1239 }
1240 else if (pos == 0) {
1241 return "/";
1242 }
1243 else {
1244 return p.substr(0, pos);
1245 }
1246}
1247
1254int BESUtil::mkdir_p(const string &path, mode_t mode) {
1255 if (path.empty()) {
1256 return 0;
1257 }
1258
1259 string p = path;
1260 if (p[p.size() - 1] == '/') {
1261 p = p.substr(0, p.size() - 1);
1262 }
1263
1264 if (p.empty()) {
1265 return 0;
1266 }
1267
1268 if (is_directory(p)) {
1269 return 0;
1270 }
1271
1272 int rc = mkdir_p(get_dir_name(p), mode);
1273 if (rc == 0) {
1274 rc = mkdir(p.c_str(), mode);
1275 }
1276
1277 return rc;
1278}
1279
1280string BESUtil::file_to_string(const string &filename) {
1281 std::ifstream t(filename);
1282 if (!t.is_open()) {
1283 throw BESInternalError("Could not open file: " + filename, __FILE__, __LINE__);
1284 }
1285 std::stringstream buffer;
1286 buffer << t.rdbuf();
1287 return buffer.str();
1288}
1289
1297int BESUtil::make_temp_file(const string &temp_file_dir, string &temp_file_name) {
1298 temp_file_name = BESUtil::assemblePath(temp_file_dir, "/bes_util_XXXXXX");
1299
1300 // Open truncated for update. NB: mkstemp() returns a file descriptor.
1301 // man mkstemp says "... The file is opened with the O_EXCL flag,
1302 // guaranteeing that when mkstemp returns successfully we are the only
1303 // user." 09/19/02 jhrg
1304 // The 'hack' &temp_file_name[0] is explicitly supported by the C++ 11 standard.
1305 // jhrg 3/9/23
1306 int fd = mkstemp(&temp_file_name[0]); // fd mode is 666 or 600 (Unix)
1307 if (fd < 0) {
1308 throw BESInternalError(string("mkstemp() for ") + temp_file_name + " failed (" + strerror(errno) + ").",
1309 __FILE__, __LINE__);
1310 }
1311
1312 return fd;
1313}
1314
1321void BESUtil::string_to_file(const string &filename, const string &content) {
1322 std::ofstream t(filename, std::ios::out | std::ios::trunc);
1323 if (!t.is_open()) {
1324 throw BESInternalError("Could not open file: " + filename, __FILE__, __LINE__);
1325 }
1326 t << content;
1327}
1328
1334std::string &BESUtil::remove_crlf(std::string &str) {
1335 const auto the_bad_things ="\r\n";
1336 size_t pos = 0;
1337 while ((pos = str.find_first_of(the_bad_things, pos)) != std::string::npos) {
1338 str[pos] = ' ';
1339 }
1340 return str;
1341}
1342
1343
1344std::string BESUtil::uuid() {
1345 uuid_t raw_uuid;
1346 uuid_generate_random(raw_uuid);
1347 char uuid_str[37];
1348 uuid_unparse_lower(raw_uuid, uuid_str);
1349 return {uuid_str};
1350}
1351
1352
Catalogs provide a hierarchical organization for data.
Definition BESCatalog.h:51
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition BESCatalog.h:102
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
Definition BESUtil.cc:1068
static void explode(char delim, const std::string &str, std::list< std::string > &values)
Definition BESUtil.cc:543
static long get_current_memory_usage() noexcept
Get the Resident Set Size in KB.
Definition BESUtil.cc:89
static void url_explode(const std::string &url_str, BESUtil::url &url_parts)
Given a url, break the url into its different parts.
Definition BESUtil.cc:660
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition BESUtil.cc:837
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition BESUtil.cc:995
static std::string get_dir_name(const std::string &p)
Definition BESUtil.cc:1235
static void set_mime_text(std::ostream &strm)
Generate an HTTP 1.0 response header for a text document.
Definition BESUtil.cc:139
static std::string id2xml(std::string in, const std::string &not_allowed="><&'\"")
Definition BESUtil.cc:487
static void conditional_timeout_cancel()
Checks if the timeout alarm should be canceled based on the value of the BES key BES....
Definition BESUtil.cc:898
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Is the combination of root + path a pathname the BES can/should access?
Definition BESUtil.cc:385
static int mkdir_p(const std::string &path, mode_t mode)
Definition BESUtil.cc:1254
static void exit_on_request_timeout()
Checks if the timeout alarm should be canceled based on the value of the BES key BES....
Definition BESUtil.cc:868
static unsigned int replace_all(std::string &s, std::string find_this, std::string replace_with_this)
Operates on the string 's' to replace every occurrence of the value of the string 'find_this' with t...
Definition BESUtil.cc:924
static void set_mime_html(std::ostream &strm)
Generate an HTTP 1.0 response header for an HTML document.
Definition BESUtil.cc:158
static std::string lowercase(const std::string &s)
Definition BESUtil.cc:257
static bool is_directory(const std::string &p)
Is the given path a directory?
Definition BESUtil.cc:1222
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:754
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition BESUtil.cc:804
static std::string www2id(const std::string &in, const std::string &escape="%", const std::string &except="")
Definition BESUtil.cc:242
static std::string implode(const std::list< std::string > &values, char delim)
Definition BESUtil.cc:620
static void trim_if_surrounding_quotes(std::string &value)
Remove double quotes around a string. This function will remove a leading and/or trailing double quote...
Definition BESUtil.cc:126
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition BESUtil.cc:949
static std::string xml2id(std::string in)
Definition BESUtil.cc:504
static uint64_t file_to_stream(const std::string &file_name, std::ostream &o_strm, uint64_t read_start_position=0)
Copies the contents of the file identified by file_name to the stream o_strm.
Definition BESUtil.cc:1136
static void trim_if_trailing_slash(std::string &value)
If the string ends in a slash, remove it. This function works for empty strings (doing nothing)....
Definition BESUtil.cc:113
static std::string unescape(const std::string &s)
Definition BESUtil.cc:267
static void string_to_file(const std::string &filename, const std::string &content)
Write a string to a file.
Definition BESUtil.cc:1321
static char * fastpidconverter(char *buf, int base)
Definition BESUtil.cc:418
static void removeLeadingAndTrailingBlanks(std::string &key)
Definition BESUtil.cc:448
static std::string & remove_crlf(std::string &str)
"Sanitizes" the string by replacing any 0x0A (new line) or 0x0D (carriage return) characters with 0x2...
Definition BESUtil.cc:1334
static int make_temp_file(const std::string &temp_file_dir, std::string &temp_file_name)
Make and open a temporary file. The file is opened such that we know it is unique and not in use by a...
Definition BESUtil.cc:1297
static std::string get_time(bool use_local_time=false)
Definition BESUtil.cc:1017
static RequestServiceTimer * TheTimer()
Return a pointer to a singleton timer instance. If an instance does not exist it will create and init...
void disable_timeout()
Disable the time-out.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
STL iterator class.