41#include "BESCatalogList.h" 
   47using std::chrono::system_clock;
 
   49#define MODULE HTTP_MODULE 
   50#define prolog string("url::").append(__func__).append("() - ") 
   60    const string protocol_end(
"://");
 
   61    BESDEBUG(MODULE, prolog << 
"BEGIN (parsing: '" << d_source_url_str << 
"')" << endl);
 
   67    if(d_source_url_str.find(protocol_end) == string::npos){
 
   70        const BESCatalogList *bcl = BESCatalogList::TheCatalogList();
 
   72        BESDEBUG(MODULE, prolog << 
"Searching for  catalog: " << default_catalog_name << endl);
 
   73        const BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
 
   75            BESDEBUG(MODULE, prolog << 
"Found catalog: " << bcat->
get_catalog_name() << endl);
 
   77            string msg = 
"OUCH! Unable to locate default catalog!";
 
   78            BESDEBUG(MODULE, prolog << msg << endl);
 
   79            throw BESInternalError(msg, __FILE__, __LINE__);
 
   81        string catalog_root = bcat->
get_root();
 
   82        BESDEBUG(MODULE, prolog << 
"Catalog root: " << catalog_root << endl);
 
   85        if(file_path[0] != 
'/')
 
   86            file_path = 
"/" + file_path;
 
   87        d_source_url_str = FILE_PROTOCOL + file_path;
 
   90    const string parse_url_target(d_source_url_str);
 
   92    auto prot_i = search(parse_url_target.cbegin(), parse_url_target.cend(),
 
   93                         protocol_end.begin(), protocol_end.end());
 
   95    if (prot_i != parse_url_target.end())
 
   96        advance(prot_i, protocol_end.size());
 
   98    d_protocol.reserve(distance(parse_url_target.begin(), prot_i));
 
   99    transform(parse_url_target.begin(), prot_i,
 
  100              back_inserter(d_protocol),
 
  101              [](
int c) { 
return tolower(c); }); 
 
  102    if (prot_i == parse_url_target.end())
 
  105    if (d_protocol == FILE_PROTOCOL) {
 
  106        d_path = parse_url_target.substr(d_protocol.size());
 
  107        BESDEBUG(MODULE, prolog << 
"FILE_PROTOCOL d_path: " << d_path << endl);
 
  109    else if( d_protocol == HTTP_PROTOCOL || d_protocol == HTTPS_PROTOCOL){
 
  111        const auto path_i = find(prot_i, parse_url_target.cend(), 
'/');
 
  112        d_host.reserve(distance(prot_i, path_i));
 
  113        transform(prot_i, path_i, back_inserter(d_host), [](
int c) { 
return tolower(c); });
 
  115        auto query_i = find(path_i, parse_url_target.cend(), 
'?');
 
  116        d_path.assign(path_i, query_i);
 
  118        if (query_i != parse_url_target.cend())
 
  120        d_query.assign(query_i, parse_url_target.cend());
 
  123        if (!d_query.empty()) {
 
  124            parse_query_string();
 
  129        msg << prolog << 
"Unsupported URL protocol " << d_protocol << 
" found in URL: " << d_source_url_str;
 
  130        BESDEBUG(MODULE, msg.str() << endl);
 
  131        throw BESInternalError(msg.str(), __FILE__, __LINE__);
 
  134    BESDEBUG(MODULE, prolog << 
"END (parsing: '" << d_source_url_str << 
"')" << endl);
 
  140void url::parse_query_string() {
 
  141    vector<string> records;
 
  142    string delimiters = 
"&";
 
  144    for (
const auto &kvp: records) {
 
  145        size_t index = kvp.find(
'=');
 
  146        if (index != string::npos) {
 
  147            string key = kvp.substr(0, index);
 
  148            string value = kvp.substr(index + 1);
 
  149            BESDEBUG(MODULE, prolog << 
"key: " << key << 
" value: " << value << endl);
 
  151            const auto &record_it = d_query_kvp.find(key);
 
  152            if (record_it != d_query_kvp.end()) {
 
  153                record_it->second.push_back(value);
 
  155                vector<string> values{value};
 
  156                d_query_kvp[key] = values;
 
  168    const auto &it = d_query_kvp.find(key);
 
  169    if (it != d_query_kvp.end()) {
 
  170        vector<string> values = it->second;
 
  171        if (!it->second.empty()) {
 
  172            return  it->second[0];
 
 
  184    const auto &it = d_query_kvp.find(key);
 
  185    if (it != d_query_kvp.end()) {
 
  186        return it->second.size();
 
 
  199    const auto &it = d_query_kvp.find(key);
 
  200    if (it != d_query_kvp.end()) {
 
  204        throw BESInternalError(
string(
"Key '") + key + 
"' not found in url::query_parameter_values().", __FILE__, __LINE__);
 
 
  215    std::time_t now = system_clock::to_time_t(system_clock::now());
 
  217    BESDEBUG(MODULE, prolog << 
"now: " << now << endl);
 
  219    std::time_t expires_time = ingest_time() + HTTP_EFFECTIVE_URL_DEFAULT_EXPIRES_INTERVAL;
 
  224    if (!cf_expires.empty()) { 
 
  225        std::istringstream(cf_expires) >> expires_time;
 
  226        BESDEBUG(MODULE, prolog << 
"Using " << CLOUDFRONT_EXPIRES_HEADER_KEY << 
": " << expires_time << endl);
 
  228    else if (!aws_expires_str.empty()) {
 
  229        long long aws_expires;
 
  230        std::istringstream(aws_expires_str) >> aws_expires;
 
  234        std::time_t aws_start_time = ingest_time();
 
  239        if (!aws_date.empty()) {
 
  241            string year = aws_date.substr(0, 4);
 
  242            string month = aws_date.substr(4, 2);
 
  243            string day = aws_date.substr(6, 2);
 
  244            string hour = aws_date.substr(9, 2);
 
  245            string minute = aws_date.substr(11, 2);
 
  246            string second = aws_date.substr(13, 2);
 
  248            BESDEBUG(MODULE, prolog << 
"date: " << aws_date <<
 
  249                                    " year: " << year << 
" month: " << month << 
" day: " << day <<
 
  250                                    " hour: " << hour << 
" minute: " << minute << 
" second: " << second << endl);
 
  254            BESDEBUG(MODULE, prolog << 
"old_now: " << old_now << endl);
 
  256            gmtime_r(&old_now, &ti);
 
  257            ti.tm_year = stoi(year) - 1900;
 
  258            ti.tm_mon = stoi(month) - 1;
 
  259            ti.tm_mday = stoi(day);
 
  260            ti.tm_hour = stoi(hour);
 
  261            ti.tm_min = stoi(minute);
 
  262            ti.tm_sec = stoi(second);
 
  264            BESDEBUG(MODULE, prolog << 
"ti.tm_year: " << ti.tm_year <<
 
  265                                    " ti.tm_mon: " << ti.tm_mon <<
 
  266                                    " ti.tm_mday: " << ti.tm_mday <<
 
  267                                    " ti.tm_hour: " << ti.tm_hour <<
 
  268                                    " ti.tm_min: " << ti.tm_min <<
 
  269                                    " ti.tm_sec: " << ti.tm_sec << endl);
 
  271            aws_start_time = mktime(&ti);
 
  272            BESDEBUG(MODULE, prolog << 
"AWS start_time (computed): " << aws_start_time << endl);
 
  275        expires_time = aws_start_time + aws_expires;
 
  276        BESDEBUG(MODULE, prolog << 
"Using " << AMS_EXPIRES_HEADER_KEY << 
": " << aws_expires <<
 
  277                                " (expires_time: " << expires_time << 
")" << endl);
 
  280    std::time_t remaining = expires_time - now;
 
  281    BESDEBUG(MODULE, prolog << 
"expires_time: " << expires_time <<
 
  282                            "  remaining: " << remaining <<
 
  283                            " threshold: " << HTTP_URL_REFRESH_THRESHOLD << endl);
 
  285    stale = remaining < HTTP_URL_REFRESH_THRESHOLD;
 
  286    BESDEBUG(MODULE, prolog << 
"stale: " << (stale ? 
"true" : 
"false") << endl);
 
 
  299    ss << 
"http::url [" << 
this << 
"] " << endl;
 
  300    ss << indent << 
"d_source_url_str: " << d_source_url_str << endl;
 
  301    ss << indent << 
"d_protocol:   " << d_protocol << endl;
 
  302    ss << indent << 
"d_host:       " << d_host << endl;
 
  303    ss << indent << 
"d_path:       " << d_path << endl;
 
  304    ss << indent << 
"d_query:      " << d_query << endl;
 
  306    string idt = indent+indent;
 
  307    for(
const auto &it: d_query_kvp) {
 
  308        ss << indent << 
"d_query_kvp["<<it.first<<
"]: " << endl;
 
  310        for(
const auto &v: it.second) { 
 
  311            ss << idt << 
"value[" << i << 
"]: " << v << endl;
 
  315    ss << indent << 
"d_ingest_time:      " << d_ingest_time.time_since_epoch().count() << endl;
 
 
virtual std::string default_catalog_name() const
The name of the default catalog.
 
virtual std::string get_root() const =0
 
virtual std::string get_catalog_name() const
Get the name for this catalog.
 
exception thrown if internal error encountered
 
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
 
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
 
virtual size_t query_parameter_values_size(const std::string &key) const
Return the number of query string values for a given key.
 
virtual std::string query_parameter_value(const std::string &key) const
Get the value of a query string key.
 
virtual std::string dump()
 
virtual const std::vector< std::string > & query_parameter_values(const std::string &key) const
Get the vector of query string values for a given key.
 
virtual bool is_expired()
 
utility class for the HTTP catalog module