bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
CmrCatalog.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2//
3// CmrCatalog.cc
4//
5// This file is part of BES cmr_module
6//
7// Copyright (c) 2018 OPeNDAP, Inc.
8// Author: Nathan Potter <ndp@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25// Please read the full copyright statement in the file COPYRIGHT_URI.
26//
27
28#include "config.h"
29
30#include <sys/types.h>
31#include <sys/stat.h>
32#include <dirent.h>
33
34#include <cstring>
35#include <cerrno>
36
37#include <sstream>
38#include <cassert>
39
40#include <memory>
41#include <algorithm>
42
43
44#include "BESUtil.h"
45#include "BESCatalogUtils.h"
46#include "BESCatalogEntry.h"
47
48
49#include "BESInfo.h"
50#include "BESContainerStorageList.h"
51#include "BESFileContainerStorage.h"
52#include "BESLog.h"
53
54#include "BESInternalError.h"
55#include "BESForbiddenError.h"
56#include "BESNotFoundError.h"
57#include "BESSyntaxUserError.h"
58
59#include "TheBESKeys.h"
60#include "BESDebug.h"
61
62#include "CatalogNode.h"
63#include "CatalogItem.h"
64
65#include "CmrApi.h"
66#include "CmrNames.h"
67#include "CmrCatalog.h"
68
69using namespace bes;
70using namespace std;
71
// Log-message prefix used by the BESDEBUG/dump calls in this file: evaluates to the
// std::string "CmrCatalog::<function>() - ", where <function> is __func__ at the point of use.
72#define prolog std::string("CmrCatalog::").append(__func__).append("() - ")
73
74namespace cmr {
75
87CmrCatalog::CmrCatalog(const std::string &name /* = “CMR” */) : BESCatalog(name) {
88 bool found = false;
89 TheBESKeys::TheKeys()->get_values(CMR_COLLECTIONS_KEY, d_collections, found);
90 if(!found){
91 throw BESInternalError(string("The CMR module must define at least one collection name using the key; '") + CMR_COLLECTIONS_KEY
92 + "'", __FILE__, __LINE__);
93 }
94
95 found = false;
96 TheBESKeys::TheKeys()->get_values(CMR_FACETS_KEY, d_facets, found);
97 if(!found){
98 throw BESInternalError(string("The CMR module must define at least one facet name using the key; '") + CMR_COLLECTIONS_KEY
99 + "'", __FILE__, __LINE__);
100 }
101}
102
103bes::CatalogNode * CmrCatalog::get_providers_node() const
104{
105 CmrApi cmrApi;
106 bes::CatalogNode *node;
107 string epoch_time = BESUtil::get_time(0,false);
108
109 node = new CatalogNode("/");
110 node->set_lmt(epoch_time);
111 node->set_catalog_name(CMR_CATALOG_NAME);
112 map<string, unique_ptr<Provider>> providers;
113 cmrApi.get_opendap_providers(providers);
114 for (const auto &provider : providers ) {
115 auto *collection = new CatalogItem();
116 collection->set_name(provider.second->id());
117 collection->set_description(provider.second->description_of_holding());
118 collection->set_type(CatalogItem::node);
119 node->add_node(collection);
120 }
121 return node;
122}
123
124
125bes::CatalogNode *CmrCatalog::get_collections_node(const string &path, const string &provider_id) const
126{
127 CmrApi cmrApi;
128 string epoch_time = BESUtil::get_time(0,false);
129
130
131 map<string, unique_ptr<Collection>> collections;
132 cmrApi.get_opendap_collections(provider_id, collections);
133 if(collections.empty()){
134 stringstream msg;
135 msg << "The provider " << provider_id << " does contain any OPeNDAP enabled collections.";
136 throw BESNotFoundError(msg.str(),__FILE__,__LINE__);
137 }
138
139 auto *catalog_node = new CatalogNode(path);
140 catalog_node->set_lmt(epoch_time);
141 catalog_node->set_catalog_name(CMR_CATALOG_NAME);
142 for (const auto &collection : collections ) {
143 auto *catalog_item = new CatalogItem();
144 catalog_item->set_name(collection.second->id());
145 catalog_item->set_description(collection.second->abstract());
146 catalog_item->set_type(CatalogItem::node);
147 catalog_node->add_node(catalog_item);
148 }
149 return catalog_node;
150}
151
152bes::CatalogNode *
153CmrCatalog::get_facets_node(const std::string &path, const std::string &collection_id) const {
154 BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection_id << endl);
155 string epoch_time = BESUtil::get_time(0,false);
156 auto node = new CatalogNode(path);
157 node->set_lmt(epoch_time);
158 node->set_catalog_name(CMR_CATALOG_NAME);
159 for(const auto & d_facet : d_facets){
160 auto *catalogItem = new CatalogItem();
161 catalogItem->set_name(d_facet);
162 catalogItem->set_type(CatalogItem::node);
163 catalogItem->set_lmt(epoch_time);
164 BESDEBUG(MODULE, prolog << "Adding facet: " << d_facet << endl);
165 node->add_node(catalogItem);
166 }
167 return node;
168}
169
170bes::CatalogNode *
171CmrCatalog::get_temporal_facet_nodes(const string &path, const vector<string> &path_elements, const string &collection_id) const
172{
173 BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
174 CmrApi cmrApi;
175 string epoch_time = BESUtil::get_time(0,false);
176 auto node = new CatalogNode(path);
177 node->set_lmt(epoch_time);
178 node->set_catalog_name(CMR_CATALOG_NAME);
179
180
181 switch( path_elements.size()){
182
183 case 0: // The path ends at temporal facet, so we need the year nodes.
184 {
185 vector<string> years;
186
187 BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection_id<< endl);
188 cmrApi.get_years(collection_id, years);
189 for(const auto & year : years){
190 auto *catalogItem = new CatalogItem();
191 catalogItem->set_type(CatalogItem::node);
192 catalogItem->set_name(year);
193 catalogItem->set_is_data(false);
194 catalogItem->set_lmt(epoch_time);
195 catalogItem->set_size(0);
196 node->add_node(catalogItem);
197 }
198 }
199 break;
200
201 case 1: // The path ends at years facet, so we need the month nodes.
202 {
203 const string &year = path_elements[0];
204 string day;
205 vector<string> months;
206
207 BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection_id << " year: " << year << endl);
208 cmrApi.get_months(collection_id, year, months);
209 for(const auto & month : months){
210 auto *catalogItem = new CatalogItem();
211 catalogItem->set_type(CatalogItem::node);
212 catalogItem->set_name(month);
213 catalogItem->set_is_data(false);
214 catalogItem->set_lmt(epoch_time);
215 catalogItem->set_size(0);
216 node->add_node(catalogItem);
217 }
218 }
219 break;
220
221 case 2: // The path ends at months facet, so we need the day nodes.
222 {
223 const string &year = path_elements[0];
224 const string &month = path_elements[1];
225 vector<string> days;
226
227 BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection_id << " year: " << year << " month: " << month << endl);
228 cmrApi.get_days(collection_id, year, month, days);
229 for(const auto &day : days){
230 auto *catalogItem = new CatalogItem();
231 catalogItem->set_type(CatalogItem::node);
232 catalogItem->set_name(day);
233 catalogItem->set_is_data(false);
234 catalogItem->set_lmt(epoch_time);
235 catalogItem->set_size(0);
236 node->add_node(catalogItem);
237 }
238 }
239 break;
240
241 case 3: // The path ends at the days facet, so we need the granule nodes.
242 {
243 const string &year = path_elements[0];
244 const string &month = path_elements[1];
245 const string &day = path_elements[2];
246 BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection_id << " year: " << year << " month: " << month << " day: " << day << endl);
247 vector<unique_ptr<GranuleUMM>> granules;
248 cmrApi.get_granules_umm(collection_id, year, month, day, granules);
249 for(const auto &granule : granules){
250 node->add_leaf(granule->getCatalogItem(get_catalog_utils()));
251 }
252 }
253 break;
254
255 case 4: // Looks like they are trying to get a particular granule...
256 {
257 // http://localhost:8080/opendap/CMR/EEDTEST/C1245618475-EEDTEST/temporal/2020/01/05/GPM_3IMERGHH.06%3A3B-HHR.MS.MRG.3IMERG.20200105-S000000-E002959.0000.V06B.HDF5.dmr.html
258 // provider_id: EEDTEST
259 // collection_conept_id: C1245618475-EEDTEST
260 // temporal/year/month/day
261 // granule??: GPM_3IMERGHH.06%3A3B-HHR.MS.MRG.3IMERG.20200105-S000000-E002959.0000.V06B.HDF5
262 const string &year = path_elements[0];
263 const string &month = path_elements[1];
264 const string &day = path_elements[2];
265 const string &granule_id = path_elements[3];
266 BESDEBUG(MODULE, prolog << "Request resolved to leaf granule/dataset name, collection: " << collection_id << " year: " << year
267 << " month: " << month << " day: " << day << " granule: " << granule_id << endl);
268 auto granule = cmrApi.get_granule(collection_id,year,month,day,granule_id);
269 if(granule){
270 auto *granuleItem = new CatalogItem();
271 granuleItem->set_type(CatalogItem::leaf);
272 granuleItem->set_name(granule->getName());
273 granuleItem->set_is_data(true);
274 granuleItem->set_lmt(granule->getLastModifiedStr());
275 granuleItem->set_size(granule->getSize());
276 node->set_leaf(granuleItem);
277 }
278 else {
279 throw BESNotFoundError("No such resource: "+path,__FILE__,__LINE__);
280 }
281 }
282 break;
283
284 default:
285 {
286 throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
287 }
288 }
289
290 return node;
291}
292
306bes::CatalogNode *
307CmrCatalog::get_node(const string &ppath) const
308{
309 string path = BESUtil::normalize_path(ppath,true, false);
310 vector<string> path_elements = BESUtil::split(path);
311 BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
312
313 string epoch_time = BESUtil::get_time(0,false);
314
315 // Not sure why this is being "cleaned" but it must have been a thing - ndp 11/9/22
316 for (auto & path_element : path_elements) {
317 if (path_element == "-")
318 path_element = "";
319 }
320
321 string provider_id;
322 string collection_id;
323
324 switch(path_elements.size()){
325 case 0: {
326 // path_elements.size()==0 path: / (providers node - providers with OPeNDAP serviced collections)
327 return get_providers_node();
328 }
329 case 1: {
330 // path_elements.size()==1 path: /provider_id/ (collections node - OPeNDAP serviced collections for provider_name)
331 provider_id = path_elements[0];
332 return get_collections_node(path, provider_id);
333 }
334 //case 2: {
335 // collection_id = path_elements[1];
336 // return get_facets_node(path,collection_id );
337 //}
338 default:
339 break;
340 }
341
342 // If we are here we know the path_elements vector is not empty and that it has MORE than
343 // three members. So we set provider_id and the collection_id to the first two values.
344 provider_id = path_elements[0];
345 path_elements.erase(path_elements.begin());
346
347 collection_id = path_elements[0];
348 path_elements.erase(path_elements.begin());
349
350 //string facet = path_elements[0];
351 //path_elements.erase(path_elements.begin());
352
353
354 // Now we QC the facet name,
355 //if( facet != CMR_TEMPORAL_NAVIGATION_FACET_KEY){
356 // throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
357 //}
358
359 return get_temporal_facet_nodes(path, path_elements,collection_id);
360}
361
362#if 0
// get_node_OLD: an older variant of get_node() that is excluded from compilation by
// the surrounding #if 0 / #endif.
//
// Path layout handled here (after normalize_path() and split()):
//   (empty)                                   -> a "/" node with one child per entry of d_collections
//   collection                                -> a node with one child per entry of d_facets
//   collection/facet                          -> year nodes        (facet must be "temporal")
//   collection/facet/year                     -> month nodes
//   collection/facet/year/month               -> day nodes
//   collection/facet/year/month/day           -> granule leaves
//   collection/facet/year/month/day/granule   -> the single granule, set as the node's leaf
//
// Errors thrown: BESNotFoundError when the collection is not in d_collections, when the
// facet is not in d_facets, when the facet is not "temporal", or when the requested
// granule is not returned by CmrApi::get_granule(); BESSyntaxUserError when the path has
// more than six elements.
//
// NOTE(review): granules are handled through raw Granule pointers here and nothing in
// this block deletes them, and node is not released on the throw paths - both would
// need attention before this code is re-enabled. Confirm against the current CmrApi.
376CmrCatalog::get_node_OLD(const string &ppath) const
377{
378 string path = BESUtil::normalize_path(ppath,true, false);
379 vector<string> path_elements = BESUtil::split(path);
380 BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
381
382 string epoch_time = BESUtil::get_time(0,false);
383
384 CmrApi cmrApi;
385 bes::CatalogNode *node;
386
387 if(path_elements.empty()){
388 node = new CatalogNode("/");
389 node->set_lmt(epoch_time);
390 node->set_catalog_name(CMR_CATALOG_NAME);
391 for(const auto & d_collection : d_collections){
392 auto *collection = new CatalogItem();
393 collection->set_name(d_collection);
394 collection->set_type(CatalogItem::node);
395 node->add_node(collection);
396 }
397 }
398 else {
399 for(auto & path_element : path_elements){
400 if(path_element=="-")
401 path_element = "";
402 }
403
404 string collection = path_elements[0];
405 BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_collections.size(): " << d_collections.size() << endl);
406 bool valid_collection = false;
407 for(size_t i=0; i<d_collections.size() && !valid_collection ; i++){
408 if(collection == d_collections[i])
409 valid_collection = true;
410 }
411 if(!valid_collection){
412 throw BESNotFoundError("The CMR catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
413 }
414 BESDEBUG(MODULE, prolog << "Collection " << collection << " is valid." << endl);
415 if(path_elements.size() >1){
416 string facet = path_elements[1];
417 bool valid_facet = false;
418 for(size_t i=0; i<d_facets.size() && !valid_facet ; i++){
419 if(facet == d_facets[i])
420 valid_facet = true;
421 }
422 if(!valid_facet){
423 throw BESNotFoundError("The CMR collection '"+collection+"' does not contain a facet named '"+facet+"'",__FILE__,__LINE__);
424 }
425
426 if(facet=="temporal"){
427 BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
428 node = new CatalogNode(path);
429 node->set_lmt(epoch_time);
430 node->set_catalog_name(CMR_CATALOG_NAME);
431
432
433 switch( path_elements.size()){
434
435 case 2: // The path ends at temporal facet, so we need the year nodes.
436 {
437 vector<string> years;
438
439 BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection<< endl);
440 cmrApi.get_years(collection, years);
441 for(auto & year : years){
442 auto *catalogItem = new CatalogItem();
443 catalogItem->set_type(CatalogItem::node);
444 catalogItem->set_name(year);
445 catalogItem->set_is_data(false);
446 catalogItem->set_lmt(epoch_time);
447 catalogItem->set_size(0);
448 node->add_node(catalogItem);
449 }
450 }
451 break;
452
453 case 3: // The path ends at years facet, so we need the month nodes.
454 {
455 string year = path_elements[2];
456 //string month;
457 string day;
458 vector<string> months;
459
460 BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection << " year: " << year << endl);
461 cmrApi.get_months(collection, year, months);
462 for(auto & month : months){
463 auto *catalogItem = new CatalogItem();
464 catalogItem->set_type(CatalogItem::node);
465 catalogItem->set_name(month);
466 catalogItem->set_is_data(false);
467 catalogItem->set_lmt(epoch_time);
468 catalogItem->set_size(0);
469 node->add_node(catalogItem);
470 }
471 }
472 break;
473
474 case 4: // The path ends at months facet, so we need the day nodes.
475 {
476 string year = path_elements[2];
477 string month = path_elements[3];
478 //string day("");
479 vector<string> days;
480
481 BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection << " year: " << year << " month: " << month << endl);
482 cmrApi.get_days(collection, year, month, days);
483 for(auto & day : days){
484 auto *catalogItem = new CatalogItem();
485 catalogItem->set_type(CatalogItem::node);
486 catalogItem->set_name(day);
487 catalogItem->set_is_data(false);
488 catalogItem->set_lmt(epoch_time);
489 catalogItem->set_size(0);
490 node->add_node(catalogItem);
491 }
492 }
493 break;
494
495 case 5: // The path ends at the days facet, so we need the granule nodes.
496 {
497 string year = path_elements[2];
498 string month = path_elements[3];
499 string day = path_elements[4];
500 BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection << " year: " << year << " month: " << month << " day: " << day << endl);
501 vector<Granule *> granules;
502 cmrApi.get_granules(collection, year, month, day, granules);
503 for(auto & granule : granules){
504 node->add_leaf(granule->getCatalogItem(get_catalog_utils()));
505 }
506 }
507 break;
508
509 case 6: // Looks like they are trying to get a particular granule...
510 {
511 string year = path_elements[2];
512 string month = path_elements[3];
513 string day = path_elements[4];
514 string granule_id = path_elements[5];
515 BESDEBUG(MODULE, prolog << "Request resolved to leaf granule/dataset name, collection: " << collection << " year: " << year
516 << " month: " << month << " day: " << day << " granule: " << granule_id << endl);
517 Granule *granule = cmrApi.get_granule(collection,year,month,day,granule_id);
518 if(granule){
519 auto *granuleItem = new CatalogItem();
520 granuleItem->set_type(CatalogItem::leaf);
521 granuleItem->set_name(granule->getName());
522 granuleItem->set_is_data(true);
523 granuleItem->set_lmt(granule->getLastModifiedStr());
524 granuleItem->set_size(granule->getSize());
525 node->set_leaf(granuleItem);
526 }
527 else {
528 throw BESNotFoundError("No such resource: "+path,__FILE__,__LINE__);
529 }
530 }
531 break;
532
533 default:
534 {
535 throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
536 }
537 break;
538 }
539
540 }
541 else {
542 throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
543 }
544 }
545 else {
546 BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection << endl);
547 node = new CatalogNode(path);
548 node->set_lmt(epoch_time);
549 node->set_catalog_name(CMR_CATALOG_NAME);
550 for(const auto & d_facet : d_facets){
551 auto *catalogItem = new CatalogItem();
552 catalogItem->set_name(d_facet);
553 catalogItem->set_type(CatalogItem::node);
554 catalogItem->set_lmt(epoch_time);
555 BESDEBUG(MODULE, prolog << "Adding facet: " << d_facet << endl);
556 node->add_node(catalogItem);
557 }
558 }
559 }
560 return node;
561}
562#endif
563
564
565
573void CmrCatalog::dump(ostream &strm) const
574{
575 strm << BESIndent::LMarg << prolog << "(" << (void *) this << ")" << endl;
576 BESIndent::Indent();
577
578 strm << BESIndent::LMarg << "catalog utilities: " << endl;
579 BESIndent::Indent();
580 get_catalog_utils()->dump(strm);
581 BESIndent::UnIndent();
582 BESIndent::UnIndent();
583}
584
585} // namespace cmr
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition BESCatalog.h:112
exception thrown if internal error encountered
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty values when skip_empty is true.
Definition BESUtil.cc:1068
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition BESUtil.cc:949
static std::string get_time(bool use_local_time=false)
Definition BESUtil.cc:1017
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
void get_opendap_providers(std::map< std::string, std::unique_ptr< cmr::Provider > > &providers) const
Definition CmrApi.cc:803
void get_granules(const std::string &collection_name, const std::string &r_year, const std::string &r_month, const std::string &r_day, std::vector< std::unique_ptr< cmr::Granule > > &granule_objs) const
Definition CmrApi.cc:680
void get_days(const std::string &collection_name, const std::string &r_year, const std::string &r_month, std::vector< std::string > &days_result) const
Definition CmrApi.cc:518
void get_months(const std::string &collection_name, const std::string &year, std::vector< std::string > &months_result) const
Definition CmrApi.cc:449
bes::CatalogNode * get_node(const std::string &path) const override
CmrCatalog(const std::string &name=CMR_CATALOG_NAME)
A catalog based on NASA's CMR system.
Definition CmrCatalog.cc:87
void dump(std::ostream &strm) const override
dumps information about this object