bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
GranuleUMM.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of cmr_module, A C++ MODULE that can be loaded in to
4// the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5
6// Copyright (c) 2015 OPeNDAP, Inc.
7// Author: Nathan Potter <ndp@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25/*
26 * GranuleUMM.cc
27 *
28 * Created on: July, 13 2018
29 * Author: ndp
30 */
31#include "config.h"
32
33#include <sstream>
34#include <algorithm>
35#include <string>
36
37#include "nlohmann/json.hpp"
38
39#include "BESDebug.h"
40#include "BESUtil.h"
41
42#include "CmrNames.h"
43#include "CmrApi.h"
44#include "CmrInternalError.h"
45#include "GranuleUMM.h"
46#include "JsonUtils.h"
47
48
49using namespace std;
50
51#define prolog std::string("GranuleUMM::").append(__func__).append("() - ")
52
53
54namespace cmr {
55
60GranuleUMM::GranuleUMM(const nlohmann::json& granule_umm_json)
61{
62 BESDEBUG(MODULE, prolog << "BEGIN" << endl << granule_umm_json.dump(2) << endl);
63 setConceptId(granule_umm_json);
64 setName(granule_umm_json);
65 setSize(granule_umm_json);
66 setDapServiceUrl(granule_umm_json);
67 setDataGranuleUrl(granule_umm_json);
68 setLastModifiedStr(granule_umm_json);
69 setDescription(granule_umm_json);
70}
71
72
73void GranuleUMM::setName(const nlohmann::json& granule_umm_json)
74{
75 JsonUtils json;
76 const auto &umm_obj = json.qc_get_object(CMR_UMM_UMM_KEY, granule_umm_json);
77 this->d_name = json.get_str_if_present(CMR_UMM_GRANULE_UR_KEY, umm_obj);
78}
79
84void GranuleUMM::setDescription(const nlohmann::json& /*granule_umm_json*/)
85{
86 // Not implemented because the .umm_json response does not contain an obvious candidate for Description.
87}
88
89void GranuleUMM::setConceptId(const nlohmann::json& granule_umm_json)
90{
91 JsonUtils json;
92 const auto &meta_obj = json.qc_get_object(CMR_UMM_META_KEY, granule_umm_json);
93
94 BESDEBUG(MODULE, prolog << "META OBJECT" << endl << json.probe_json(meta_obj) << endl);
95
96 this->d_id = json.get_str_if_present(CMR_UMM_CONCEPT_ID_KEY, meta_obj);
97}
98
99
108void GranuleUMM::setSize(const nlohmann::json& granule_umm_json)
109{
110 JsonUtils json;
111 const auto &umm_obj = json.qc_get_object(CMR_UMM_UMM_KEY, granule_umm_json);
112 const auto &data_granule_obj = json.qc_get_object(CMR_UMM_DATA_GRANULE_KEY, umm_obj);
113 BESDEBUG(MODULE, prolog << CMR_UMM_DATA_GRANULE_KEY << data_granule_obj.dump(2) << endl );
114 const auto &arch_and_info_array = json.qc_get_array(CMR_UMM_ARCHIVE_AND_DIST_INFO_KEY, data_granule_obj);
115 //
116 // At this point we just look at the first entry in the arch_and_info_array.
117 // What does more than a single entry mean? It means that using this method is not deterministic.
118 // Consider this relevant JSON fragment:
119 //
120 // "DataGranule" : {
121 // "ArchiveAndDistributionInformation" : [ {
122 // "SizeUnit" : "MB",
123 // "Size" : 29.323293685913086,
124 // "Checksum" : {
125 // "Value" : "b807626928f3176bd969664090ad4b05",
126 // "Algorithm" : "MD5"
127 // },
128 // "Name" : "saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc"
129 // }, {
130 // "SizeUnit" : "MB",
131 // "Size" : 1.0967254638671875E-4,
132 // "Checksum" : {
133 // "Value" : "f617b33fb4ace5c26b044a418d22fcbf",
134 // "Algorithm" : "MD5"
135 // },
136 // "Name" : "saildrone-gen_4-baja_2018-sd1002-20180411T180000-20180611T055959-1_minutes-v1.nc.md5"
137 // } ],
138 // "DayNightFlag" : "Unspecified",
139 // "ProductionDateTime" : "2018-08-29T21:02:49.000Z"
140 // },
141 //
142 // This fragment contains two entries in the ArchiveAndDistributionInformation array. One entry for the native
143 // netcdf file, and the other entry for its MD5 checksum. Assuming that the order is not fixed in some way
144 // We would be lucky to get the right size for the granule from the first element. Nothing in this to
145 // distinguish one entry as the correct one other than the name ending in .nc vs .nc.md5. That works for this
146 // example, but I doubt it works for hdf5 files (.h5) apand I doubt there semantic constraints on the value of
147 // Name in the ArchiveAndDistributionInformation.
148 //
149 for(const auto &entry : arch_and_info_array){
150 BESDEBUG(MODULE, prolog << CMR_UMM_ARCHIVE_AND_DIST_INFO_KEY << entry.dump(2) << endl );
151 string name = json.get_str_if_present(CMR_UMM_NAME_KEY,entry);
152 BESDEBUG(MODULE, prolog << CMR_UMM_NAME_KEY << ": " << name << endl );
153
154 // We want the granule and not its md5 hash, so we check for that.
155 // There may be other entries in the array but *shrugs* what's a person to do?
156 if(BESUtil::endsWith(name,".md5")) {
157 BESDEBUG(MODULE, prolog << "Detected MD5 hash file: " << name << " SKIPPING." << endl);
158 }
159 else {
160 d_size_orig = json.qc_double(CMR_UMM_SIZE_KEY, entry);
161 BESDEBUG(MODULE, prolog << "d_size_orig: " << d_size_orig << endl );
162
163 d_size_units_str = json.get_str_if_present(CMR_UMM_SIZE_UNIT_KEY, entry).c_str();
164 std::transform(d_size_units_str.begin(), d_size_units_str.end(),d_size_units_str.begin(), ::toupper);
165 BESDEBUG(MODULE, prolog << "d_size_units_str: " << d_size_units_str << endl );
166
167 if(d_size_units_str.empty()){
168 BESDEBUG(MODULE, prolog << "Size content is incomplete. size: " << d_size_str << " units: " << d_size_units_str << endl );
169 return;
170 }
171
172 double size;
173 size = d_size_orig;
174
175 if(d_size_units_str == "KB"){
176 size *= 1024;
177 }
178 if(d_size_units_str == "MB"){
179 size *= 1024ULL*1024ULL;
180 }
181 else if (d_size_units_str=="GB"){
182 size *= 1024ULL*1024ULL*1024ULL;
183 }
184 else if (d_size_units_str=="TB"){
185 size *= 1024ULL*1024ULL*1024ULL*1024ULL;
186 }
187 d_size = static_cast<uint64_t>(size);
188
189 BESDEBUG(MODULE, prolog << "d_size: " << d_size << " bytes" << endl );
190 break;
191 }
192 }
193}
194
195
200void GranuleUMM::setLastModifiedStr(const nlohmann::json& granule_umm_json)
201{
202 JsonUtils json;
203 const auto &umm_obj = json.qc_get_object(CMR_UMM_META_KEY, granule_umm_json);
204 this->d_last_modified_time = json.get_str_if_present(CMR_UMM_REVISION_DATE_KEY, umm_obj);
205}
206
207
211void GranuleUMM::setDataGranuleUrl(const nlohmann::json& granule_umm_json)
212{
213 JsonUtils json;
214 const auto& umm_obj = json.qc_get_object(CMR_UMM_UMM_KEY, granule_umm_json);
215 const auto& related_urls = json.qc_get_array(CMR_UMM_RELATED_URLS_KEY, umm_obj);
216 for(auto &url_obj : related_urls){
217 string url = json.get_str_if_present(CMR_UMM_URL_KEY, url_obj);
218 string type = json.get_str_if_present(CMR_UMM_TYPE_KEY, url_obj);
219 if(type == CMR_UMM_TYPE_GET_DATA_VALUE){
220 this->d_data_access_url = url;
221 return;
222 }
223 }
224 stringstream msg;
225 msg << "ERROR: Failed to locate Data Granule URL (";
226 msg << CMR_UMM_RELATED_URLS_KEY << "). json: " << endl << related_urls.dump(2) << endl;
227 BESDEBUG(MODULE, prolog << msg.str() << endl);
228}
248void GranuleUMM::setDapServiceUrl(const nlohmann::json& granule_umm_json)
249{
250 const std::string DAP2_HTML_SUFFIX(".html");
251 const std::string DAP4_HTML_SUFFIX(".dmr.html");
252
253 JsonUtils json;
254 const auto& umm_obj = json.qc_get_object(CMR_UMM_UMM_KEY, granule_umm_json);
255 const auto& related_urls = json.qc_get_array(CMR_UMM_RELATED_URLS_KEY, umm_obj);
256 for(auto &related_url_obj : related_urls){
257 string url = json.get_str_if_present(CMR_UMM_URL_KEY,related_url_obj);
258 string type = json.get_str_if_present(CMR_UMM_TYPE_KEY,related_url_obj);
259 string subtype = json.get_str_if_present(CMR_UMM_SUBTYPE_KEY,related_url_obj);
260 bool is_dap_service = ((type == CMR_UMM_TYPE_USE_SERVICE_API_VALUE || type == CMR_UMM_TYPE_GET_DATA_VALUE)
261 && subtype == CMR_UMM_SUBTYPE_KEY_OPENDAP_DATA_VALUE);
262 if(is_dap_service){
263 // This next is a hack for bad CMR records in which the URL incorrectly references the DAP2 or
264 // DAP4 Data Request form and not the unadorned Dataset URL.
265 if(BESUtil::endsWith(url, DAP2_HTML_SUFFIX)){
266 url = url.substr(0,url.length() - DAP2_HTML_SUFFIX.length());
267 }
268 else if(BESUtil::endsWith(url, DAP4_HTML_SUFFIX)){
269 url = url.substr(0,url.length() - DAP4_HTML_SUFFIX.length());
270 }
271 d_dap_service_url = url;
272 return;
273 }
274 }
275 stringstream msg;
276 msg << "ERROR: Failed to locate DAP service URL (";
277 msg << CMR_UMM_RELATED_URLS_KEY << "). json: " << endl << related_urls.dump(2) << endl;
278 BESDEBUG(MODULE, prolog << msg.str() << endl);
279}
280
287{
288 auto *item = new bes::CatalogItem();
289 item->set_type(bes::CatalogItem::leaf);
290 item->set_name(getName());
291 item->set_lmt(getLastModifiedStr());
292 item->set_size(getSize());
293 item->set_description(getDescription());
294
295 if(!getDapServiceUrl().empty()) {
296 item->set_dap_service_url(getDapServiceUrl());
297 }
298 bool is_data = d_catalog_utils->is_data(item->get_name()) || !getDapServiceUrl().empty();
299 item->set_is_data(is_data);
300
301 return item;
302}
303
304
305
306
307} //namespace cmr
bool is_data(const std::string &item) const
is there a handler that can process this
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition BESUtil.cc:837
bes::CatalogItem * getCatalogItem(const BESCatalogUtils *d_catalog_utils)
GranuleUMM(const nlohmann::json &granule_umm_json)
Definition GranuleUMM.cc:60
const nlohmann::json & qc_get_object(const std::string &key, const nlohmann::json &json_obj) const
Definition JsonUtils.cc:335