bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
history_utils.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the Hyrax data server.
4
5// Copyright (c) 2021 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#include "config.h"
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#ifdef HAVE_UNISTD_H
31#include <unistd.h>
32#endif
33
34#include <sys/stat.h>
35
36#include <fstream>
37#include <sstream> // std::stringstream
38#include <thread>
39#include <future>
40
41// rapidjson
42#include <stringbuffer.h>
43#include <writer.h>
44#include <document.h>
45
46#include <libdap/DDS.h>
47#include <libdap/DMR.h>
48#include <libdap/D4Group.h>
49#include <libdap/D4Attributes.h>
50#include <libdap/DataDDS.h>
51
52#include "BESContextManager.h"
53#include "BESDapResponseBuilder.h"
54#include "DapFunctionUtils.h"
55#include "BESDebug.h"
56#include "BESUtil.h"
57#include "TempFile.h"
58
59#include "FONcBaseType.h"
60#include "FONcTransmitter.h"
61#include "FONcTransform.h"
62
63using namespace std;
64
// Line-feed character (0x0a) used to terminate each CF history entry.
65#define NEW_LINE ((char)0x0a)
// Name of the CF history attribute that this file looks up and updates.
66#define CF_HISTORY_KEY "history"
// BES context name queried for a caller-supplied CF history entry.
67#define CF_HISTORY_CONTEXT "cf_history_entry"
// Name of the JSON history attribute that this file looks up and updates.
68#define HISTORY_JSON_KEY "history_json"
// BES context name queried for a caller-supplied JSON history entry.
69#define HISTORY_JSON_CONTEXT "history_json_entry"
70
71// Define this to keep the JSON history attribute out of the DAS and
72// drop it into the netCDF file directly.
73//
74// Look in FONcTransform.cc for *** in a comment for the locations
75// where the change could be made. For now, the current approach, which
76// is sort of convoluted, is working. jhrg 2/28/22
//
// While this is 0, the '#if !HISTORY_JSON_DIRECT_TO_NETCDF' sections in this
// file add/update history_json as a DAP attribute.
77#define HISTORY_JSON_DIRECT_TO_NETCDF 0
78
// Debug channel name passed as the first argument to BESDEBUG in this file.
79#define MODULE "fonc"
// Debug message prefix; expands to "history_utils::<calling function>() - ".
80#define prolog string("history_utils::").append(__func__).append("() - ")
81
82// I added this namespace because only two of these functions are called by
83// code outside this file and its associated unit test. jhrg 2/25/22
84
85namespace fonc_history_util {
86
/**
 * @brief Build a timestamp for the current moment.
 *
 * The current calendar time is converted with localtime() and formatted
 * as "YYYY-MM-DD HH:MM:SS".
 *
 * @return The formatted timestamp.
 */
string
get_time_now() {
    // time(nullptr) reads the same clock as time(&var); the value is simply
    // returned instead of being written through a pointer.
    const time_t current = time(nullptr);
    const struct tm *broken_down = localtime(&current);

    char formatted[128];
    strftime(formatted, sizeof(formatted), "%Y-%m-%d %H:%M:%S", broken_down);
    return string(formatted);
}
102
114string create_cf_history_txt(const string &request_url) {
115 // This code will be used only when the 'cf_history_context' is not set,
116 // which should be never in an operating server. However, when we are
117 // testing, often only the besstandalone code is running and the existing
118 // baselines don't set the context, so we have this. It must do something
119 // so the tests are not hopelessly obscure and filter out junk that varies
120 // by host (e.g., the names of cached files that have been decompressed).
121 // jhrg 6/3/16
122
123 stringstream ss;
124 ss << get_time_now() << " " << "Hyrax" << " " << request_url << NEW_LINE;
125
126 BESDEBUG(MODULE, prolog << "New cf history entry: '" << ss.str() << "'" << endl);
127 return ss.str();
128}
129
144template<typename RJSON_WRITER>
145void create_json_history_obj(const string &request_url, RJSON_WRITER &writer) {
146 const string schema = "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-0.1.0.json";
147
148 writer.StartObject();
149 writer.Key("$schema");
150 writer.String(schema.c_str());
151 writer.Key("date_time");
152 writer.String(get_time_now().c_str() /*jhrg time_str*/);
153 writer.Key("program");
154 writer.String("hyrax");
155 writer.Key("version");
156 writer.String("1.16.3");
157 writer.Key("parameters");
158 writer.StartArray();
159 writer.StartObject();
160 writer.Key("request_url");
161 writer.String(request_url.c_str());
162 writer.EndObject();
163 writer.EndArray();
164 writer.EndObject();
165}
166
167// NB: This is where I stopped writing unit tests. jhrg 2/25/22
168
178static string get_cf_history_entry(const string &request_url) {
179 bool foundIt = false;
180 string cf_history_entry = BESContextManager::TheManager()->get_context(CF_HISTORY_CONTEXT, foundIt);
181 if (!foundIt) {
182 // If the cf_history_entry context was not set by the incoming command then
183 // we compute and the value of the history string here.
184 cf_history_entry = create_cf_history_txt(request_url);
185 }
186 return cf_history_entry;
187}
188
194static string get_history_json_entry(const string &request_url) {
195 bool foundIt = false;
196 string history_json_entry = BESContextManager::TheManager()->get_context(HISTORY_JSON_CONTEXT, foundIt);
197 if (!foundIt) {
198 // If the history_json_entry context was not set as a context key on BESContextManager
199 // we compute and the value of the history string here.
200 rapidjson::Document history_json_doc;
201 history_json_doc.SetObject();
202 rapidjson::StringBuffer buffer;
203 rapidjson::Writer <rapidjson::StringBuffer> writer(buffer);
204 create_json_history_obj(request_url, writer);
205 history_json_entry = buffer.GetString();
206 }
207
208 BESDEBUG(MODULE, prolog << "Using history_json_entry: " << history_json_entry << endl);
209 return history_json_entry;
210}
211
222string json_append_entry_to_array(const string &source_array_str, const string &new_entry_str) {
223 rapidjson::Document target_array;
224 target_array.SetArray();
225 rapidjson::Document::AllocatorType &allocator = target_array.GetAllocator();
226 target_array.Parse(source_array_str.c_str()); // Parse json array
227
228 rapidjson::Document entry;
229 entry.Parse(new_entry_str.c_str()); // Parse new entry
230
231 target_array.PushBack(entry, allocator);
232
233 // Stringify JSON
234 rapidjson::StringBuffer buffer;
235 rapidjson::Writer <rapidjson::StringBuffer> writer(buffer);
236 target_array.Accept(writer);
237 return buffer.GetString();
238}
239
255static void update_history_json_attr(D4Attribute *global_attribute, const string &request_url) {
256 BESDEBUG(MODULE,
257 prolog << "Updating history_json entry for global DAP4 attribute: " << global_attribute->name() << endl);
258
259 string hj_entry_str = get_history_json_entry(request_url);
260 BESDEBUG(MODULE, prolog << "hj_entry_str: " << hj_entry_str << endl);
261
262 string history_json;
263
264 D4Attribute *history_json_attr = nullptr;
265 if (global_attribute->type() == D4AttributeType::attr_container_c) {
266 history_json_attr = global_attribute->attributes()->find(HISTORY_JSON_KEY);
267 }
268 else if (global_attribute->name() == HISTORY_JSON_KEY) {
269 history_json_attr = global_attribute;
270 }
271
272 if (!history_json_attr) {
273 // If there is no source history_json attribute then we make one from scratch
274 // and add it to the global_attribute
275 BESDEBUG(MODULE,
276 prolog << "Adding history_json entry to global_attribute " << global_attribute->name() << endl);
277 history_json_attr = new D4Attribute(HISTORY_JSON_KEY, attr_str_c);
278 global_attribute->attributes()->add_attribute_nocopy(history_json_attr);
279
280 // Promote the entry to a json array, assigning it the value of the attribute
281 history_json = "[" + hj_entry_str + "]";
282 BESDEBUG(MODULE, prolog << "CREATED history_json: " << history_json << endl);
283
284 }
285 else {
286 // We found an existing history_jason attribute!
287 // We know the convention is that this should be a single valued DAP attribute
288 // We need to get the existing json document, parse it, insert the entry into
289 // the document using rapidjson, and then serialize it to a new string value that
290 // We will use to overwrite the current value in the existing history_json_attr.
291 history_json = *history_json_attr->value_begin();
292
293 // This was in the production code, but I think it was left over from early
294 // debugging. I'm going to break up the long line so it's more obvious that
295 // is the case. jhrg 2/25/22
296 // history_json = R"([{"$schema":"https:\/\/harmony.earthdata.nasa.gov\/schemas\/history\/0.1.0\/history-0.1.0.json",
297 // "date_time":"2021-06-25T13:28:48.951+0000","program":"hyrax","version":"@HyraxVersion@",
298 // "parameters":[{"request_url":"http:\/\/localhost:8080\/opendap\/hj\/coads_climatology.nc.dap.nc4?GEN1"}]}])";
299
300 BESDEBUG(MODULE, prolog << "FOUND history_json: " << history_json << endl);
301
302 // Append the entry to the existing history_json array
303 history_json = json_append_entry_to_array(history_json, hj_entry_str);
304 BESDEBUG(MODULE, prolog << "NEW history_json: " << history_json << endl);
305
306 }
307
308 // Now that we have the update history_json element, serialized to a string, we use it to
309 // replace the value of the existing D4Attribute history_json_attr
310 vector <string> attr_vals;
311 attr_vals.push_back(history_json);
312 history_json_attr->add_value_vector(attr_vals); // This replaces the value
313}
314
322static string append_cf_history_entry(string cf_history, string cf_history_entry) {
323
324 stringstream cf_hist_new;
325 if (!cf_history.empty()) {
326 cf_hist_new << cf_history;
327 if (cf_history.back() != NEW_LINE)
328 cf_hist_new << NEW_LINE;
329 }
330
331 cf_hist_new << cf_history_entry;
332 if (cf_history_entry.back() != NEW_LINE)
333 cf_hist_new << NEW_LINE;
334
335 BESDEBUG(MODULE, prolog << "Updated cf history: '" << cf_hist_new.str() << "'" << endl);
336 return cf_hist_new.str();
337}
338
356static void update_cf_history_attr(D4Attribute *global_attribute, const string &request_url) {
357 BESDEBUG(MODULE,
358 prolog << "Updating cf history entry for global DAP4 attribute: " << global_attribute->name() << endl);
359
360 string cf_hist_entry = get_cf_history_entry(request_url);
361 BESDEBUG(MODULE, prolog << "New cf history entry: " << cf_hist_entry << endl);
362
363 string cf_history;
364 D4Attribute *history_attr = nullptr;
365 if (global_attribute->type() == D4AttributeType::attr_container_c) {
366 history_attr = global_attribute->attributes()->find(CF_HISTORY_KEY);
367 }
368 else if (global_attribute->name() == CF_HISTORY_KEY) {
369 history_attr = global_attribute;
370 }
371
372 if (!history_attr) {
373 //if there is no source cf history attribute make one and add it to the global_attribute.
374 BESDEBUG(MODULE, prolog << "Adding history entry to " << global_attribute->name() << endl);
375 history_attr = new D4Attribute(CF_HISTORY_KEY, attr_str_c);
376 global_attribute->attributes()->add_attribute_nocopy(history_attr);
377 }
378 else {
379 cf_history = history_attr->value(0);
380 }
381 cf_history = append_cf_history_entry(cf_history, cf_hist_entry);
382
383 std::vector <std::string> cf_hist_vec;
384 cf_hist_vec.push_back(cf_history);
385 history_attr->add_value_vector(cf_hist_vec);
386}
387
394void update_cf_history_attr(AttrTable *global_attr_tbl, const string &request_url) {
395
396 BESDEBUG(MODULE,
397 prolog << "Updating cf history entry for global DAP2 attribute: " << global_attr_tbl->get_name() << endl);
398
399 string cf_hist_entry = get_cf_history_entry(request_url);
400 BESDEBUG(MODULE, prolog << "New cf history entry: '" << cf_hist_entry << "'" << endl);
401
402 string cf_history = global_attr_tbl->get_attr(CF_HISTORY_KEY); // returns empty string if not found
403 BESDEBUG(MODULE, prolog << "Previous cf history: '" << cf_history << "'" << endl);
404
405 cf_history = append_cf_history_entry(cf_history, cf_hist_entry);
406 BESDEBUG(MODULE, prolog << "Updated cf history: '" << cf_history << "'" << endl);
407
408 global_attr_tbl->del_attr(CF_HISTORY_KEY, -1);
409 int attr_count = global_attr_tbl->append_attr(CF_HISTORY_KEY, "string", cf_history);
410 BESDEBUG(MODULE, prolog << "Found " << attr_count << " value(s) for the cf history attribute." << endl);
411}
412
418void update_history_json_attr(AttrTable *global_attr_tbl, const string &request_url) {
419
420 BESDEBUG(MODULE, prolog << "Updating history_json entry for global DAP2 attribute: " << global_attr_tbl->get_name()
421 << endl);
422
423 string hj_entry_str = get_history_json_entry(request_url);
424 BESDEBUG(MODULE, prolog << "New history_json entry: " << hj_entry_str << endl);
425
426 string history_json = global_attr_tbl->get_attr(HISTORY_JSON_KEY);
427 BESDEBUG(MODULE, prolog << "Previous history_json: " << history_json << endl);
428
429 if (history_json.empty()) {
430 //if there is no source history_json attribute
431 BESDEBUG(MODULE,
432 prolog << "Creating new history_json entry to global attribute: " << global_attr_tbl->get_name()
433 << endl);
434 history_json = "[" + hj_entry_str + "]"; // Hack to make the entry into a json array.
435 }
436 else {
437 history_json = json_append_entry_to_array(history_json, hj_entry_str);
438 global_attr_tbl->del_attr(HISTORY_JSON_KEY, -1);
439 }
440 BESDEBUG(MODULE, prolog << "New history_json: " << history_json << endl);
441 int attr_count = global_attr_tbl->append_attr(HISTORY_JSON_KEY, "string", history_json);
442 BESDEBUG(MODULE, prolog << "Found " << attr_count << " value(s) for the history_json attribute." << endl);
443}
444
451void updateHistoryAttributes(DDS *dds, const string &ce) {
452 string request_url = dds->filename();
453 // remove path info
454 request_url = request_url.substr(request_url.find_last_of('/') + 1);
455 // remove 'uncompress' cache mangling
456 request_url = request_url.substr(request_url.find_last_of('#') + 1);
457 if (!ce.empty()) request_url += "?" + ce;
458
459 // Add the new entry to the "history" attribute
460 // Get the top level Attribute table.
461 AttrTable &globals = dds->get_attr_table();
462
463 // Since many files support "CF" conventions the history tag may already exist
464 // in the source dataset, and the code should add an entry to it if possible.
465
466 // Used to indicate that we located a toplevel AttrTable whose name ends in
467 // "_GLOBAL" and that has an existing "history" attribute.
468 bool added_history = false;
469
470 if (globals.is_global_attribute()) {
471 // Here we look for a top level AttrTable whose name ends with "_GLOBAL" which is where, by convention,
472 // data ingest handlers place global level attributes found in the source dataset.
473 auto i = globals.attr_begin();
474 auto e = globals.attr_end();
475 for (; i != e; i++) {
476 AttrType attrType = globals.get_attr_type(i);
477 string attr_name = globals.get_name(i);
478 // Test the entry...
479 if (attrType == Attr_container && BESUtil::endsWith(attr_name, "_GLOBAL")) {
480 // We are going to append to an existing history attribute if there is one
481 // Or just add a history attribute if there is not one. In a most
482 // handy API moment, append_attr() does just this.
483
484 AttrTable *global_attr_tbl = globals.get_attr_table(i);
485 update_cf_history_attr(global_attr_tbl, request_url);
486#if !HISTORY_JSON_DIRECT_TO_NETCDF
487 // if we do not plan on writing the attribute directly using the netcdf API
488 // put the JSON in as a DAP attribute. jhrg 2/28/22
489 update_history_json_attr(global_attr_tbl, request_url);
490#endif
491 added_history = true;
492 BESDEBUG(MODULE, prolog << "Added history entries to " << attr_name << endl);
493 }
494 }
495
496 // if we didn't find a "_GLOBAL" container, we add both the CF History and
497 // JSON History attributes to a new "DAP_GLOBAL" attrribute container.
498 // We use the function update...() but those make a new attribute if one
499 // does not exist and, since this is a new container, they don;t alreay exist.
500 // jhrg 2/25/22
501 if (!added_history) {
502 auto dap_global_at = globals.append_container("DAP_GLOBAL");
503 dap_global_at->set_name("DAP_GLOBAL");
504 dap_global_at->set_is_global_attribute(true);
505
506 update_cf_history_attr(dap_global_at, request_url);
507#if !HISTORY_JSON_DIRECT_TO_NETCDF
508 update_history_json_attr(dap_global_at, request_url);
509#endif
510 BESDEBUG(MODULE, prolog << "No top level AttributeTable name matched '*_GLOBAL'. "
511 "Created DAP_GLOBAL AttributeTable and added history attributes to it." << endl);
512 }
513 }
514}
515
522void updateHistoryAttributes(DMR *dmr, const string &ce) {
523 string request_url = dmr->filename();
524 // remove path info
525 request_url = request_url.substr(request_url.find_last_of('/') + 1);
526 // remove 'uncompress' cache mangling
527 request_url = request_url.substr(request_url.find_last_of('#') + 1);
528 if (!ce.empty()) request_url += "?" + ce;
529
530 bool added_cf_history = false;
531 bool added_json_history = false;
532 D4Group *root_grp = dmr->root();
533 D4Attributes *root_attrs = root_grp->attributes();
534 for (auto attrs = root_attrs->attribute_begin(); attrs != root_attrs->attribute_end(); ++attrs) {
535 string name = (*attrs)->name();
536 BESDEBUG(MODULE, prolog << "Attribute name is " << name << endl);
537 if ((*attrs)->type() == D4AttributeType::attr_container_c && BESUtil::endsWith(name, "_GLOBAL")) {
538 // Update Climate Forecast history attribute.
539 update_cf_history_attr(*attrs, request_url);
540 added_cf_history = true;
541
542 // Update NASA's history_json attribute
543#if !HISTORY_JSON_DIRECT_TO_NETCDF
544 update_history_json_attr(*attrs, request_url);
545 added_json_history = true;
546#endif
547 }
548 else if (name == CF_HISTORY_KEY) { // A top level cf history attribute
549 update_cf_history_attr(*attrs, request_url);
550 added_cf_history = true;
551 }
552#if !HISTORY_JSON_DIRECT_TO_NETCDF
553 else if (name == HISTORY_JSON_KEY) { // A top level history_json attribute
554 update_cf_history_attr(*attrs, request_url);
555 added_json_history = true;
556 }
557#endif
558 }
559 if (!added_cf_history || !added_json_history) {
560 auto *dap_global = new D4Attribute("DAP_GLOBAL", attr_container_c);
561 root_attrs->add_attribute_nocopy(dap_global);
562 // CF history attribute
563 if (!added_cf_history) {
564 update_cf_history_attr(dap_global, request_url);
565 }
566 // NASA's history_json attribute
567#if !HISTORY_JSON_DIRECT_TO_NETCDF
568 if (!added_json_history) {
569 update_history_json_attr(dap_global, request_url);
570 }
571#endif
572 }
573}
574
575} // namespace fonc_history_util
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition BESUtil.cc:837
RAPIDJSON_DEFAULT_ALLOCATOR AllocatorType
Definition document.h:2207