bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
DapUtils.cc
1// This file is part of bes, A C++ back-end server implementation framework
2// for the OPeNDAP Data Access Protocol.
3
4// Copyright (c) 2022 OPeNDAP
5// Author: James Gallagher <jgallagher@opendap.org>
6//
7// This library is free software; you can redistribute it and/or
8// modify it under the terms of the GNU Lesser General Public
9// License as published by the Free Software Foundation; either
10// version 2.1 of the License, or (at your option) any later version.
11//
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15// Lesser General Public License for more details.
16//
17// You should have received a copy of the GNU Lesser General Public
18// License along with this library; if not, write to the Free Software
19// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
21//
22// Created by James Gallagher on 4/6/22.
23//
24
25#include "config.h"
26
27#include <iostream>
28#include <sstream>
29#include <unordered_map>
30#include <cmath>
31
32#include <libdap/DDS.h>
33#include <libdap/DMR.h>
34#include <libdap/D4Group.h>
35#include <libdap/Vector.h>
36#include <libdap/Array.h>
37#include <libdap/Constructor.h>
38#include <libdap/XMLWriter.h>
39
40#include "TheBESKeys.h"
41#include "BESContextManager.h"
42#include "BESUtil.h"
43#include "BESLog.h"
44#include "BESDebug.h"
45#include "BESStopWatch.h"
46#include "BESSyntaxUserError.h"
47#include "DapUtils.h"
48
49// Because prolog evaluates to a string it cannot be part of a constexpr, so we continue with the macro version.
50#define prolog std::string("dap_utils::").append(__func__).append("() - ")
51
52using namespace libdap;
53
54namespace dap_utils {
55
// Names of the BES configuration keys (looked up through TheBESKeys) that hold the size limits.
constexpr const char *BES_KEYS_MAX_RESPONSE_SIZE_KEY = "BES.MaxResponseSize.bytes";
constexpr const char *BES_KEYS_MAX_VAR_SIZE_KEY = "BES.MaxVariableSize.bytes";

// Names of the BES command-context entries (looked up through BESContextManager) for the same limits.
constexpr const char *BES_CONTEXT_MAX_RESPONSE_SIZE_KEY = "max_response_size";
constexpr const char *BES_CONTEXT_MAX_VAR_SIZE_KEY = "max_variable_size";

// Byte ceilings used when the DAP2 limits are enforced: 2^31 and 2^32.
constexpr uint64_t twoGB = uint64_t{1} << 31;
constexpr uint64_t fourGB = uint64_t{1} << 32;

// MODULE and MODULE_VERBOSE are kept inside the namespace so that they are isolated from
// identically named constants in other bes/modules.
constexpr const char *MODULE = "dap_utils";
constexpr const char *MODULE_VERBOSE = "dap_utils_verbose";
67
73static void log_response_and_memory_size_helper(const std::string &caller_id, long response_size) {
74 auto mem_size = BESUtil::get_current_memory_usage(); // size in KB or 0. jhrg 4/6/22
75 if (mem_size) {
76 INFO_LOG(caller_id + "response size: " + std::to_string(response_size) + "KB"+BESLog::mark+"memory used by process: " +
77 std::to_string(mem_size) + "KB");
78 }
79 else {
80 INFO_LOG(caller_id + "response size: " + std::to_string(response_size) + "KB");
81 }
82}
83
92void log_response_and_memory_size(const std::string &caller_id, DDS *const *dds)
93{
94 auto response_size = (long)(*dds)->get_request_size_kb(true);
95 log_response_and_memory_size_helper(caller_id, response_size);
96}
97
106void log_response_and_memory_size(const std::string &caller_id, DMR &dmr)
107{
108 // The request_size_kb() method is not marked const. Fix. jhrg 4/6/22
109 auto response_size = (long)dmr.request_size_kb(true);
110 log_response_and_memory_size_helper(caller_id, response_size);
111}
112
121void log_response_and_memory_size(const std::string &caller_id, libdap::XMLWriter &dmrpp_writer)
122{
123 auto response_size = (long)dmrpp_writer.get_doc_size() / 1000;
124 log_response_and_memory_size_helper(caller_id, response_size);
125}
126
/**
 * @brief Build the user-facing error text for a DAP2 request that touches DAP4-only data types.
 *
 * The text reports how many incompatible items were found, lists each one, and then suggests
 * ways of switching the request to the DAP4 data model.
 *
 * @param inventory One entry per incompatible variable or attribute; each entry is printed on
 * its own line and the number of entries is reported in the text.
 * @return The complete, multi-line message.
 */
std::string mk_model_incompatibility_message(const std::vector<std::string> &inventory){
    std::ostringstream msg;
    msg << "\n";
    // Typo fixed here: the text used to read "Your have asked".
    msg << "ERROR: You have asked this service to utilize the DAP2 data model\n";
    msg << "to process your request. Unfortunately the requested dataset contains\n";
    msg << "data types that cannot be represented in DAP2.\n ";
    msg << "\n";
    msg << "There are " << inventory.size() << " incompatible variables and/or attributes referenced \n";
    msg << "in your request.\n";
    msg << "Incompatible variables: \n";
    msg << "\n";
    for (const auto &entry : inventory) {
        msg << " " << entry << "\n";
    }
    msg << "\n";
    msg << "You may resolve these issues by asking the service to use\n";
    msg << "the DAP4 data model instead of the DAP2 model.\n";
    msg << "\n";
    msg << " - NetCDF If you wish to receive your response encoded as a\n";
    msg << " netcdf file please note that netcdf-3 has similar representational\n";
    msg << " constraints as DAP2, while netcdf-4 does not. In order to request\n";
    // Typo fixed here: the text used to read "nectdf-4".
    msg << " a DAP4 model netcdf-4 response, change your request URL from \n";
    msg << " dataset_url.nc to dataset_url.dap.nc4\n";
    msg << "\n";
    msg << " - DAP Clients If you are using a specific DAP client like pyDAP or\n";
    msg << " Panoply you may be able to signal the tool to use DAP4 by changing\n";
    msg << " the protocol of the dataset_url from https:// to dap4:// \n";
    msg << "\n";
    msg << " - If you are using the service's Data Request Form for your dataset\n";
    msg << " you can find the DAP4 version by changing form_url.html to form_url.dmr.html\n";
    msg << "\n";
    return msg.str();
}
164
165
172void throw_for_dap4_typed_vars_or_attrs(DDS *dds, const std::string &file, unsigned int line)
173{
174 vector<string> inventory;
175 if(dds->is_dap4_projected(inventory)){
176 string msg = mk_model_incompatibility_message(inventory);
177 throw BESSyntaxUserError(msg, file, line);
178 }
179}
180
187void throw_for_dap4_typed_attrs(DAS *das, const std::string &file, unsigned int line)
188{
189 vector<string> inventory;
190 if(das->get_top_level_attributes()->has_dap4_types("/",inventory)){
191 string msg = mk_model_incompatibility_message(inventory);
192 throw BESSyntaxUserError(msg, file, line);
193 }
194}
195
196
203uint64_t count_requested_elements(const D4Dimension *d4dim){
204 uint64_t elements = 0;
205 if(d4dim->constrained()){
206 elements = (d4dim->c_stop() - d4dim->c_start());
207 if(d4dim->c_stride()){
208 elements = elements / d4dim->c_stride();
209 }
210 } else{
211 elements = d4dim->size();
212 }
213 if(!elements)
214 elements = 1;
215
216 return elements;
217}
218
225uint64_t count_requested_elements(const Array::dimension &dim){
226 uint64_t elements;
227 elements = (dim.stop - dim.start) / dim.stride;
228 if(!elements)
229 elements = 1;
230 return elements;
231}
232
237std::string get_dap_array_dims_str(libdap::Array &a){
238 stringstream my_dims;
239 for (auto dim_iter = a.dim_begin(), end_iter = a.dim_end(); dim_iter != end_iter; ++dim_iter) { stringstream ce;
240 const auto &dim = *dim_iter;
241 ce << dim.start << ":";
242 if(dim.stride != 1){
243 ce << dim.stride << ":";
244 }
245 ce << dim.stop;
246 my_dims << "[" << ce.str() << "]";
247 }
248 return my_dims.str();
249}
250
256std::string get_dap_decl(libdap::BaseType *var) {
257
258 stringstream ss;
259 if(var->is_vector_type()){
260 auto myArray = dynamic_cast<libdap::Array *>(var);
261 if(myArray) {
262 ss << myArray->prototype()->type_name() << " " << var->FQN();
263 ss << get_dap_array_dims_str(*myArray);
264 }
265 else {
266 auto myVec = dynamic_cast<libdap::Vector *>(var);
267 if(myVec){
268 ss << myVec->prototype()->type_name() << " " << var->FQN();
269 ss << "[" << myVec->length() << "]";
270 }
271 }
272 }
273 else {
274 ss << var->type_name() << var->FQN();
275 }
276 return ss.str();
277}
278
282uint64_t crsaibv_process_ctor(const libdap::Constructor *ctor,
283 const uint64_t max_var_size,
284 std::vector<std::string> &too_big );
285
296uint64_t crsaibv_process_variable(
297 BaseType *var,
298 const uint64_t max_var_size,
299 std::vector<std::string> &too_big
300){
301
302 uint64_t response_size = 0;
303
304 if(var->send_p()) {
305 if (var->is_constructor_type()) {
306 response_size += crsaibv_process_ctor(dynamic_cast<libdap::Constructor *>(var), max_var_size, too_big);
307 }
308 else {
309 // width_ll() returns the number of bytes needed to hold the data
310 uint64_t vsize = var->width_ll(true);
311 response_size += vsize;
312
313 BESDEBUG(MODULE_VERBOSE, prolog << " " << get_dap_decl(var) << "(" << vsize << " bytes)" << endl);
314 if ( (max_var_size > 0) && (vsize > max_var_size) ) {
315 string entry = get_dap_decl(var) + " (" + to_string(vsize) + " bytes)";
316 too_big.emplace_back(entry);
317 BESDEBUG(MODULE,
318 prolog << get_dap_decl(var) << "(" << vsize
319 << " bytes) is bigger than the max_var_size of "
320 << max_var_size << " bytes. too_big.size(): " << too_big.size() << endl);
321 }
322 }
323 }
324 return response_size;
325}
326
336 uint64_t crsaibv_process_ctor(const libdap::Constructor *ctor,
337 const uint64_t max_var_size,
338 std::vector<std::string> &too_big
339 ){
340 uint64_t response_size = 0;
341 if (ctor) {
342 for (auto dap_var: ctor->variables()) {
343 response_size += crsaibv_process_variable(dap_var, max_var_size, too_big);
344 }
345 }
346 else {
347 BESDEBUG(MODULE,
348 prolog << "ERROR Received a null pointer to Constructor. " <<
349 "It is likely that a dynamic_cast failed.." << endl);
350 }
351 return response_size;
352}
353
354
364uint64_t compute_response_size_and_inv_big_vars(
365 const libdap::D4Group *grp,
366 const uint64_t max_var_size,
367 std::vector<std::string> &too_big)
368{
369 BESDEBUG(MODULE_VERBOSE, prolog << "BEGIN " << grp->type_name() << " " << grp->FQN() << endl);
370
371 uint64_t response_size = 0;
372 // Process child variables.
373 for(auto dap_var:grp->variables()){
374 response_size += crsaibv_process_variable(dap_var, max_var_size, too_big);
375 }
376
377 // Process child groups.
378 for (const auto child_grp: grp->groups()) {
379 if (child_grp->send_p()) {
380 response_size += compute_response_size_and_inv_big_vars(child_grp, max_var_size, too_big);
381 }
382 else {
383 BESDEBUG(MODULE_VERBOSE, prolog << "SKIPPING: " << grp->type_name() <<
384 " " << child_grp->FQN() << " (No child selected.)" << endl);
385 }
386 }
387 BESDEBUG(MODULE_VERBOSE, prolog << "END " << grp->type_name() << " " << grp->FQN() << " ("
388 "response_size: " << response_size << ", "<<
389 "too_big_vars: " << too_big.size() << ")" << endl);
390 return response_size;
391}
392
404uint64_t compute_response_size_and_inv_big_vars(
405 libdap::DMR &dmr,
406 const uint64_t max_var_size,
407 std::vector<std::string> &too_big)
408{
409 BES_STOPWATCH_START(MODULE, prolog + "DMR");
410 return compute_response_size_and_inv_big_vars(dmr.root(), max_var_size,too_big);
411}
412
423uint64_t compute_response_size_and_inv_big_vars(
424 const libdap::DDS &dds,
425 const uint64_t max_var_size,
426 std::vector<std::string> &too_big)
427{
428 BES_STOPWATCH_START(MODULE, prolog + "DDS");
429 uint64_t response_size = 0;
430 // Process child variables.
431 for(auto dap_var:dds.variables()){
432 response_size += crsaibv_process_variable(dap_var, max_var_size, too_big);
433 }
434 return response_size;
435}
436
437
438
448void get_max_sizes_bytes(uint64_t &max_response_size_bytes, uint64_t &max_var_size_bytes, bool is_dap2)
449{
450 BES_STOPWATCH_START(MODULE, prolog + (is_dap2?"DAP2":"DAP4"));
451
452 // The BES configuration is help in TheBESKeys, so we read from there.
453 uint64_t config_max_resp_size = TheBESKeys::TheKeys()->read_uint64_key(BES_KEYS_MAX_RESPONSE_SIZE_KEY, 0);
454 BESDEBUG(MODULE, prolog << "config_max_resp_size: " << config_max_resp_size << "\n");
455 max_response_size_bytes = config_max_resp_size; // This is the default state, the command can only make it smaller
456
457 uint64_t cmd_context_max_resp_size;
458 bool found;
459 cmd_context_max_resp_size = BESContextManager::TheManager()->get_context_uint64(BES_CONTEXT_MAX_RESPONSE_SIZE_KEY, found);
460 if (!found) {
461 BESDEBUG(MODULE,
462 prolog << "Did not locate BESContext key: " << BES_CONTEXT_MAX_RESPONSE_SIZE_KEY << " SKIPPING."
463 << "\n");
464 }
465 else {
466 BESDEBUG(MODULE, prolog << "cmd_context_max_resp_size: " << cmd_context_max_resp_size << "\n");
467 // If the cmd_context_max_resp_size==0, then there's nothing to do because
468 // we prioritize the bes configuration. If the config_max_resp_size=0 it's a no-op, and if config_max_resp_size
469 // is some other value then it's not unlimited, and we're not letting the command context make these values
470 // bigger than the one in the BES configuration, only smaller.
471 if(cmd_context_max_resp_size != 0 && (cmd_context_max_resp_size < config_max_resp_size || config_max_resp_size == 0) ){
472 // If the context value is effectively less than the config value, use the context value.
473 max_response_size_bytes = cmd_context_max_resp_size;
474 }
475 }
476 BESDEBUG(MODULE, prolog << "max_response_size_bytes: " << max_response_size_bytes << "\n");
477
478 // The BES configuration is help in TheBESKeys, so we read from there.
479 uint64_t config_max_var_size = TheBESKeys::TheKeys()->read_uint64_key(BES_KEYS_MAX_VAR_SIZE_KEY, 0);
480 BESDEBUG(MODULE, prolog << "config_max_var_size: " << config_max_var_size << "\n");
481 max_var_size_bytes = config_max_var_size;
482
483 uint64_t cmd_context_max_var_size=0;
484 found = false;
485 cmd_context_max_var_size = BESContextManager::TheManager()->get_context_uint64(BES_CONTEXT_MAX_VAR_SIZE_KEY, found);
486 if (!found) {
487 max_var_size_bytes = config_max_var_size;
488 BESDEBUG(MODULE, prolog << "Did not locate BESContext key: " << BES_CONTEXT_MAX_VAR_SIZE_KEY << " SKIPPING." << "\n");
489 }
490 else if( (cmd_context_max_var_size != 0) && (cmd_context_max_var_size < config_max_var_size || config_max_var_size == 0) ){
491 // If the context value is effectively less than the config value, use the context value.
492 max_var_size_bytes = cmd_context_max_var_size;
493 }
494
495 // Enforce DAP2 limits?
496 if ( is_dap2){
497 if (max_var_size_bytes == 0 || max_var_size_bytes > twoGB) {
498 max_var_size_bytes = twoGB;
499 BESDEBUG(MODULE, prolog << "Adjusted max_var_size_bytes to DAP2 limit.\n");
500 }
501 if (max_response_size_bytes == 0 || max_response_size_bytes > fourGB) {
502 max_response_size_bytes = fourGB;
503 BESDEBUG(MODULE, prolog << "Adjusted max_response_size_bytes to DAP2 limit.\n");
504 }
505 }
506 BESDEBUG(MODULE, prolog << "max_var_size_bytes: " << max_var_size_bytes << "\n");
507}
508
/**
 * @brief Build the opening lines of the "request too big" error message.
 *
 * The text states the maximum allowed response size and the maximum allowed variable size.
 * A limit of 0 is shown as "unlimited"; any other value is shown as a byte count.
 *
 * @param max_response_size_bytes The response limit to show.
 * @param max_var_size_bytes The per-variable limit to show.
 * @return The opening text, ending with a newline.
 */
std::string too_big_error_prolog(const uint64_t max_response_size_bytes, const uint64_t max_var_size_bytes){
    std::ostringstream text;

    // Writes a label followed by the limit, or by "unlimited" for a zero limit.
    const auto append_limit = [&text](const char *label, const uint64_t limit_bytes) {
        text << label;
        if (limit_bytes != 0) {
            text << limit_bytes << " bytes.\n";
        }
        else {
            text << "unlimited\n";
        }
    };

    text << "\nYou asked for too much! \n";
    append_limit(" Maximum allowed response size: ", max_response_size_bytes);
    append_limit(" Maximum allowed variable size: ", max_var_size_bytes);
    return text.str();
}
534
548bool its_too_big(
549 stringstream &msg,
550 const uint64_t max_response_size_bytes,
551 const uint64_t response_size_bytes,
552 const uint64_t max_var_size_bytes,
553 const std::vector<string> &too_big_vars,
554 bool is_dap2=false
555 ){
556
557 BESDEBUG(MODULE, prolog << "max_response_size_bytes: " << max_response_size_bytes << "\n");
558 BESDEBUG(MODULE, prolog << "max_var_size_bytes: " << max_var_size_bytes << "\n");
559 BESDEBUG(MODULE, prolog << "response_size_bytes: " << response_size_bytes << "\n");
560 BESDEBUG(MODULE, prolog << "too_big_vars.size(): " << too_big_vars.size() << "\n");
561
562 // Is the whole thing too big? If so flag and start message.
563 bool response_too_big = (max_response_size_bytes > 0) && (response_size_bytes > max_response_size_bytes);
564 if(response_too_big){
565 msg << too_big_error_prolog(max_response_size_bytes, max_var_size_bytes);
566 msg << "The submitted DAP" << (is_dap2?"2":"4") << " request will generate a ";
567 msg << response_size_bytes << " byte\n";
568 msg << "response, which is larger than the maximum allowed response size.\n";
569 }
570
571 // Was one or more of the constrained variables too big?
572 if(!too_big_vars.empty()){
573 if(response_too_big){
574 // Is the whole thing too big? Continue message.
575// msg <<"- Consider asking for fewer variables (do you need them all?)"
576 msg << "\nIn addition to the overall response being too large for the\n";
577 msg << "service to produce, the request references the following\n";
578 msg << "variable(s) ";
579 }
580 else {
581 // Start message
582 msg << too_big_error_prolog(max_response_size_bytes, max_var_size_bytes);
583 msg << "The following is a list of variable(s), identified\n";
584 msg << "in the request, ";
585 }
586 // Add oversoze variable info.
587 msg << "that are each too large for the service\n";
588 msg << "to process.\n";
589 msg << "\nOversized Variable(s): \n";
590 for(const auto& var_entry:too_big_vars){
591 msg << " " << var_entry << "\n";
592 }
593 msg << "\n";
594 response_too_big = true;
595 }
596
597 if(response_too_big) {
598 // Finish message
599 msg << "You can resolve these issues by requesting less.\n";
600 msg << " - Consider asking for fewer variables (do you need them all?)\n";
601 msg << " - If individual variables are too large you can also subset\n";
602 msg << " them using an index based array subset expression \n";
603 msg << " to request a smaller area or to decimate the variable.\n";
604 if(is_dap2){
605 msg << "You can find detailed information about DAP2 variable sub-setting\n";
606 msg << "expressions in section 4.4 of the DAP2 User Guide located here:\n";
607 msg << "https://www.opendap.org/documentation/UserGuideComprehensive.pdf\n";
608 }
609 else {
610 // It's a DAP4 thing...
611 msg << "You can find detailed information about DAP4 variable sub-setting here:\n";
612 msg << "https://github.com/OPENDAP/dap4-specification/blob/main/";
613 msg << "01_data-model-and-serialized-rep.md#8-constraints\n";
614 }
615 return true;
616 }
617 return false;
618}
619
627void throw_if_too_big(libdap::DMR &dmr, const string &file, const unsigned int line)
628{
629 BES_STOPWATCH_START(MODULE, prolog + "DMR");
630
631 uint64_t max_var_size_bytes=0;
632 uint64_t max_response_size_bytes=0;
633 std::vector<std::string> too_big_vars;
634
635 get_max_sizes_bytes(max_response_size_bytes, max_var_size_bytes);
636 BESDEBUG(MODULE, prolog << "max_var_size_bytes: " << max_var_size_bytes << "\n");
637 BESDEBUG(MODULE, prolog << "max_response_size_bytes: " << max_response_size_bytes << "\n");
638
639 auto response_size_bytes = compute_response_size_and_inv_big_vars(dmr, max_var_size_bytes, too_big_vars);
640 BESDEBUG(MODULE, prolog << "response_size_bytes: " << response_size_bytes << "\n");
641 BESDEBUG(MODULE, prolog << "too_big_vars: " << too_big_vars.size() << "\n");
642
643 stringstream too_big_message;
644 if(its_too_big(
645 too_big_message,
646 max_response_size_bytes,
647 response_size_bytes,
648 max_var_size_bytes,
649 too_big_vars)) {
650 BESDEBUG(MODULE, prolog << "It's TOO BIG:\n" << too_big_message.str() << "\n");
651 throw BESSyntaxUserError(too_big_message.str(), file, line);
652 }
653}
654
655
663void throw_if_too_big(const libdap::DDS &dds, const std::string &file, const unsigned int line)
664{
665 BES_STOPWATCH_START(MODULE, prolog + "DDS");
666
667 uint64_t max_var_size_bytes=0;
668 uint64_t max_response_size_bytes=0;
669 std::vector<std::string> too_big_vars;
670
671 get_max_sizes_bytes(max_response_size_bytes, max_var_size_bytes, true);
672 BESDEBUG(MODULE, prolog << "max_var_size_bytes: " << max_var_size_bytes << "\n");
673 BESDEBUG(MODULE, prolog << "max_response_size_bytes: " << max_response_size_bytes << "\n");
674
675 auto response_size_bytes = compute_response_size_and_inv_big_vars(dds, max_var_size_bytes, too_big_vars);
676 BESDEBUG(MODULE, prolog << "response_size_bytes: " << response_size_bytes << "\n");
677 BESDEBUG(MODULE, prolog << "too_big_vars: " << too_big_vars.size() << "\n");
678
679 stringstream too_big_message;
680 if(its_too_big(
681 too_big_message,
682 max_response_size_bytes,
683 response_size_bytes,
684 max_var_size_bytes,
685 too_big_vars,
686 true)) {
687 BESDEBUG(MODULE, prolog << "It's TOO BIG:\n" << too_big_message.str() << "\n");
688 throw BESSyntaxUserError(too_big_message.str(), file, line);
689 }
690}
691
692
693
694} // namespace dap_utils
static long get_current_memory_usage() noexcept
Get the Resident Set Size in KB.
Definition BESUtil.cc:89
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
static uint64_t read_uint64_key(const std::string &key, uint64_t default_value)
Read an integer-valued key from the bes.conf file.