25#include <unordered_set>
33#include <libdap/BaseType.h>
34#include <libdap/Array.h>
35#include <libdap/Type.h>
36#include <libdap/D4Dimensions.h>
37#include <libdap/D4Group.h>
38#include <libdap/D4BaseTypeFactory.h>
39#include <libdap/D4Enum.h>
40#include <libdap/D4EnumDefs.h>
41#include <libdap/D4Attributes.h>
42#include <libdap/D4Maps.h>
43#include <libdap/DMR.h>
44#include <libdap/util.h>
46#include "DmrppNames.h"
48#define PUGIXML_NO_XPATH
49#define PUGIXML_HEADER_ONLY
56#include "DmrppCommon.h"
57#include "DmrppArray.h"
58#include "DmrppStructure.h"
62#include "DmrppD4Group.h"
64#include "DmrppRequestHandler.h"
65#include "DmrppChunkOdometer.h"
66#include "TheBESKeys.h"
// Compile-time switch read by is_eq(): when non-zero, names are compared with a plain strcmp().
81#define TREAT_NAMESPACES_AS_LITERALS 1
// Compile-time switch read by get_variable_xml_node() and get_variable_xml_node_helper().
87#define USE_CACHED_XML_NODE 1
// NOTE(review): not referenced by any code visible in this listing.
89#define SUPPORT_FILL_VALUE_CHUNKS 1
// Message prefix of the form "DMZ::<function>() - " used in log and exception text.
91#define prolog std::string("DMZ::").append(__func__).append("() - ")
// Alias for a list of unsigned 64-bit sizes.
96using shape = std::vector<unsigned long long>;
// Marker strings compared against the "fillValue" attribute of a dmrpp:chunks element
// in flagged_as_unsupported_type().
99constexpr static const auto UNSUPPORTED_STRING =
"unsupported-string";
100constexpr static const auto UNSUPPORTED_ARRAY =
"unsupported-array";
101constexpr static const auto UNSUPPORTED_COMPOUND =
"unsupported-compound";
103constexpr static const auto UNSUPPORTED_VARIABLE_LENGTH_STRING =
"unsupported-variable-length-string";
// Configuration key name; presumably read by load_config_from_keys() - confirm.
105constexpr static const auto ELIDE_UNSUPPORTED_KEY =
"DMRPP.Elide.Unsupported";
// When true, process_variable() skips variables flagged as unsupported.
107bool DMZ::d_elide_unsupported =
true;
111const std::set<std::string> DMZ::variable_elements{
"Byte",
"Int8",
"Int16",
"Int32",
"Int64",
"UInt8",
"UInt16",
"UInt32",
112 "UInt64",
"Float32",
"Float64",
"String",
"Structure",
"Sequence",
/**
 * @brief Compare a name or value taken from the XML document with an expected key.
 *
 * With TREAT_NAMESPACES_AS_LITERALS set, the two strings must match exactly.
 * Otherwise a match is also reported when the text following the first ':' in
 * @p value equals @p key.
 *
 * @param value Text taken from the XML document (must not be null).
 * @param key The text to compare against (must not be null).
 * @return True if the strings are considered equal.
 */
static inline bool is_eq(const char *value, const char *key)
{
#if TREAT_NAMESPACES_AS_LITERALS
    return 0 == strcmp(value, key);
#else
    if (0 == strcmp(value, key)) {
        return true;
    }

    // Retry with whatever follows the first ':' in value.
    const char *separator = strchr(value, ':');
    return separator != nullptr && 0 == strcmp(separator + 1, key);
#endif
}
135static inline bool has_dim_nodes(
const xml_node &var_node)
137 return var_node.child(
"Dim");
/**
 * @brief Test whether a name is present in a set of names.
 * @param elements_set The set to search.
 * @param element_name The name to look for.
 * @return True if element_name is in elements_set.
 */
static inline bool member_of(const std::set<std::string> &elements_set, const std::string &element_name)
{
    return elements_set.count(element_name) != 0;
}
151 throw BESInternalError(
string(
"Expected a BaseType that was also a DmrppCommon instance (")
152 .append((btp) ? btp->name() :
"unknown").append(
")."), __FILE__, __LINE__);
161void DMZ::load_config_from_keys()
176 load_config_from_keys();
187 std::ifstream stream(file_name);
194 pugi::xml_parse_result result = d_xml_doc.load(stream, pugi::parse_default | pugi::parse_ws_pcdata_single);
197 throw BESInternalError(
string(
"DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
199 if (!d_xml_doc.document_element())
// Report whether a variable's XML node carries one of the UNSUPPORTED_* markers.
// The marker is read from the "fillValue" attribute of the node's "dmrpp:chunks" child.
// For the variable-length-string, array and compound markers the marker text is copied
// into unsupported_flag and true is returned.
// Throws BESInternalError when var_node is null.
// NOTE(review): several lines of this function are not visible in this listing; the
// conditions guarding the early returns and the two fillValue resets below are assumed.
211bool flagged_as_unsupported_type(xml_node var_node,
string &unsupported_flag) {
212 if (var_node ==
nullptr) {
213 throw BESInternalError(prolog +
"Received null valued xml_node in the DMR++ XML document.", __FILE__, __LINE__);
217 bool is_unsupported_type =
false;
220 auto chunks = var_node.child(
"dmrpp:chunks");
// Early exit with "supported"; presumably taken when there is no dmrpp:chunks child - confirm.
223 return is_unsupported_type;
226 xml_attribute fillValue_attr = chunks.attribute(
"fillValue");
227 if(!fillValue_attr) {
229 return is_unsupported_type;
// "unsupported-string": start as unsupported, then the two cases below may clear that again.
233 if(is_eq(fillValue_attr.value(), UNSUPPORTED_STRING)){
242 is_unsupported_type =
true;
244 auto dim_node = var_node.child(
"Dim");
// Blanks the marker in the in-memory document and treats the variable as supported;
// presumably guarded by a test on dim_node - confirm.
248 fillValue_attr.set_value(
"");
249 is_unsupported_type =
false;
253 auto flsa_node = var_node.child(
"dmrpp:FixedLengthStringArray");
// Same reset; presumably guarded by a test on flsa_node - confirm.
257 fillValue_attr.set_value(
"");
258 is_unsupported_type =
false;
262 else if(is_eq(fillValue_attr.value(),UNSUPPORTED_VARIABLE_LENGTH_STRING)) {
263 unsupported_flag=fillValue_attr.value();
264 is_unsupported_type =
true;
266 else if(is_eq(fillValue_attr.value(),UNSUPPORTED_ARRAY)){
267 unsupported_flag=fillValue_attr.value();
268 is_unsupported_type =
true;
270 else if(is_eq(fillValue_attr.value(),UNSUPPORTED_COMPOUND)){
271 unsupported_flag=fillValue_attr.value();
272 is_unsupported_type =
true;
275 return is_unsupported_type;
292 pugi::xml_parse_result result = d_xml_doc.load_string(source.c_str());
295 throw BESInternalError(
string(
"DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
297 if (!d_xml_doc.document_element())
// Copy the attributes of the root Dataset element into the DMR.
// Recognized attributes: name (required), dapVersion, dmrVersion, base, xmlns,
// dmrpp:href (required), dmrpp:trust and dmrpp:version.
// An empty dmrpp:version turns on DmrppRequestHandler::d_emulate_original_filter_order_behavior.
// On success d_dataset_elem_href holds a new http::url built from dmrpp:href and the trust flag.
// Throws BESInternalError when 'name' or 'dmrpp:href' is missing.
// NOTE(review): the declaration of href_attr is not visible in this listing.
310void DMZ::process_dataset(DMR *dmr,
const xml_node &xml_root)
313 int required_attrs_found = 0;
315 bool href_trusted =
false;
316 string dmrpp_version;
317 for (xml_attribute attr = xml_root.first_attribute(); attr; attr = attr.next_attribute()) {
318 if (is_eq(attr.name(),
"name")) {
319 ++required_attrs_found;
320 dmr->set_name(attr.value());
322 else if (is_eq(attr.name(),
"dapVersion")) {
323 dmr->set_dap_version(attr.value());
325 else if (is_eq(attr.name(),
"dmrVersion")) {
326 dmr->set_dmr_version(attr.value());
328 else if (is_eq(attr.name(),
"base")) {
329 dmr->set_request_xml_base(attr.value());
330 BESDEBUG(PARSER, prolog <<
"Dataset xml:base is set to '" << dmr->request_xml_base() <<
"'" << endl);
333 else if (is_eq(attr.name(),
"xmlns")) {
334 dmr->set_namespace(attr.value());
338 else if (is_eq(attr.name(),
"dmrpp:href")) {
339 href_attr = attr.value();
341 else if (is_eq(attr.name(),
"dmrpp:trust")) {
342 href_trusted = is_eq(attr.value(),
"true");
344 else if (is_eq(attr.name(),
"dmrpp:version")) {
345 dmrpp_version = attr.value();
// No dmrpp:version attribute was found (or it was empty).
350 if (dmrpp_version.empty()) {
351 DmrppRequestHandler::d_emulate_original_filter_order_behavior =
true;
// NOTE(review): the result of this cast is used below; a null check, if any, is not visible here.
354 auto dmrpp =
dynamic_cast<DMRpp*
>(dmr);
356 dmrpp->set_version(dmrpp_version);
// 'name' is the only attribute counted in required_attrs_found.
360 if (required_attrs_found != 1)
361 throw BESInternalError(
"DMR++ XML dataset element missing one or more required attributes.", __FILE__, __LINE__);
363 if (href_attr.empty())
364 throw BESInternalError(
"DMR++ XML dataset element dmrpp:href is missing. ", __FILE__, __LINE__);
366 d_dataset_elem_href.reset(
new http::url(href_attr, href_trusted));
374void DMZ::process_dimension(D4Group *grp,
const xml_node &dimension_node)
378 for (xml_attribute attr = dimension_node.first_attribute(); attr; attr = attr.next_attribute()) {
379 if (is_eq(attr.name(),
"name")) {
380 name_value = attr.value();
382 else if (is_eq(attr.name(),
"size")) {
383 size_value = attr.value();
387 if (name_value.empty() || size_value.empty())
388 throw BESInternalError(
"The required attribute 'name' or 'size' was missing from a Dimension element.", __FILE__, __LINE__);
392 auto *dimension =
new D4Dimension();
393 dimension->set_name(name_value);
394 dimension->set_size(size_value);
395 grp->dims()->add_dim_nocopy(dimension);
398 throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
// Add one dimension to 'array' from a Dim element.
// A Dim element must carry exactly one of 'size' (an anonymous dimension of that length)
// or 'name' (a reference to a Dimension; a leading '/' is looked up from the root group,
// anything else from 'grp').
// Throws BESInternalError when neither or both attributes are present, or when a named
// dimension cannot be found.
// NOTE(review): stoll() can throw std::invalid_argument / std::out_of_range for a malformed
// 'size'; no handler for those is visible here.
// NOTE(review): the declarations of name_value, size_value and dim are not visible in this listing.
409void DMZ::process_dim(DMR *dmr, D4Group *grp, Array *array,
const xml_node &dim_node)
413 for (xml_attribute attr = dim_node.first_attribute(); attr; attr = attr.next_attribute()) {
414 if (is_eq(attr.name(),
"name")) {
415 name_value = attr.value();
417 else if (is_eq(attr.name(),
"size")) {
418 size_value = attr.value();
422 if (name_value.empty() && size_value.empty())
423 throw BESInternalError(
"Either 'size' or 'name' must be used in a Dim element.", __FILE__, __LINE__);
424 if (!name_value.empty() && !size_value.empty())
425 throw BESInternalError(
"Only one of 'size' and 'name' are allowed in a Dim element, but both were used.", __FILE__, __LINE__);
// Anonymous dimension: only a length is given.
427 if (!size_value.empty()) {
428 BESDEBUG(PARSER, prolog <<
"Processing nameless Dim of size: " << stoll(size_value) << endl);
429 array->append_dim_ll(stoll(size_value));
// Named dimension: resolve the referenced Dimension object.
431 else if (!name_value.empty()) {
432 BESDEBUG(PARSER, prolog <<
"Processing Dim with named Dimension reference: " << name_value << endl);
435 if (name_value[0] ==
'/')
436 dim = dmr->root()->find_dim(name_value);
439 dim = grp->find_dim(name_value);
442 throw BESInternalError(
"The dimension '" + name_value +
"' was not found while parsing the variable '" + array->name() +
"'.",__FILE__,__LINE__);
444 array->append_dim(dim);
// Add a D4Map to 'array' from a Map element.
// The 'name' attribute names the map source; a name that does not start with '/' is
// prefixed with the fully qualified name of 'grp' before the lookup from the root group.
// NOTE(review): the declaration of name_value and the handling of a missing name or a
// null map_source are not visible in this listing.
448void DMZ::process_map(DMR *dmr, D4Group *grp, Array *array,
const xml_node &map_node)
452 for (xml_attribute attr = map_node.first_attribute(); attr; attr = attr.next_attribute()) {
453 if (is_eq(attr.name(),
"name")) {
454 name_value = attr.value();
459 if (name_value[0] !=
'/')
460 name_value = grp->FQN() + name_value;
463 Array *map_source = dmr->root()->find_map_source(name_value);
477 array->maps()->add_map(
new D4Map(name_value, map_source));
494void DMZ::process_variable(DMR *dmr, D4Group *group, Constructor *parent,
const xml_node &var_node)
497 throw BESInternalError(
498 prolog +
"Received a null valued Group pointer!", __FILE__, __LINE__);
501 string unsupported_flag;
502 if(d_elide_unsupported && flagged_as_unsupported_type(var_node, unsupported_flag)){
505 auto var_name = var_node.attribute(
"name");
506 auto var_type = var_node.name();
507 INFO_LOG(prolog +
"Unsupported Type Encountered: " + var_type +
" " + var_name.value() +
"; flag: '" + unsupported_flag +
"'\n");
513 Type t = get_type(var_node.name());
515 if(t == dods_group_c){
516 throw BESInternalError(
517 prolog +
"ERROR - The variable node to process is a Group type! "
518 "This is handled elsewhere, not here. Parser State Issue!!", __FILE__, __LINE__);
522 if (has_dim_nodes(var_node)) {
524 btp = add_array_variable(dmr, group, parent, t, var_node);
525 if (t == dods_structure_c || t == dods_sequence_c) {
526 if(btp->type() != dods_array_c || btp->var()->type() != t){
527 throw BESInternalError(
528 prolog +
"Failed to create an array variable for " + var_node.name(), __FILE__, __LINE__);
531 parent =
dynamic_cast<Constructor*
>(btp->var());
533 throw BESInternalError(
534 prolog +
"Failed to cast " + btp->var()->type_name() +
" " + btp->name() +
535 " to an instance of Constructor." , __FILE__, __LINE__);
537 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
538 if (member_of(variable_elements, child.name()))
539 process_variable(dmr, group, parent, child);
545 btp = add_scalar_variable(dmr, group, parent, t, var_node);
546 if (t == dods_structure_c || t == dods_sequence_c) {
547 if(btp->type() != t){
548 throw BESInternalError(
549 prolog +
"Failed to create a scalar variable for " + var_node.name(), __FILE__, __LINE__);
551 parent =
dynamic_cast<Constructor*
>(btp);
553 throw BESInternalError(
554 prolog +
"Failed to cast " + btp->var()->type_name() +
" " + btp->name() +
555 " to an instance of Constructor." , __FILE__, __LINE__);
557 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
558 if (member_of(variable_elements, child.name()))
559 process_variable(dmr, group, parent, child);
564 dc(btp)->set_xml_node(var_node);
// Create a new variable of type 't' from the attributes of var_node using the DMR's factory.
// 'name' is required. For dods_enum_c an 'enum' attribute is also required and is resolved
// to an enumeration definition (a leading '/' is looked up from the root group, anything
// else from 'group').
// The new variable is marked as DAP4.
// Throws BESInternalError when the DMR has no factory, the name is missing, the factory
// returns nothing, or the enumeration cannot be found.
// NOTE(review): the declarations of name_value, enum_value and enum_def, the null checks
// that guard several throws, and the return statement are not visible in this listing.
574BaseType *DMZ::build_variable(DMR *dmr, D4Group *group,
Type t,
const xml_node &var_node)
577 throw BESInternalError(prolog +
"ERROR - Received a DMR without a class factory!", __FILE__, __LINE__);
582 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
583 if (is_eq(attr.name(),
"name")) {
584 name_value = attr.value();
586 if (is_eq(attr.name(),
"enum")) {
587 enum_value = attr.value();
591 if (name_value.empty())
592 throw BESInternalError(
"The variable 'name' attribute was missing.", __FILE__, __LINE__);
594 BaseType *btp = dmr->factory()->NewVariable(t, name_value);
596 throw BESInternalError(
"Could not instantiate the variable ' "+ name_value +
"'.", __FILE__, __LINE__);
598 btp->set_is_dap4(
true);
// Enum variables must be bound to their enumeration definition.
601 if (t == dods_enum_c) {
602 if (enum_value.empty())
603 throw BESInternalError(
"The variable ' " + name_value +
"' lacks an 'enum' attribute.", __FILE__, __LINE__);
606 if (enum_value[0] ==
'/')
607 enum_def = dmr->root()->find_enum_def(enum_value);
609 enum_def = group->find_enum_def(enum_value);
612 throw BESInternalError(
"Could not find the Enumeration definition '" + enum_value +
"'.", __FILE__, __LINE__);
// Reference cast: throws std::bad_cast if btp is not a D4Enum.
614 dynamic_cast<D4Enum&
>(*btp).set_enumeration(enum_def);
// Build a scalar variable from var_node and add it, without copying, to either 'parent'
// or 'group'.
// Throws BESInternalError for a null group.
// NOTE(review): the condition that chooses between parent and group, and the return
// statement, are not visible in this listing; presumably 'parent' is used when non-null.
630BaseType *DMZ::add_scalar_variable(DMR *dmr, D4Group *group, Constructor *parent,
Type t,
const xml_node &var_node)
633 throw BESInternalError(prolog +
"ERROR - Received a null valued Group pointer!", __FILE__, __LINE__);
636 BaseType *btp = build_variable(dmr, group, t, var_node);
641 parent->add_var_nocopy(btp);
643 group->add_var_nocopy(btp);
// Build an array variable from var_node.
// A template variable of type 't' is built first, then wrapped in a new array with the
// same name. The child elements are processed in document order:
//   Dim                            -> process_dim()
//   Map                            -> process_map()
//   fixed-length-string-array tag  -> marks the array as FLSA and reads length / pad attributes
//   variable-length-string tag     -> marks the array as VLSA
// The array is then added, without copying, to either 'parent' or 'group'.
// Throws BESInternalError for a null group.
// NOTE(review): the factory result is static_cast to DmrppArray without a check visible here.
// NOTE(review): the condition that chooses between parent and group, and the return
// statement, are not visible in this listing.
662BaseType *DMZ::add_array_variable(DMR *dmr, D4Group *group, Constructor *parent,
Type t,
const xml_node &var_node)
665 throw BESInternalError(prolog +
"ERROR - Received a null valued Group pointer!", __FILE__, __LINE__);
668 BaseType *btp = build_variable(dmr, group, t, var_node);
671 auto *array =
static_cast<DmrppArray*
>(dmr->factory()->NewVariable(dods_array_c, btp->name()));
672 array->set_is_dap4(
true);
// The array takes the template variable without copying it.
673 array->add_var_nocopy(btp);
679 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
680 if (is_eq(child.name(),
"Dim")) {
681 process_dim(dmr, group, array, child);
683 else if (is_eq(child.name(),
"Map")) {
684 process_map(dmr, group, array, child);
686 else if (is_eq(child.name(), DMRPP_FIXED_LENGTH_STRING_ARRAY_ELEMENT)) {
687 BESDEBUG(PARSER, prolog <<
"Variable has been marked with a " << DMRPP_FIXED_LENGTH_STRING_ARRAY_ELEMENT << endl);
689 array->set_is_flsa(
true);
690 for (xml_attribute attr = child.first_attribute(); attr; attr = attr.next_attribute()) {
691 if (is_eq(attr.name(), DMRPP_FIXED_LENGTH_STRING_LENGTH_ATTR)) {
692 auto length = array->set_fixed_string_length(attr.value());
693 BESDEBUG(PARSER, prolog <<
"Fixed length string array string length: " << length << endl);
695 else if (is_eq(attr.name(), DMRPP_FIXED_LENGTH_STRING_PAD_ATTR)) {
696 string_pad_type pad = array->set_fixed_length_string_pad_type(attr.value());
697 BESDEBUG(PARSER, prolog <<
"Fixed length string array padding scheme: " << pad <<
" (" <<
698 array->get_fixed_length_string_pad_str() <<
")" << endl);
702 else if(is_eq(child.name(), DMRPP_VLSA_ELEMENT)){
703 BESDEBUG(PARSER, prolog <<
"Variable has been marked with a " << DMRPP_VLSA_ELEMENT << endl);
704 array->set_is_vlsa(
true);
709 parent->add_var_nocopy(array);
711 group->add_var_nocopy(array);
// Build a child group of 'parent' from a Group element and process its contents.
// The group is created by the DMR's factory, linked to 'parent', and given its XML node.
// Child elements are then dispatched: Dimension -> process_dimension(), Group ->
// process_group() (recursion), variable elements -> process_variable() with no parent
// constructor.
// Throws BESInternalError when 'name' is missing or the group cannot be instantiated.
// NOTE(review): the declaration of name_value and the null checks on btp / new_group are
// not visible in this listing.
724void DMZ::process_group(DMR *dmr, D4Group *parent,
const xml_node &var_node)
727 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
728 if (is_eq(attr.name(),
"name")) {
729 name_value = attr.value();
733 if (name_value.empty())
734 throw BESInternalError(
"The required attribute 'name' was missing from a Group element.", __FILE__, __LINE__);
736 BaseType *btp = dmr->factory()->NewVariable(dods_group_c, name_value);
738 throw BESInternalError(
"Could not instantiate the Group '" + name_value +
"'.", __FILE__, __LINE__);
740 auto new_group =
dynamic_cast<DmrppD4Group*
>(btp);
744 new_group->set_is_dap4(
true);
747 new_group->set_parent(parent);
748 parent->add_group_nocopy(new_group);
// Record where this group lives in the XML document.
751 new_group->set_xml_node(var_node);
755 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
756 if (is_eq(child.name(),
"Dimension")) {
757 process_dimension(new_group, child);
759 else if (is_eq(child.name(),
"Group")) {
760 process_group(dmr, new_group, child);
762 else if (member_of(variable_elements, child.name())) {
763 process_variable(dmr, new_group,
nullptr, child);
775 auto xml_root_node = d_xml_doc.first_child();
777 process_dataset(dmr, xml_root_node);
779 auto root_group = dmr->root();
783 throw BESInternalError(
"Expected the root group to also be an instance of DmrppD4Group.", __FILE__, __LINE__);
785 dg->set_xml_node(xml_root_node);
787 for (
auto child = xml_root_node.first_child(); child; child = child.next_sibling()) {
788 if (is_eq(child.name(),
"Dimension")) {
789 process_dimension(dg, child);
791 else if (is_eq(child.name(),
"Group")) {
792 process_group(dmr, dg, child);
795 else if (member_of(variable_elements, child.name())) {
796 process_variable(dmr, dg,
nullptr, child);
805bool DMZ::set_up_all_direct_io_flags_phase_1(DMR *dmr) {
807 if (d_xml_doc ==
nullptr){
808 throw BESInternalError(prolog +
"Received a null DMR pointer.", __FILE__, __LINE__);
811 bool dio_flag_value = set_up_direct_io_flag_phase_1(dmr->root());
813 dmr->set_global_dio_flag(dio_flag_value);
814 return dio_flag_value;
// Phase 1 check for a group: run the per-variable check on the group's array variables,
// and descend into child groups only while ret_value is still false.
// NOTE(review): the statements executed when a check returns true, and the return
// statement, are not visible in this listing.
818bool DMZ::set_up_direct_io_flag_phase_1(D4Group *group) {
820 bool ret_value =
false;
821 for (
auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
822 BESDEBUG(
"dmrpp",
"Inside set_up_direct_io_flag: var name is "<<(*i)->name()<<endl);
// Only array variables are examined.
823 if ((*i)->type() == dods_array_c) {
824 if (
true == set_up_direct_io_flag_phase_1(*i)) {
// Child groups are searched only if no variable of this group gave a positive result.
831 if (ret_value ==
false) {
832 for (
auto gi = group->grp_begin(), ge = group->grp_end(); gi != ge; ++gi) {
833 if (
true == set_up_direct_io_flag_phase_1(*gi)) {
// Phase 1 check for one variable: look through the attributes of its dmrpp:chunks element
// for one named "deflateLevel".
// Throws BESInternalError when the variable's XML node cannot be found.
// NOTE(review): what happens when dmrpp:chunks is absent, what is done when the attribute
// is found, and the return statement are not visible in this listing.
843bool DMZ::set_up_direct_io_flag_phase_1(BaseType *btp) {
846 xml_node var_node = get_variable_xml_node(btp);
847 if (var_node ==
nullptr)
848 throw BESInternalError(
"Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
850 auto chunks = var_node.child(
"dmrpp:chunks");
854 bool ret_value =
false;
855 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
856 if (is_eq(attr.name(),
"deflateLevel")) {
864void DMZ::set_up_all_direct_io_flags_phase_2(DMR *dmr) {
866 if (d_xml_doc ==
nullptr){
867 throw BESInternalError(prolog +
"Received a null DMR pointer.", __FILE__, __LINE__);
870 set_up_direct_io_flag_phase_2(dmr->root());
874void DMZ::set_up_direct_io_flag_phase_2(D4Group *group) {
876 for (
auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
877 if ((*i)->type() == dods_array_c)
878 set_up_direct_io_flag_phase_2((*i));
881 for (
auto gi = group->grp_begin(), ge = group->grp_end(); gi != ge; ++gi)
882 set_up_direct_io_flag_phase_2((*gi));
// Phase 2 of direct I/O setup for one variable.
// The visible code applies a series of tests and, when the end is reached, stores the
// filter name, deflate levels and chunk dimension sizes on the array and sets its
// direct I/O flag. The tests cover:
//   - the variable is an array of a simple type other than string, URL, enum or opaque
//   - its dmrpp:chunks element names a compressionType containing "deflate" and a deflateLevel
//   - direct I/O was not disabled for the variable
//   - byte order, for integer element types
//   - chunk dimension sizes are present, match the array rank and do not exceed the array shape
//   - the number of chunk child elements matches the number of logical chunks
// Throws BESInternalError when the variable's XML node cannot be found.
// NOTE(review): the statements executed when a test fails (presumably early returns) and the
// declarations of 'filter' and 'is_le' are not visible in this listing.
886void DMZ::set_up_direct_io_flag_phase_2(BaseType *btp) {
888 bool is_integer_float =
false;
889 Array *t_a =
nullptr;
891 Type t = btp->type();
892 if (t == dods_array_c) {
893 t_a=
dynamic_cast<Array *
>(btp);
894 Type t_var = t_a->var()->type();
895 if (libdap::is_simple_type(t_var) && t_var != dods_str_c && t_var != dods_url_c && t_var!= dods_enum_c && t_var!=dods_opaque_c)
896 is_integer_float =
true;
900 if (is_integer_float ==
false)
905 xml_node var_node = get_variable_xml_node(btp);
906 if (var_node ==
nullptr)
907 throw BESInternalError(
"Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
909 auto chunks = var_node.child(
"dmrpp:chunks");
916 bool has_deflate_filter =
false;
918 vector<unsigned int>deflate_levels;
// Scan the dmrpp:chunks attributes in document order. The deflateLevel branch can only run
// after a compressionType containing "deflate" has been seen.
922 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
923 if (!has_deflate_filter && is_eq(attr.name(),
"compressionType")) {
924 filter = attr.value();
925 if (filter.find(
"deflate") == string::npos)
928 has_deflate_filter =
true;
930 else if (has_deflate_filter && deflate_levels.empty()) {
932 if (is_eq(attr.name(),
"deflateLevel")) {
934 string def_lev_str = attr.value();
// The levels are space separated; stoul() throws on a non-numeric entry.
937 vector<string> def_lev_str_vec =
BESUtil::split(def_lev_str,
' ' );
938 for (
const auto &def_lev:def_lev_str_vec)
939 deflate_levels.push_back(stoul(def_lev));
943 else if (is_eq(attr.name(),
"byteOrder")) {
944 string endian_str = attr.value();
945 if (endian_str==
"LE")
949 else if (is_eq(attr.name(),
"DIO") && is_eq(attr.value(),
"off")) {
951 BESDEBUG(PARSER, prolog <<
"direct IO is disabled : the variable name is: " <<btp->name() << endl);
956 if (!has_deflate_filter || (deflate_levels.empty()))
964 if (dc(btp)->is_disable_dio())
968 if (!is_le && is_integer_type(t_a->var()->type()))
// Collect the chunk dimension sizes from the dmrpp:chunkDimensionSizes child.
973 vector<unsigned long long>chunk_dim_sizes;
974 for (
auto child = chunks.child(
"dmrpp:chunkDimensionSizes"); child; child = child.next_sibling()) {
975 if (is_eq(child.name(),
"dmrpp:chunkDimensionSizes")) {
976 string chunk_sizes_str = child.child_value();
977 vector<string> chunk_sizes_str_vec =
BESUtil::split(chunk_sizes_str,
' ' );
978 for (
const auto &chunk_size:chunk_sizes_str_vec)
979 chunk_dim_sizes.push_back(stoull(chunk_size));
986 if (chunk_dim_sizes.empty())
// Count every child of dmrpp:chunks, including the chunkDimensionSizes element itself.
990 size_t num_chunks_children = 0;
991 for (
auto child = chunks.first_child(); child; child = child.next_sibling())
992 num_chunks_children++;
995 if (num_chunks_children == 1)
1002 vector <unsigned long long>dim_sizes;
1003 Array::Dim_iter p = t_a->dim_begin();
1004 while (p != t_a->dim_end()) {
1005 dim_sizes.push_back((
unsigned long long)(t_a->dimension_size_ll(p)));
// Every chunk dimension must be no larger than the matching array dimension, and the ranks must agree.
1009 bool chunk_less_dim =
true;
1010 if (chunk_dim_sizes.size() == dim_sizes.size()) {
1011 for (
unsigned int i = 0; i<dim_sizes.size(); i++) {
1012 if (chunk_dim_sizes[i] > dim_sizes[i]) {
1013 chunk_less_dim =
false;
1019 chunk_less_dim =
false;
1021 if (!chunk_less_dim)
// Number of chunks needed to tile the array: product of ceil(dim / chunk_dim) over all dimensions.
// NOTE(review): the division is done in float, which cannot represent every integer above 2^24,
// and a chunk dimension of 0 is not rejected by any visible code.
1027 size_t num_logical_chunks = 1;
1028 for (
unsigned int i = 0; i<dim_sizes.size(); i++)
1029 num_logical_chunks *=(
size_t)ceil((
float)dim_sizes[i] / (
float)chunk_dim_sizes[i]);
// The chunkDimensionSizes element is not itself a chunk, hence the minus one.
1030 if (num_logical_chunks != (num_chunks_children-1))
1036 BESDEBUG(PARSER, prolog <<
"Can do direct IO: the variable name is: " <<btp->name() << endl);
// Record the storage information on the array and enable direct I/O for it.
1040 Array::var_storage_info dmrpp_vs_info;
1043 dmrpp_vs_info.filter = filter;
1046 for (
const auto &def_lev:deflate_levels)
1047 dmrpp_vs_info.deflate_levels.push_back(def_lev);
1050 for (
const auto &chunk_dim:chunk_dim_sizes)
1051 dmrpp_vs_info.chunk_dims.push_back(chunk_dim);
1053 t_a->set_var_storage_info(dmrpp_vs_info);
1054 t_a->set_dio_flag();
// Build a D4Attribute from an Attribute element and add it to 'attributes'.
// 'name' and 'type' are required. A type of "Container" creates a container attribute and
// recurses into its Attribute children; other types create a value attribute and add the
// text of each Value child.
// Throws BESInternalError when 'name' or 'type' is missing.
// NOTE(review): the declarations of name_value / type_value and the body of the "OtherXML"
// branch are not visible in this listing.
1068void DMZ::process_attribute(D4Attributes *attributes,
const xml_node &dap_attr_node)
1072 for (xml_attribute attr = dap_attr_node.first_attribute(); attr; attr = attr.next_attribute()) {
1073 if (is_eq(attr.name(),
"name")) {
1074 name_value = attr.value();
1076 if (is_eq(attr.name(),
"type")) {
1077 type_value = attr.value();
1081 if (name_value.empty() || type_value.empty())
1082 throw BESInternalError(
"The required attribute 'name' or 'type' was missing from an Attribute element.", __FILE__, __LINE__);
1084 if (type_value ==
"Container") {
// The container is added to 'attributes' before its children are processed.
1086 auto *dap_attr_cont =
new D4Attribute(name_value, attr_container_c);
1087 attributes->add_attribute_nocopy(dap_attr_cont);
1092 if (dap_attr_node.first_child()) {
1093 for (
auto attr_node: dap_attr_node.children(
"Attribute")) {
1094 process_attribute(dap_attr_cont->attributes(), attr_node);
1098 else if (type_value ==
"OtherXML") {
// Value attribute: the attribute type is derived from the 'type' string.
1103 auto *attribute =
new D4Attribute(name_value, StringToD4AttributeType(type_value));
1104 attributes->add_attribute_nocopy(attribute);
1106 for (
auto value_elem = dap_attr_node.first_child(); value_elem; value_elem = value_elem.next_sibling()) {
1107 if (is_eq(value_elem.name(),
"Value")) {
1108 attribute->add_value(value_elem.child_value());
// Walk from 'btp' up through its parents, recursing while the parent exists and is not the
// root group (a group with no parent of its own).
// NOTE(review): the statement that stores btp in 'bt' is not visible in this listing.
1131void DMZ::build_basetype_chain(BaseType *btp, stack<BaseType*> &bt)
1133 auto parent = btp->get_parent();
1137 if (parent && !(parent->type() == dods_group_c && parent->get_parent() ==
nullptr))
1138 build_basetype_chain(parent, bt);
// Locate a variable's XML node by descending from a parent node along the chain in the stack.
// The search body is compiled only when USE_CACHED_XML_NODE is zero, which is why the
// parameters are unnamed in the signature below.
// At each level the element name is the variable's type name (for an array, the type name
// of its template) and the element's 'name' attribute must equal the variable's name.
// NOTE(review): the parameter names used in the body (parent_node, bt), the stack pops and
// the return paths are not visible in this listing.
1141xml_node DMZ::get_variable_xml_node_helper(
const xml_node &, stack<BaseType*> &)
1143#if !USE_CACHED_XML_NODE
1150 if (bt.top()->type() == dods_array_c && bt.top()->var()->is_constructor_type())
1156 string type_name = bt.top()->type() == dods_array_c ? bt.top()->var()->type_name(): bt.top()->type_name();
1157 string var_name = bt.top()->name();
1161 for (
auto node = parent_node.child(type_name.c_str()); node; node = node.next_sibling()) {
1162 for (xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
1163 if (is_eq(attr.name(),
"name") && is_eq(attr.value(), var_name.c_str())) {
1168 return get_variable_xml_node_helper(node, bt);
// Return the XML node that describes 'btp'.
// With USE_CACHED_XML_NODE set, the node recorded on the variable is used and a
// BESInternalError is thrown if none was recorded. Otherwise the node is found by building
// the chain of enclosing variables and searching down from the Dataset element.
// NOTE(review): the #else / #endif lines and the return statements are not visible in this listing.
1185xml_node DMZ::get_variable_xml_node(BaseType *btp)
1187#if USE_CACHED_XML_NODE
1188 auto node = dc(btp)->get_xml_node();
1189 if (node ==
nullptr)
1190 throw BESInternalError(
string(
"The xml_node for '").append(btp->name()).append(
"' was not recorded."), __FILE__, __LINE__);
1197 stack<BaseType*> bt;
1198 build_basetype_chain(btp, bt);
1200 xml_node dataset = d_xml_doc.first_child();
1201 if (!dataset || !is_eq(dataset.name(),
"Dataset"))
1202 throw BESInternalError(
"No DMR++ has been parsed.", __FILE__, __LINE__);
1204 auto node = get_variable_xml_node_helper(dataset, bt);
// Load the attributes of one variable from its XML node, unless they are already loaded.
// Afterwards the variable is marked as loaded. For an array the template variable is marked
// too; for a Structure or Sequence each member (and, for array members, the member's
// template) is marked as well.
// NOTE(review): the return type, the early return, the remaining case labels and the break
// statements are not visible in this listing.
1227DMZ::load_attributes(BaseType *btp)
1229 if (dc(btp)->get_attributes_loaded())
1232 load_attributes(btp, get_variable_xml_node(btp));
1235 dc(btp)->set_attributes_loaded(
true);
1237 switch (btp->type()) {
1244 case dods_array_c: {
1245 dc(btp->var())->set_attributes_loaded(
true);
1252 case dods_structure_c:
1253 case dods_sequence_c:
1255 auto *c =
dynamic_cast<Constructor*
>(btp);
1257 for (
auto i = c->var_begin(), e = c->var_end(); i != e; i++) {
1258 if ((*i)->type() == dods_array_c)
1259 dc((*i)->var())->set_attributes_loaded(
true);
1261 dc(*i)->set_attributes_loaded(
true);
// Load the attributes found directly under var_node into btp's attribute table by calling
// process_attribute() on each Attribute child, then mark btp as loaded.
// The table is obtained with the explicitly qualified BaseType::attributes().
// NOTE(review): the return type and the action taken when the attributes are already loaded
// (presumably an early return) are not visible in this listing.
1278DMZ::load_attributes(BaseType *btp, xml_node var_node)
const
1280 if (dc(btp)->get_attributes_loaded())
1288 auto attributes = btp->BaseType::attributes();
1289 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
1290 if (is_eq(child.name(),
"Attribute")) {
1291 process_attribute(attributes, child);
1295 dc(btp)->set_attributes_loaded(
true);
// Load the attributes of a constructor (Structure / Sequence) and then of each of its members.
// Throws BESInternalError if a member is a Group.
// NOTE(review): the return type is not visible in this listing.
1303DMZ::load_attributes(Constructor *constructor)
1305 load_attributes(constructor, get_variable_xml_node(constructor));
1306 for (
auto i = constructor->var_begin(), e = constructor->var_end(); i != e; ++i) {
1308 if((*i)->type() == dods_group_c){
1309 throw BESInternalError(
1310 prolog +
"Found a Group as a member of a " + constructor->type_name() +
" data type. " +
1311 "This violates the DAP4 data model and cannot be processed!", __FILE__, __LINE__);
1313 load_attributes(*i);
// Load the attributes of a group, of every variable in it, and of every child group.
// A group with no parent is the root group and reads its attributes from the Dataset
// element; any other group uses its own XML node.
// Throws BESInternalError if the Dataset element is missing or if a Group appears in the
// group's variable collection.
// NOTE(review): the return type, the check that guards the first throw and the else that
// separates the two cases are not visible in this listing.
1318DMZ::load_attributes(D4Group *group) {
1320 if (group->get_parent() ==
nullptr) {
1321 xml_node dataset = d_xml_doc.child(
"Dataset");
1323 throw BESInternalError(
"Could not find the 'Dataset' element in the DMR++ XML document.", __FILE__, __LINE__);
1324 load_attributes(group, dataset);
1327 load_attributes(group, get_variable_xml_node(group));
1330 for (
auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
1335 if((*i)->type() == dods_group_c){
1336 throw BESInternalError(
1337 prolog +
"Found a Group instance in the variables collection for Group " + group->name() +
". " +
1338 "This violates the DAP4 data model and cannot be processed!", __FILE__, __LINE__);
1340 load_attributes(*i);
1343 for (
auto i = group->grp_begin(), e = group->grp_end(); i != e; ++i) {
1344 load_attributes(*i);
1348void DMZ::load_all_attributes(libdap::DMR *dmr)
1350 if(d_xml_doc ==
nullptr){
1351 throw BESInternalError(prolog +
"Received a null DMR pointer.", __FILE__, __LINE__);
1353 load_attributes(dmr->root());
// Load the values of a variable from the base64 text of a compact element.
// The decoded bytes are handed to the variable according to its type (for an array, the type
// of its template). A projected array is delegated to process_compact_subset(). Numeric types
// are copied with val2buf(). String / URL arrays are supported only when marked as
// fixed-length string arrays.
// Throws BESInternalError for missing data, for a Structure / Sequence / Grid variable, and
// for the unsupported cases reported in the messages below.
// NOTE(review): the return type, the switch header, most case labels, the break / return
// statements and the calls that store scalar string values are not visible in this listing.
1373DMZ::process_compact(BaseType *btp,
const xml_node &compact)
1378 auto char_data = compact.child_value();
1380 throw BESInternalError(
"The dmrpp::compact is missing data values.",__FILE__,__LINE__);
1382 std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
1385 if (btp->type()== dods_structure_c || btp->type() == dods_sequence_c || btp->type() == dods_grid_c)
1386 throw BESInternalError(
"The dmrpp::compact element must be the child of an array or a scalar variable", __FILE__, __LINE__);
// For an array, dispatch on the element type and note whether a subset was requested.
1389 Type dtype =btp->type();
1390 bool is_array_subset =
false;
1391 if (dtype == dods_array_c) {
// NOTE(review): 'da' is used without a null check visible here.
1392 auto *da =
dynamic_cast<DmrppArray *
>(btp);
1393 if (da->is_projected())
1394 is_array_subset =
true;
1396 dtype = btp->var()->type();
1399 if (is_array_subset) {
1400 auto *da =
dynamic_cast<DmrppArray *
>(btp);
1401 process_compact_subset(da,decoded);
1407 throw BESInternalError(
"DMR++ document fail: An Array may not be the template for an Array.", __FILE__, __LINE__);
// Numeric types: the decoded bytes are handed over as-is.
// NOTE(review): no visible check that decoded holds as many bytes as the variable needs.
1422 case dods_float32_c:
1423 case dods_float64_c:
1424 btp->val2buf(
reinterpret_cast<void *
>(decoded.data()));
1425 btp->set_read_p(
true);
1431 std::string str(decoded.begin(), decoded.end());
1432 if (btp->type() == dods_array_c) {
1433 auto *array =
dynamic_cast<DmrppArray *
>(btp);
1435 throw BESInternalError(
"Internal state error. Object claims to be array but is not.",__FILE__,__LINE__);
1437 if(array->is_flsa()){
// Fixed-length strings: cut the buffer into length_ll() pieces of fls_length bytes each.
// NOTE(review): no visible check that decoded holds length_ll() * fls_length bytes.
1439 auto fls_length = array->get_fixed_string_length();
1440 auto pad_type = array->get_fixed_length_string_pad();
1441 auto str_start =
reinterpret_cast<char *
>(decoded.data());
1442 vector<string> fls_values;
1443 while(fls_values.size() < btp->length_ll()){
1444 string aValue = DmrppArray::ingest_fixed_length_string(str_start,fls_length, pad_type);
1445 fls_values.emplace_back(aValue);
1446 str_start += fls_length;
1448 array->set_value(fls_values, (
int) fls_values.size());
1449 array->set_read_p(
true);
1453 throw BESInternalError(
"Variable Length Strings are not yet supported.",__FILE__,__LINE__);
// Scalar string or URL.
1457 if(btp->type() == dods_str_c) {
1458 auto *st =
static_cast<DmrppStr *
>(btp);
1460 st->set_read_p(
true);
1463 auto *st =
static_cast<DmrppUrl *
>(btp);
1465 st->set_read_p(
true);
1473 throw BESInternalError(
"Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
1476 case dods_structure_c:
1478 case dods_sequence_c:
// Load a projected (subset) array from the decoded bytes of a compact element.
// A buffer of da->width_ll(true) bytes is filled by handle_subset() and then handed to the
// array with val2buf(); the array is marked as read.
// Throws BESInternalError for arrays of strings or URLs, which are not supported here.
1489void DMZ::process_compact_subset(
DmrppArray *da, std::vector<u_int8_t> &decoded) {
1491 if (da->var()->type() == dods_str_c || da->var()->type() == dods_url_c)
1492 throw BESInternalError(
"Currently we don't support the subset for the compacted array of string",__FILE__,__LINE__);
1494 int64_t num_buf_bytes = da->width_ll(
true);
1495 vector<unsigned char> buf_bytes;
1496 buf_bytes.resize(num_buf_bytes);
// NOTE(review): da_dims is not used by any code visible in this function.
1497 vector<unsigned long long> da_dims = da->get_shape(
false);
1498 unsigned long subset_index = 0;
1499 vector<unsigned long long> subset_pos;
1500 handle_subset(da,da->dim_begin(),subset_index, subset_pos,buf_bytes,decoded);
1502 da->val2buf(
reinterpret_cast<void *
>(buf_bytes.data()));
1504 da->set_read_p(
true);
// Load the values of a variable-length string array from its XML element.
// The entries are read with vlsa::read(), stored in the array, and the array is marked as
// VLSA and as read.
// Throws BESInternalError when btp is not an array, or when its element type is neither
// string nor URL.
// NOTE(review): the null check that guards the second throw is not visible in this listing.
1507void DMZ::process_vlsa(libdap::BaseType *btp,
const pugi::xml_node &vlsa_element)
1513 if (btp->type() != dods_array_c) {
1514 throw BESInternalError(prolog +
"Received an unexpected "+ btp->type_name() +
1515 " Expected an instance of DmrppArray!", __FILE__, __LINE__);
1517 auto *array =
dynamic_cast<DmrppArray *
>(btp);
1519 throw BESInternalError(
"Internal state error. "
1520 "Object claims to be array but is not.", __FILE__, __LINE__);
1522 if(array->var()->type() != dods_str_c && array->var()->type() != dods_url_c){
1523 throw BESInternalError(prolog +
"Internal state error. "
1524 "Expected array of dods_str_c, got " +
1525 array->var()->type_name(), __FILE__, __LINE__);
1528 vector<string>entries;
1529 vlsa::read(vlsa_element, entries);
1531 array->set_is_vlsa(
true);
1532 array->set_value(entries, (
int) entries.size());
1533 array->set_read_p(
true);
// Load the values of a variable from the base64 text of a missing_data element.
// Only arrays and Byte scalars are accepted. A Byte scalar takes the first decoded byte.
// For an array, a single-element array copies the decoded bytes directly; otherwise the
// decoded bytes are inflated with uncompress(). A projected array is then subset with
// handle_subset() before the values are stored.
// Throws BESInternalError for missing data, an unsupported variable type, or a failed uncompress.
// NOTE(review): the return type, the conditions that guard the first and last throws, and the
// else keywords that separate the branches are not visible in this listing.
1537DMZ::process_missing_data(BaseType *btp,
const xml_node &missing_data)
1539 BESDEBUG(PARSER, prolog <<
"Coming to process_missing_data() " << endl);
1542 auto char_data = missing_data.child_value();
1544 throw BESInternalError(
"The dmrpp::missing_data doesn't contain missing data values.",__FILE__,__LINE__);
1546 std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
1548 if (btp->type() != dods_array_c && btp->type() !=dods_byte_c)
1549 throw BESInternalError(
"The dmrpp::missing_data element must be the child of an array or a unsigned char scalar variable", __FILE__, __LINE__);
1551 if (btp->type() == dods_byte_c) {
// NOTE(review): decoded[0] is read without a visible check that decoded is non-empty, and
// 'db' is used without a null check.
1552 auto db =
dynamic_cast<DmrppByte *
>(btp);
1553 db->set_value(decoded[0]);
1554 db->set_read_p(
true);
1557 auto *da =
dynamic_cast<DmrppArray *
>(btp);
1559 vector<Bytef> result_bytes;
// Size of the whole array in bytes: element count times the width of one element.
1563 auto result_size = (uLongf)(da->get_size(
false) *da->prototype()->width());
1564 result_bytes.resize(result_size);
// NOTE(review): memcpy copies result_size bytes from decoded without a visible check that
// decoded is at least that long.
1566 if (da->get_size(
false) == 1)
1567 memcpy(result_bytes.data(),decoded.data(),result_size);
1569 int retval = uncompress(result_bytes.data(), &result_size, decoded.data(), decoded.size());
1571 throw BESInternalError(
"The dmrpp::missing_data - fail to uncompress the mssing data.", __FILE__, __LINE__);
// A projected array keeps only the requested subset of the full buffer.
1574 if (da->is_projected()) {
1576 int64_t num_buf_bytes = da->width_ll(
true);
1577 vector<unsigned char> buf_bytes;
1578 buf_bytes.resize(num_buf_bytes);
1579 vector<unsigned long long> da_dims = da->get_shape(
false);
1580 unsigned long subset_index = 0;
1581 vector<unsigned long long> subset_pos;
1582 handle_subset(da,da->dim_begin(),subset_index, subset_pos,buf_bytes,result_bytes);
1584 da->val2buf(
reinterpret_cast<void *
>(buf_bytes.data()));
1588 da->val2buf(
reinterpret_cast<void *
>(result_bytes.data()));
1590 da->set_read_p(
true);
// Walk the member variables of var_ctor and test the type of each member.
//  - A member that is not a simple type is tested for being an array; for an array the
//    type of its element (t_a->var()) is tested: not a simple type, or dods_url_c,
//    dods_enum_c or dods_opaque_c.
//  - A simple-type member is tested for dods_url_c, dods_enum_c or dods_opaque_c.
// NOTE(review): the statements executed when a test matches, the declaration of 'bt' and
// the return statement are missing from this listing; presumably a match clears ret_value
// (initially true) and stops the loop — confirm.
1595DMZ::supported_special_structure_type_internal(Constructor *var_ctor) {
1597 bool ret_value =
true;
1598 Constructor::Vars_iter vi = var_ctor->var_begin();
1599 Constructor::Vars_iter ve = var_ctor->var_end();
1600 for (; vi != ve; vi++) {
1603 Type t_bt = bt->type();
// Members that are not simple types: only arrays are examined further.
1606 if (libdap::is_simple_type(t_bt) ==
false) {
1608 if (t_bt != dods_array_c) {
// NOTE(review): the dynamic_cast result is used without a visible null check.
1613 auto t_a =
dynamic_cast<Array *
>(bt);
1614 Type t_array_var = t_a->var()->type();
1615 if (!libdap::is_simple_type(t_array_var) || t_array_var == dods_url_c || t_array_var == dods_enum_c || t_array_var==dods_opaque_c) {
// Simple-type members: URL, Enum and Opaque are singled out.
1621 else if (t_bt == dods_url_c || t_bt == dods_enum_c || t_bt==dods_opaque_c) {
1632DMZ::supported_special_structure_type(BaseType *btp)
1634 bool ret_value =
false;
1635 Type t = btp->type();
1636 if ((t == dods_array_c && btp->var()->type() == dods_structure_c) || t==dods_structure_c) {
1637 Constructor *var_constructor =
nullptr;
1638 if (t==dods_structure_c)
1639 var_constructor =
dynamic_cast<Constructor*
>(btp);
1641 var_constructor =
dynamic_cast<Constructor*
>(btp->var());
1642 if (!var_constructor){
1643 throw BESInternalError(
1644 prolog +
"Failed to cast " + btp->var()->type_name() +
" " + btp->name() +
1645 " to an instance of Constructor." , __FILE__, __LINE__);
1648 ret_value = supported_special_structure_type_internal(var_constructor);
// Decode the base64 text content of a dmrpp special-structure-data element and load the
// values into btp, which must pass supported_special_structure_type().
//  - Array of structures: one DmrppStructure is duplicated from the array's prototype per
//    element, filled by process_special_structure_data_internal() and stored with
//    set_vec_ll(). A single running offset into the decoded bytes is shared by all
//    elements. Projected (subset) arrays are rejected.
//  - Scalar structure: the structure is filled directly, starting at offset 0.
// btp is marked as read at the end.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); the return type, the guards of several throws, braces and else lines
// are not visible.
1656DMZ::process_special_structure_data(BaseType *btp,
const xml_node &special_structure_data)
1658 BESDEBUG(PARSER, prolog <<
"Coming to process_special_structure_data() " << endl);
1660 if (supported_special_structure_type(btp) ==
false)
1661 throw BESInternalError(
"The dmrpp::the datatype is not a supported special structure variable", __FILE__, __LINE__);
1663 auto char_data = special_structure_data.child_value();
// NOTE(review): the condition guarding this throw (listing line 1664) is not visible.
1665 throw BESInternalError(
"The dmrpp::special_structure_data doesn't contain special structure data values.",__FILE__,__LINE__);
1667 std::vector <u_int8_t> values = base64::Base64::decode(char_data);
1668 size_t total_value_size = values.size();
// Array of structures.
1670 if(btp->type() == dods_array_c) {
1672 auto ar =
dynamic_cast<DmrppArray *
>(btp);
1673 if(ar->is_projected())
1674 throw BESInternalError(
"The dmrpp::currently we don't support subsetting of special_structure_data.",__FILE__,__LINE__);
1676 int64_t nelms = ar->length_ll();
1677 size_t values_offset = 0;
1679 for (int64_t element = 0; element < nelms; ++element) {
// NOTE(review): ptr_duplicate() creates a new object; whether it is released after
// set_vec_ll() (listing line 1687 is missing) cannot be seen here — confirm.
1681 auto dmrpp_s =
dynamic_cast<DmrppStructure*
>(ar->var()->ptr_duplicate());
1683 throw InternalErr(__FILE__, __LINE__,
"Cannot obtain the structure pointer.");
1685 process_special_structure_data_internal(dmrpp_s, values, total_value_size, values_offset);
1686 ar->set_vec_ll((uint64_t)element,dmrpp_s);
// Scalar structure (presumably the else branch).
1692 size_t values_offset = 0;
1693 auto dmrpp_s =
dynamic_cast<DmrppStructure*
>(btp);
1695 throw InternalErr(__FILE__, __LINE__,
"Cannot obtain the structure pointer.");
1696 process_special_structure_data_internal(dmrpp_s, values , total_value_size, values_offset);
1699 btp->set_read_p(
true);
// Fill the members of dmrpp_s from the byte buffer 'values', starting at values_offset
// and advancing values_offset as each member is consumed. From the visible branches:
//  - simple-type members other than string/URL/enum/opaque are loaded with val2buf()
//    directly from the buffer; the offset moves forward by the member's width_ll();
//  - a string member is read as base64 text up to a ';' and decoded before it is stored;
//  - an array member is handled the same two ways depending on its element type; a string
//    array reads one ';'-separated base64 entry per element;
//  - anything else raises InternalErr.
// dmrpp_s is marked as read at the end.
// total_value_size is the size of 'values'; it is used to bound the string scans.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); the declaration of 'bt', braces, else and break lines are not visible.
1703void DMZ::process_special_structure_data_internal(
DmrppStructure * dmrpp_s, std::vector<u_int8_t> &values ,
size_t total_value_size,
size_t & values_offset){
1705 Constructor::Vars_iter vi = dmrpp_s->var_begin();
1706 Constructor::Vars_iter ve = dmrpp_s->var_end();
1708 for (; vi != ve; vi++) {
1710 Type t_bt = bt->type();
// Fixed-size scalar member: copy straight from the buffer.
// NOTE(review): no visible check that values_offset + width stays inside 'values'.
1711 if (libdap::is_simple_type(t_bt) && t_bt != dods_str_c && t_bt != dods_url_c && t_bt!= dods_enum_c && t_bt!=dods_opaque_c) {
1713 BESDEBUG(
"dmrpp",
"var name is: " << bt->name() <<
"'" << endl);
1714 BESDEBUG(
"dmrpp",
"var values_offset is: " << values_offset <<
"'" << endl);
1715 bt->val2buf(values.data() + values_offset);
1716 values_offset += bt->width_ll();
// String member: base64 text terminated by ';'.
1718 else if (t_bt == dods_str_c) {
1719 BESDEBUG(
"dmrpp",
"var string name is: " << bt->name() <<
"'" << endl);
1720 BESDEBUG(
"dmrpp",
"var string values_offset is: " << values_offset <<
"'" << endl);
1721 if (total_value_size < values_offset)
1722 throw InternalErr(__FILE__, __LINE__,
"The offset of the retrieved value is out of the boundary.");
// Work on a copy of the rest of the buffer.
1723 size_t rest_buf_size = total_value_size - values_offset;
1724 u_int8_t* start_pointer = values.data() + values_offset;
1725 vector<char>temp_buf;
1726 temp_buf.resize(rest_buf_size);
1727 memcpy(temp_buf.data(),(
void*)start_pointer,rest_buf_size);
// Collect characters until the ';' terminator and remember where it was found.
1729 size_t string_stop_index =0;
1730 vector<char> string_value;
1731 for (
size_t i = 0; i <rest_buf_size; i++) {
1732 if(temp_buf[i] ==
';') {
1733 string_stop_index = i;
1737 string_value.push_back(temp_buf[i]);
// Decode the base64 text and store the resulting string.
1739 string encoded_str(string_value.begin(),string_value.end());
1740 vector <u_int8_t> decoded_str = base64::Base64::decode(encoded_str);
1741 vector <char> decoded_vec;
1742 decoded_vec.resize(decoded_str.size());
1743 memcpy(decoded_vec.data(),(
void*)decoded_str.data(),decoded_str.size());
1744 string final_str(decoded_vec.begin(),decoded_vec.end());
1745 bt->val2buf(&final_str);
// Continue after the terminator.
1746 values_offset = values_offset + string_stop_index+1;
// Array member.
1749 else if (t_bt == dods_array_c) {
1750 BESDEBUG(
"dmrpp",
"var array name is: " << bt->name() <<
"'" << endl);
1751 BESDEBUG(
"dmrpp",
"var array values_offset is: " << values_offset <<
"'" << endl);
1753 auto t_a =
dynamic_cast<Array *
>(bt);
1754 Type ar_basetype = t_a->var()->type();
// Array of fixed-size elements: copy straight from the buffer.
1755 if (libdap::is_simple_type(ar_basetype) && ar_basetype != dods_str_c && ar_basetype != dods_url_c && ar_basetype!= dods_enum_c && ar_basetype!=dods_opaque_c) {
1756 bt->val2buf(values.data() + values_offset);
1757 values_offset += bt->width_ll();
// Array of strings: one ';'-terminated base64 entry per element.
1759 else if (ar_basetype == dods_str_c) {
1761 if(total_value_size < values_offset)
1762 throw InternalErr(__FILE__, __LINE__,
"The offset of the retrieved value is out of the boundary.");
1764 size_t rest_buf_size = total_value_size - values_offset;
1765 u_int8_t* start_pointer = values.data() + values_offset;
1766 vector<char>temp_buf;
1767 temp_buf.resize(rest_buf_size);
1768 memcpy(temp_buf.data(),(
void*)start_pointer,rest_buf_size);
1770 int64_t num_ar_elems = t_a->length_ll();
1774 vector<string> encoded_str;
1775 encoded_str.resize(num_ar_elems);
// Split the text on ';' until every element has an entry.
// NOTE(review): the line that advances str_index is not visible in this listing.
1777 unsigned int str_index = 0;
1778 size_t string_stop_index = 0;
1779 for (
size_t i = 0; i <rest_buf_size; i++) {
1780 if(temp_buf[i] !=
';')
1781 encoded_str[str_index].push_back(temp_buf[i]);
1784 if (str_index == num_ar_elems) {
1785 string_stop_index = i;
1791 vector<string> final_str;
1792 final_str.resize(num_ar_elems);
// Decode each entry.
1795 for (
size_t i = 0; i <num_ar_elems; i++) {
1797 string temp_encoded_str(encoded_str[i].begin(),encoded_str[i].end());
1798 vector <u_int8_t> decoded_str = base64::Base64::decode(temp_encoded_str);
1799 vector <char> decoded_vec;
1800 decoded_vec.resize(decoded_str.size());
1801 memcpy(decoded_vec.data(),(
void*)decoded_str.data(),decoded_str.size());
1802 string temp_final_str(decoded_vec.begin(),decoded_vec.end());
1803 final_str[i] = temp_final_str;
1806 t_a->set_value_ll(final_str,num_ar_elems);
1807 values_offset = values_offset + string_stop_index+1;
// Any other member type is not supported.
1811 throw InternalErr(__FILE__, __LINE__,
"The base type of this structure is not integer or float or string. Currently it is not supported.");
1814 dmrpp_s->set_read_p(
true);
1833void DMZ::process_chunk(
DmrppCommon *dc,
const xml_node &chunk)
const
1839 string chunk_position_in_array;
1841 bool href_trusted =
false;
1843 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
1844 if (is_eq(attr.name(),
"offset")) {
1845 offset = attr.value();
1847 else if (is_eq(attr.name(),
"nBytes")) {
1848 size = attr.value();
1850 else if (is_eq(attr.name(),
"chunkPositionInArray")) {
1851 chunk_position_in_array = attr.value();
1853 else if (is_eq(attr.name(),
"fm")) {
1854 filter_mask = attr.value();
1856 else if (is_eq(attr.name(),
"href")) {
1857 href = attr.value();
1859 else if (is_eq(attr.name(),
"trust") || is_eq(attr.name(),
"dmrpp:trust")) {
1860 href_trusted = is_eq(attr.value(),
"true");
1864 if (offset.empty() || size.empty())
1865 throw BESInternalError(
"Both size and offset are required for a chunk node.", __FILE__, __LINE__);
1866 if (!href.empty()) {
1867 shared_ptr<http::url> data_url(
new http::url(href, href_trusted));
1868 if (filter_mask.empty())
1869 dc->
add_chunk(data_url, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
1871 dc->
add_chunk(data_url, dc->get_byte_order(), stoull(size), stoull(offset), stoul(filter_mask), chunk_position_in_array);
1874 if (filter_mask.empty())
1875 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
1877 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoull(size), stoull(offset), stoul(filter_mask), chunk_position_in_array);
1880 dc->accumlate_storage_size(stoull(size));
1883void DMZ::process_block(
DmrppCommon *dc,
const xml_node &chunk,
unsigned int block_count)
const
1889 string chunk_position_in_array;
1891 bool href_trusted =
false;
1893 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
1894 if (is_eq(attr.name(),
"offset")) {
1895 offset = attr.value();
1897 else if (is_eq(attr.name(),
"nBytes")) {
1898 size = attr.value();
1900 else if (is_eq(attr.name(),
"href")) {
1901 href = attr.value();
1903 else if (is_eq(attr.name(),
"trust") || is_eq(attr.name(),
"dmrpp:trust")) {
1904 href_trusted = is_eq(attr.value(),
"true");
1909 if (offset.empty() || size.empty())
1910 throw BESInternalError(
"Both size and offset are required for a block node.", __FILE__, __LINE__);
1911 if (!href.empty()) {
1912 shared_ptr<http::url> data_url(
new http::url(href, href_trusted));
1913 dc->
add_chunk(data_url, dc->get_byte_order(), stoull(size), stoull(offset),
true, block_count);
1916 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoull(size), stoull(offset),
true, block_count);
1919 dc->accumlate_storage_size(stoull(size));
// Visit the dmrpp:chunkDimensionSizes children of the 'chunks' node. The loop starts at
// the first child with that name and then walks every following sibling, so the name is
// tested again inside the loop. The text content of each match is read into 'sizes'.
// NOTE(review): the statement that consumes 'sizes' (listing line 1933) and the closing
// braces are missing from this listing; presumably the text is handed to 'dc' — confirm.
1928void DMZ::process_cds_node(
DmrppCommon *dc,
const xml_node &chunks)
1930 for (
auto child = chunks.child(
"dmrpp:chunkDimensionSizes"); child; child = child.next_sibling()) {
1931 if (is_eq(child.name(),
"dmrpp:chunkDimensionSizes")) {
1932 string sizes = child.child_value();
1938static void add_fill_value_information(
DmrppCommon *dc,
const string &value_string, libdap::Type fv_type)
1940 dc->set_fill_value_string(value_string);
1941 dc->set_fill_value_type(fv_type);
1942 dc->set_uses_fill_value(
true);
// Process the dmrpp:chunks child of var_node for the variable btp. The visible code
//  1. walks the attributes of the chunks element (compressionType, deflateLevel,
//     fillValue, byteOrder, structOffset, LBChunk),
//  2. reads the chunk dimension sizes with process_cds_node(),
//  3. processes the dmrpp:chunk children with process_chunk(), or the dmrpp:block
//     children with process_block() (linked blocks), or - when LBChunk is "true" - the
//     chunks that are made of several linked blocks with add_mblock_index() and
//     process_multi_blocks_chunk().
// Returns bool.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); every return statement, several attribute handlers, braces and else
// lines are not visible, so the meaning of the return value cannot be stated from here.
1953bool DMZ::process_chunks(BaseType *btp,
const xml_node &var_node)
const
1955 auto chunks = var_node.child(
"dmrpp:chunks");
1959 bool has_fill_value =
false;
1961 unsigned int block_count = 0;
1962 bool is_multi_lb_chunks =
false;
// Walk the attributes of the dmrpp:chunks element.
1964 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
// NOTE(review): the handler for compressionType is not visible in this listing.
1966 if (is_eq(attr.name(),
"compressionType")) {
// deflateLevel: a space separated list of unsigned integers.
1969 else if (is_eq(attr.name(),
"deflateLevel")) {
1970 string def_lev_str = attr.value();
1972 vector<string> def_lev_str_vec =
BESUtil::split(def_lev_str,
' ' );
1973 vector<unsigned int> def_levels;
1974 for (
const auto &def_lev:def_lev_str_vec)
1975 def_levels.push_back(stoul(def_lev));
1976 dc(btp)->set_deflate_levels(def_levels);
1978 else if (is_eq(attr.name(),
"fillValue")) {
// A variable flagged as an unsupported type cannot be processed: report it.
1981 string unsupported_type;
1982 if(flagged_as_unsupported_type(var_node,unsupported_type)){
1984 msg << prolog <<
"Found a dmrpp:chunk/@fillValue with a value of ";
1985 msg <<
"'" << unsupported_type <<
"' this means that ";
1986 msg <<
"the Hyrax service is unable to process this variable/dataset.";
1987 throw BESInternalError(msg.str(),__FILE__,__LINE__);
1990 has_fill_value =
true;
// Fill value chunks are rejected for URL, Sequence and Grid variables.
1993 if (btp->type()==dods_url_c
1994 || btp->type() == dods_sequence_c || btp->type() == dods_grid_c)
1995 throw BESInternalError(
"Fill Value chunks are unsupported for URL, sequence and grid types.", __FILE__, __LINE__);
// NOTE(review): the rest of the Structure case is not visible in this listing.
1997 if (btp->type() == dods_structure_c) {
1998 string fvalue_str = attr.value();
// For an array the fill value type is the element type; the other visible call uses the
// variable's own type.
2001 if (btp->type() == dods_array_c) {
2002 auto array =
dynamic_cast<libdap::Array*
>(btp);
2003 add_fill_value_information(dc(btp), attr.value(), array->var()->type());
2006 add_fill_value_information(dc(btp), attr.value(), btp->type());
// NOTE(review): the handler for byteOrder is not visible in this listing.
2008 else if (is_eq(attr.name(),
"byteOrder"))
// structOffset: a list of unsigned integers.
// NOTE(review): the declaration of so_str_vec is not visible in this listing.
2014 else if (is_eq(attr.name(),
"structOffset")) {
2015 string so_str = attr.value();
2018 vector<unsigned int> struct_offsets;
2019 for (
const auto &s_off:so_str_vec)
2020 struct_offsets.push_back(stoul(s_off));
2021 dc(btp)->set_struct_offsets(struct_offsets);
// LBChunk="true" marks chunks that consist of multiple linked blocks.
2024 else if (is_eq(attr.name(),
"LBChunk")) {
2025 string is_lbchunk_value = attr.value();
2026 if (is_lbchunk_value ==
"true") {
2027 is_multi_lb_chunks =
true;
// NOTE(review): the statement controlled by this test is not visible in this listing.
2035 if (has_fill_value ==
false && dc(btp)->get_one_chunk_fill_value() ==
true)
2039 process_cds_node(dc(btp), chunks);
// Is there at least one dmrpp:chunk child?
2044 bool is_chunked_storage =
false;
2045 for (
auto chunk = chunks.child(
"dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
2046 if (is_eq(chunk.name(),
"dmrpp:chunk")) {
2047 is_chunked_storage =
true;
// Ordinary chunked storage: one process_chunk() call per dmrpp:chunk child.
2052 if (is_chunked_storage && is_multi_lb_chunks==
false) {
2054 for (
auto chunk = chunks.child(
"dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
2055 if (is_eq(chunk.name(),
"dmrpp:chunk")) {
2056 process_chunk(dc(btp), chunk);
// Linked blocks: first pass over the dmrpp:block children.
// NOTE(review): the body of this pass (presumably counting blocks) is not visible.
2064 for (
auto chunk = chunks.child(
"dmrpp:block"); chunk; chunk = chunk.next_sibling()) {
2065 if (is_eq(chunk.name(),
"dmrpp:block")) {
// A single linked block is an error; more than one marks the variable as using linked
// blocks and each block is processed.
2072 if (block_count > 0) {
2073 if (block_count == 1)
2074 throw BESInternalError(
" The number of linked block is 1, but it should be > 1.", __FILE__, __LINE__);
2075 if (block_count >1) {
2077 dc(btp)->set_using_linked_block();
2080 for (
auto chunk = chunks.child(
"dmrpp:block"); chunk; chunk = chunk.next_sibling()) {
2081 if (is_eq(chunk.name(),
"dmrpp:block")) {
2082 process_block(dc(btp), chunk, block_count);
2083 BESDEBUG(PARSER, prolog <<
"This count of linked block of this variable is: " << block_count << endl);
2087 dc(btp)->set_total_linked_blocks(block_count);
// Chunks made of multiple linked blocks: first gather the (offset, length) pairs of every
// such chunk into a queue, then add the chunks, consuming the queue.
2090 else if (is_multi_lb_chunks) {
2092 queue <vector<pair<unsigned long long,unsigned long long>>> mb_index_queue;
2093 vector<pair<unsigned long long, unsigned long long>> offset_length_pair;
2096 for (
auto chunk = chunks.child(
"dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
2099 if (is_eq(chunk.name(),
"dmrpp:chunk"))
2100 add_mblock_index(chunk, mb_index_queue,offset_length_pair);
// The pairs still pending after the loop are pushed here.
2103 mb_index_queue.push(offset_length_pair);
2106 for (
auto chunk = chunks.child(
"dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
2107 if (is_eq(chunk.name(),
"dmrpp:chunk"))
2108 process_multi_blocks_chunk(dc(btp),chunk, mb_index_queue);
2123vector<unsigned long long> DMZ::get_array_dims(Array *array)
2125 vector<unsigned long long> array_dim_sizes;
2126 for (
auto i= array->dim_begin(), e = array->dim_end(); i != e; ++i) {
2127 array_dim_sizes.push_back(array->dimension_size_ll(i));
2130 return array_dim_sizes;
2143size_t DMZ::logical_chunks(
const vector <unsigned long long> &array_dim_sizes,
const DmrppCommon *dc)
2146 if (chunk_dim_sizes.size() != array_dim_sizes.size()) {
2148 oss <<
"Expected the chunk and array rank to match (chunk: " << chunk_dim_sizes.size() <<
", array: "
2149 << array_dim_sizes.size() <<
")";
2150 throw BESInternalError(oss.str(), __FILE__, __LINE__);
2153 size_t num_logical_chunks = 1;
2154 auto i = array_dim_sizes.begin();
2155 for (
auto chunk_dim_size: chunk_dim_sizes) {
2156 auto array_dim_size = *i++;
2157 num_logical_chunks *= (size_t)ceil((
float)array_dim_size / (
float)chunk_dim_size);
2160 return num_logical_chunks;
2177set< vector<unsigned long long> > DMZ::get_chunk_map(
const vector<shared_ptr<Chunk>> &chunks)
2179 set< vector<unsigned long long> > chunk_map;
2180 for (
auto const &chunk: chunks) {
2181 chunk_map.insert(chunk->get_position_in_array());
// Step through every chunk position of an array with a DmrppChunkOdometer (built from
// array_shape and chunk_shape) and, for each position that is not in chunk_map, add a
// chunk built from the variable's fill value. Three add_chunk() forms are visible: with
// struct_size (when it is not 0), with the structure_type_element list filled by
// is_simple_dap_structure_scalar_array(), and the plain form.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); the declaration of 'dcp', the opening of the do/while loop and the tests
// that choose between the three add_chunk() calls are not all visible.
2195void DMZ::process_fill_value_chunks(BaseType *btp,
const set<shape> &chunk_map,
const shape &chunk_shape,
2196 const shape &array_shape,
unsigned long long chunk_size,
unsigned int struct_size)
2200 DmrppChunkOdometer odometer(array_shape, chunk_shape);
// Current chunk position; a position missing from chunk_map gets a fill value chunk.
2202 const auto &s = odometer.indices();
2203 if (chunk_map.find(s) == chunk_map.end()) {
2208 vector<pair<Type,int>> structure_type_element;
2209 bool ret_value = is_simple_dap_structure_scalar_array(btp,structure_type_element);
2211 if (struct_size !=0)
2212 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), chunk_size, s, struct_size);
2214 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), chunk_size, s, structure_type_element);
2217 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), chunk_size, s);
// Continue until the odometer reports that there is no next position.
2219 }
while (odometer.next());
// NOTE(review): the signature line of this function is not part of this listing. The body
// matches the documented DMZ::load_chunks(libdap::BaseType *btp), "Load the chunk
// information into a variable" — confirm.
//
// Outline of the visible code:
//  1. Test whether the variable's chunks were already loaded.
//  2. Find the variable's XML node; BESInternalError if it cannot be found.
//  3. If process_chunks() returns true, add fill value chunks where needed:
//     - chunked array: when the number of logical chunks differs from the number of
//       chunks read, process_fill_value_chunks() supplies the missing ones;
//     - array without any chunk: one fill value chunk covers the whole array;
//     - non-array variable without any chunk: one fill value chunk for the value.
//  4. Process the other possible children of the variable's node: dmrpp:chunk,
//     dmrpp:compact, dmrpp:missingdata, dmrpp:specialstructuredata and the VLSA element.
//  5. When DmrppRequestHandler::d_require_chunks is set, exactly one of those kinds of
//     information must have been found.
//  6. Flag the variable's chunks as loaded.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); several declarations (for example 'dcp', 'pia' in the scalar case, 'oss',
// 'vlsa_found'), guards, braces and else lines are not visible.
2232 if (dc(btp)->get_chunks_loaded())
2236 xml_node var_node = get_variable_xml_node(btp);
2237 if (var_node ==
nullptr)
2238 throw BESInternalError(
"Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
// Counters for the kinds of storage information found for this variable.
2242 int chunks_found = 0;
2243 int chunk_found = 0;
2244 int compact_found = 0;
2246 int missing_data_found = 0;
2247 int special_structure_data_found = 0;
2250 if (process_chunks(btp, var_node)) {
2252 BESDEBUG(PARSER, prolog <<
"This variable's chunks storage size is: " << dc(btp)->get_var_chunks_storage_size() << endl);
2253 auto array =
dynamic_cast<Array*
>(btp);
// Chunked array: compare the number of chunks needed with the number read.
2256 if (array && !dc(btp)->get_chunk_dimension_sizes().empty()) {
2257 auto const &array_shape = get_array_dims(array);
2258 size_t num_logical_chunks = logical_chunks(array_shape, dc(btp));
2260 if (num_logical_chunks != dc(btp)->get_chunks_size()) {
2261 auto const &chunk_map = get_chunk_map(dc(btp)->get_immutable_chunks());
// Size of one chunk in bytes: the element width (or, in a branch whose test is not
// visible, the last struct offset) times the product of the chunk dimension sizes.
2264 auto const &chunk_shape = dc(btp)->get_chunk_dimension_sizes();
2265 unsigned long long chunk_size_bytes = array->var()->width();
2266 vector<unsigned int> s_off = dc(btp)->get_struct_offsets();
2268 chunk_size_bytes = s_off.back();
2270 for (
auto dim_size: chunk_shape)
2271 chunk_size_bytes *= dim_size;
2272 unsigned int struct_size =(s_off.empty())?0:s_off.back();
2273 process_fill_value_chunks(btp, chunk_map, dc(btp)->get_chunk_dimension_sizes(),
2274 array_shape, chunk_size_bytes,struct_size);
2278 if (num_logical_chunks == 1)
2279 dc(btp)->set_one_chunk_fill_value(
true);
2280 dc(btp)->set_processing_fv_chunks();
// Array with no chunk at all: a single fill value chunk for the whole array.
2287 else if (array && dc(btp)->get_immutable_chunks().empty()) {
2288 auto const &array_shape = get_array_dims(array);
// NOTE(review): shape pia(0, n) uses the (count, value) vector constructor, so this is
// an empty vector rather than n zeros — confirm that this is intended.
2291 shape pia(0,array_shape.size());
// Number of elements in the array.
2295 unsigned long long array_size_bytes = 1;
2296 for (
auto dim_size: array_shape)
2297 array_size_bytes *= dim_size;
// String arrays: the size is based on the length of the fill value string.
2299 if (array->var()->type() == dods_str_c) {
2301 size_t str_size = dcp->get_fill_value().size();
2302 string fvalue = dcp->get_fill_value();
// NOTE(review): the handling of an empty fill value is not visible in this listing.
2311 if(dcp->get_fill_value()==
"") {
2315 array_size_bytes *=str_size;
2316 dcp->add_chunk(dcp->get_byte_order(), fvalue, dcp->get_fill_value_type(), array_size_bytes, pia);
// Other element types: the size is the element count times the element width.
2319 array_size_bytes *= array->var()->width();
2322 vector<pair<Type,int>> structure_type_element;
2323 bool ret_value = is_simple_dap_structure_scalar_array(btp,structure_type_element);
2325 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), array_size_bytes, pia, structure_type_element);
2327 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), array_size_bytes, pia);
// Non-array variable with no chunk: a single fill value chunk for the value.
2333 else if (btp->type()!=dods_array_c && dc(btp)->get_immutable_chunks().empty()) {
2334 if (btp->type() == dods_grid_c || btp->type() == dods_sequence_c || btp->type() ==dods_url_c) {
2336 oss <<
" For scalar variable with the contiguous storage that holds the fillvalue, only numeric"
2337 <<
" types are supported.";
2342 if (btp->type() == dods_str_c) {
2344 size_t array_size = dcp->get_fill_value().size();
2345 string fvalue = dcp->get_fill_value();
2352 if(dcp->get_fill_value()==
"") {
2356 dcp->add_chunk(dcp->get_byte_order(), fvalue, dcp->get_fill_value_type(), array_size, pia);
2359 vector<pair<Type,int>> structure_type_element;
2360 bool ret_value = is_simple_dap_structure_scalar_array(btp,structure_type_element);
2362 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), btp->width(), pia, structure_type_element);
2364 dcp->add_chunk(dcp->get_byte_order(), dcp->get_fill_value(), dcp->get_fill_value_type(), btp->width(), pia);
// A single dmrpp:chunk child of the variable.
2371 auto chunk = var_node.child(
"dmrpp:chunk");
2374 process_chunk(dc(btp), chunk);
// Compact storage.
2377 auto compact = var_node.child(
"dmrpp:compact");
2380 process_compact(btp, compact);
// Missing data.
2383 auto missing_data = var_node.child(
"dmrpp:missingdata");
2385 missing_data_found = 1;
2386 process_missing_data(btp, missing_data);
// Special structure data.
2389 auto special_structure_data = var_node.child(
"dmrpp:specialstructuredata");
2390 if (special_structure_data) {
2391 special_structure_data_found = 1;
2392 process_special_structure_data(btp, special_structure_data);
// Variable length string array.
2395 auto vlsa_element = var_node.child(DMRPP_VLSA_ELEMENT);
2398 process_vlsa(btp, vlsa_element);
// When chunks are required, exactly one kind of information must have been found.
2402 if (DmrppRequestHandler::d_require_chunks) {
2403 int elements_found = chunks_found + chunk_found + compact_found + vlsa_found + missing_data_found + special_structure_data_found;
2404 if (elements_found != 1) {
2406 oss <<
"Expected chunk, chunks or compact or variable length string or missing data or special structure data information in the DMR++ data. Found " << elements_found
2407 <<
" types of nodes.";
2412 dc(btp)->set_chunks_loaded(
true);
2415bool DMZ::is_simple_dap_structure_scalar_array(BaseType *btp, vector<pair<Type,int>> &structure_type_element) {
2417 bool ret_value =
false;
2419 if (btp->type()==dods_array_c) {
2421 auto t_a =
dynamic_cast<Array *
>(btp);
2422 Type t_array_var = t_a->var()->type();
2423 if (t_array_var == dods_structure_c) {
2424 auto t_s =
dynamic_cast<Structure *
>(t_a->var());
2425 ret_value = is_simple_dap_structure_internal(t_s, structure_type_element);
2428 else if (btp->type() == dods_structure_c) {
2429 auto t_s =
dynamic_cast<Structure *
>(btp);
2430 ret_value = is_simple_dap_structure_internal(t_s, structure_type_element);
// Walk the members of the structure ds and append one (type, element count) pair to
// structure_type_element for each accepted member:
//  - an array member whose element type is a simple type other than string is recorded
//    with its element type and its length (length_ll() narrowed to int);
//  - a scalar member of a simple type other than string is recorded with a count of 1.
// NOTE(review): the branches taken for any other member and the return statement are
// missing from this listing; presumably they set ret_value (initially true) to false and
// stop the loop — confirm.
2436bool DMZ::is_simple_dap_structure_internal(
const Structure *ds, vector<pair<Type,int>> &structure_type_element) {
2438 bool ret_value =
true;
2439 for (
const auto &bt:ds->variables()) {
2441 Type t_bt = bt->type();
// Array member: look at the element type.
2444 if (t_bt == dods_array_c) {
2445 auto t_a =
dynamic_cast<Array *
>(bt);
2446 Type t_array_var = t_a->var()->type();
2448 if (libdap::is_simple_type(t_array_var) ==
true && t_array_var != dods_str_c) {
2449 pair<Type,int> temp_pair;
2450 int64_t num_eles= t_a->length_ll();
2451 temp_pair.first = t_array_var;
2452 temp_pair.second = (int)(num_eles);
2453 structure_type_element.push_back(temp_pair);
// Scalar member.
2460 else if (libdap::is_simple_type(t_bt) ==
true && t_bt != dods_str_c) {
2461 pair<Type,int> temp_pair;
2462 temp_pair.first = t_bt;
2463 temp_pair.second = 1;
2464 structure_type_element.push_back(temp_pair);
// Recursively copy the elements selected by the constraint of 'da' from whole_buf (the
// values of the complete array) into subset_buf.
//   dim_iter      the dimension handled by this call
//   subset_index  index of the next element to write in subset_buf (in elements)
//   subset_pos    position chosen so far in the dimensions already handled
// For the dimension at hand the constraint's start, stop and stride are read. The
// INDEX_nD_TO_1D() helper turns a full position into an element index in whole_buf.
// Two copy paths are visible: a single memcpy of the run start..stop (in the branch that
// tests for the end of the dimensions and a stride of 1), and an element-by-element copy
// inside the loop over start..stop by stride; for other dimensions the loop recurses.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); the lines that advance dim_iter and subset_index in the loop, returns,
// braces and else lines are not visible.
2475void DMZ::handle_subset(
DmrppArray *da, libdap::Array::Dim_iter dim_iter,
unsigned long & subset_index, vector<unsigned long long> & subset_pos,
2476 vector<unsigned char>& subset_buf, vector<unsigned char>& whole_buf) {
// Shape of the complete array.
2479 vector<unsigned long long> da_dims = da->get_shape(
false);
2482 unsigned int bytes_per_elem = da->prototype()->width();
// The constraint for this dimension.
2485 uint64_t start = da->dimension_start_ll(dim_iter,
true);
2486 uint64_t stop = da->dimension_stop_ll(dim_iter,
true);
2487 uint64_t stride = da->dimension_stride_ll(dim_iter,
true);
// Contiguous run: copy start..stop with one memcpy.
2493 if (dim_iter == da->dim_end() && stride == 1) {
2496 subset_pos.push_back(start);
2497 unsigned long long start_index = INDEX_nD_TO_1D( da_dims,subset_pos);
2498 subset_pos.pop_back();
2500 subset_pos.push_back(stop);
2501 unsigned long long stop_index = INDEX_nD_TO_1D( da_dims,subset_pos);
2502 subset_pos.pop_back();
2505 unsigned char * temp_subset_buf = subset_buf.data() + subset_index*bytes_per_elem;
2506 unsigned char * temp_whole_buf = whole_buf.data() + start_index*bytes_per_elem;
2507 size_t num_bytes_to_copy = (stop_index-start_index+1)*bytes_per_elem;
2509 memcpy(temp_subset_buf,temp_whole_buf,num_bytes_to_copy);
2512 subset_index = subset_index +(stop_index-start_index+1);
// General case: visit every selected index of this dimension.
2516 for (uint64_t myDimIndex = start; myDimIndex <= stop; myDimIndex += stride) {
// More dimensions follow: fix this index and recurse.
2519 if (dim_iter != da->dim_end()) {
2521 subset_pos.push_back(myDimIndex);
2524 handle_subset(da,dim_iter,subset_index, subset_pos,subset_buf,whole_buf);
2525 subset_pos.pop_back();
// Copy a single element.
// NOTE(review): sourceIndex is an unsigned int while INDEX_nD_TO_1D() returns size_t, so
// indexes above the range of unsigned int would be truncated — confirm.
2529 subset_pos.push_back(myDimIndex);
2530 unsigned int sourceIndex = INDEX_nD_TO_1D( da_dims,subset_pos);
2531 subset_pos.pop_back();
2533 unsigned char * temp_subset_buf = subset_buf.data() + subset_index*bytes_per_elem;
2534 unsigned char * temp_whole_buf = whole_buf.data() + sourceIndex*bytes_per_elem;
2535 memcpy(temp_subset_buf,temp_whole_buf,bytes_per_elem);
// Collect the (offset, nBytes) pair of a dmrpp:chunk element that has a LinkedBlockIndex
// attribute. The pairs of one chunk are gathered in offset_length_pair; when an element
// with LinkedBlockIndex "0" is seen and pairs are already pending, the pending pairs are
// pushed on mb_index_queue and offset_length_pair is restarted with the new pair.
// Elements without a LinkedBlockIndex attribute are skipped by the outer test.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); three push_back() calls are visible but the braces and else lines that
// separate them are not, so the exact branch structure cannot be stated from here.
2543void DMZ::add_mblock_index(
const xml_node &chunk, queue<vector<pair<unsigned long long, unsigned long long >>>& mb_index_queue,
2544 vector<pair<unsigned long long, unsigned long long>>& offset_length_pair)
const{
// Find the value of the LinkedBlockIndex attribute, if there is one.
2546 string LBIndex_value;
2547 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
2548 if (is_eq(attr.name(),
"LinkedBlockIndex")) {
2549 LBIndex_value = attr.value();
2555 if (LBIndex_value.empty() ==
false) {
2557 pair<unsigned long long, unsigned long long> temp_offset_length;
// Read offset and nBytes into the pair.
2560 bool found_offset =
false;
2561 bool found_length =
false;
2562 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
2563 if (is_eq(attr.name(),
"offset")) {
2564 string offset = attr.value();
2565 temp_offset_length.first = stoull(offset);
2566 found_offset =
true;
2568 else if (is_eq(attr.name(),
"nBytes")) {
2569 string size = attr.value();
2570 temp_offset_length.second = stoull(size);
2571 found_length =
true;
// NOTE(review): the statement controlled by this test is not visible in this listing.
2573 if (found_offset && found_length)
// Index "0" starts the blocks of a new chunk.
2578 if (LBIndex_value ==
"0") {
2579 if (offset_length_pair.empty() ==
false) {
2580 mb_index_queue.push(offset_length_pair);
2583 offset_length_pair.clear();
2584 offset_length_pair.push_back(temp_offset_length);
2587 offset_length_pair.push_back(temp_offset_length);
2590 offset_length_pair.push_back(temp_offset_length);
// Add the chunk described by one dmrpp:chunk element to 'dc' when chunks may consist of
// several linked blocks. A chunk whose LinkedBlockIndex attribute is "0" is treated as a
// multi-block chunk: it is added with the list of (offset, length) pairs at the front of
// mb_index_queue, and that entry is then popped. Two further add_chunk() calls with the
// element's own nBytes and offset are visible for the other case. The element's size is
// added to the variable's accumulated storage size.
// Throws BESInternalError if offset or nBytes is missing or empty.
// NOTE(review): this is a source listing with lines missing (see the gaps in the embedded
// line numbers); the declarations of offset, size, href and filter_mask, braces, else
// lines and any early returns are not visible.
2595void DMZ::process_multi_blocks_chunk(dmrpp::DmrppCommon *dc,
const pugi::xml_node &chunk, std::queue<std::vector<std::pair<unsigned long long, unsigned long long>>>& mb_index_queue)
const {
2602 string chunk_position_in_array;
2604 bool href_trusted =
false;
// First look at the last attribute of the element for LinkedBlockIndex.
2609 bool multi_lbs_chunk =
false;
2610 auto LBI_attr = chunk.last_attribute();
2611 if (is_eq(LBI_attr.name(),
"LinkedBlockIndex")) {
2612 string LBI_attr_value = LBI_attr.value();
2613 if (LBI_attr_value ==
"0")
2614 multi_lbs_chunk =
true;
// NOTE(review): this loop iterates with 'attr' but tests LBI_attr (the last attribute) on
// every pass, so it never examines the other attributes. It looks like 'attr' was
// intended here — confirm.
2619 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
2620 if (is_eq(LBI_attr.name(),
"LinkedBlockIndex")) {
2621 string LBI_attr_value = LBI_attr.value();
2622 if (LBI_attr_value ==
"0")
2623 multi_lbs_chunk =
true;
// Collect the attributes of the chunk element.
2631 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
2633 if (is_eq(attr.name(),
"offset")) {
2634 offset = attr.value();
2636 else if (is_eq(attr.name(),
"nBytes")) {
2637 size = attr.value();
2639 else if (is_eq(attr.name(),
"chunkPositionInArray")) {
2640 chunk_position_in_array = attr.value();
2642 else if (is_eq(attr.name(),
"fm")) {
2643 filter_mask = attr.value();
2645 else if (is_eq(attr.name(),
"href")) {
2646 href = attr.value();
2648 else if (is_eq(attr.name(),
"trust") || is_eq(attr.name(),
"dmrpp:trust")) {
2649 href_trusted = is_eq(attr.value(),
"true");
2653 if (offset.empty() || size.empty())
2654 throw BESInternalError(
"Both size and offset are required for a chunk node.", __FILE__, __LINE__);
// Multi-block chunk: use the block list at the front of the queue.
2656 if (multi_lbs_chunk) {
2658 vector<pair<unsigned long long, unsigned long long>> temp_pair;
2659 if (!mb_index_queue.empty())
2660 temp_pair = mb_index_queue.front();
2662 if (!href.empty()) {
2663 shared_ptr<http::url> data_url(
new http::url(href, href_trusted));
2664 dc->
add_chunk(data_url, dc->get_byte_order(), chunk_position_in_array,temp_pair);
2667 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), chunk_position_in_array, temp_pair);
2669 mb_index_queue.pop();
// Otherwise: an ordinary chunk with its own offset and size.
// NOTE(review): filter_mask is read above but is not passed to these add_chunk() calls.
2673 if (!href.empty()) {
2674 shared_ptr<http::url> data_url(
new http::url(href, href_trusted));
2675 dc->
add_chunk(data_url, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
2678 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoull(size), stoull(offset), chunk_position_in_array);
2683 dc->accumlate_storage_size(stoull(size));
2688size_t DMZ::INDEX_nD_TO_1D (
const std::vector < unsigned long long > &dims,
2689 const std::vector < unsigned long long > &pos) {
2694 if(dims.size () != pos.size ())
2695 throw InternalErr(__FILE__,__LINE__,
"dimension error in INDEX_nD_TO_1D routine.");
2699 for (
const auto & one_pos:pos) {
2701 for (
size_t j = start; j < dims.size (); j++)
exception thrown if internal error encountered
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
static TheBESKeys * TheKeys()
Access to the singleton.
static bool read_bool_key(const std::string &key, bool default_value)
Read a boolean-valued key from the bes.conf file.
DMZ()=default
Build a DMZ without simultaneously parsing an XML document.
virtual void load_chunks(libdap::BaseType *btp)
Load the chunk information into a variable.
void parse_xml_doc(const std::string &filename)
Build the DOM tree for a DMR++ XML document.
virtual void build_thin_dmr(libdap::DMR *dmr)
populate the DMR instance as a 'thin DMR'
void parse_xml_string(const std::string &contents)
Build a DOM tree for a DMR++ using content from a string.
Extend libdap::Array so that a handler can read data using a DMR++ file.
Size and offset information of data included in DMR++ files.
void set_multi_linked_blocks_chunk(bool value)
Set the value of the boolean variable that indicates this variable contains multiple linked blocks in...
void set_disable_dio(bool value)
Set the value of the disable_dio property.
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Adds a chunk to the vector of chunk refs (byteStreams) and returns the size of the chunks internal ve...
void set_filter(const std::string &value)
Set the value of the filters property.
virtual void ingest_byte_order(const std::string &byte_order_string)
Parses the text content of the XML element chunks:byteOrder.
virtual const std::vector< unsigned long long > & get_chunk_dimension_sizes() const
The chunk dimension sizes held in a const vector.
void set_compact(bool value)
Set the value of the compact property.
void set_missing_data(bool value)
Set the value of the missing data.
virtual void set_one_chunk_fill_value(bool ufv)
Set the one_chunk_fill_value property.