30#include <unordered_set>
43#include <libdap/Str.h>
44#include <libdap/util.h>
45#include <libdap/D4Attributes.h>
46#include <libdap/Array.h>
50#include <BESNotFoundError.h>
51#include <BESInternalError.h>
52#include <BESInternalFatalError.h>
54#include <TheBESKeys.h>
55#include <BESContextManager.h>
58#include "DmrppTypeFactory.h"
59#include "DmrppD4Group.h"
60#include "DmrppArray.h"
61#include "DmrppStructure.h"
62#include "D4ParserSax2.h"
64#include "UnsupportedTypeException.h"
70namespace build_dmrpp_util {
// Evaluates the expression x only when the name `verbose` (declared outside this excerpt) is true.
// The do { ... } while(false) wrapper lets the macro be used like a single statement.
74#define VERBOSE(x) do { if (verbose) (x); } while(false)
// Message prefix of the form "# build_dmrpp::<name of the enclosing function>() - ".
75#define prolog std::string("# build_dmrpp::").append(__func__).append("() - ")
// The string "invocation"; where it is used is not visible in this part of the file.
77#define INVOCATION_CONTEXT "invocation"
/**
 * Maps an HDF5 filter identifier to the spelling of its H5Z_FILTER_* constant.
 *
 * @param filter_type The HDF5 filter id to name.
 * @return (per the signature) a string; each visible case assigns the constant's
 *         own name to `name`.
 * @throws BESInternalError with an "Unknown HDF5 FILTER" message; presumably this is
 *         the branch for ids with no matching case - TODO confirm.
 */
105string h5_filter_name(H5Z_filter_t filter_type) {
107 switch(filter_type) {
108 case H5Z_FILTER_NONE:
109 name =
"H5Z_FILTER_NONE";
111 case H5Z_FILTER_DEFLATE:
112 name =
"H5Z_FILTER_DEFLATE";
114 case H5Z_FILTER_SHUFFLE:
115 name =
"H5Z_FILTER_SHUFFLE";
117 case H5Z_FILTER_FLETCHER32:
118 name =
"H5Z_FILTER_FLETCHER32";
120 case H5Z_FILTER_SZIP:
121 name =
"H5Z_FILTER_SZIP";
123 case H5Z_FILTER_NBIT:
124 name =
"H5Z_FILTER_NBIT";
126 case H5Z_FILTER_SCALEOFFSET:
127 name =
"H5Z_FILTER_SCALEOFFSET";
// std::ios::ate opens the stream positioned after the initial text, so later inserts append to it.
131 ostringstream oss(
"ERROR! Unknown HDF5 FILTER (H5Z_filter_t) type: ", std::ios::ate);
133 throw BESInternalError(oss.str(),__FILE__,__LINE__);
/**
 * Obtains the creation property list of an HDF5 dataset via H5Dget_create_plist().
 *
 * @param dataset HDF5 dataset id.
 * @return (per the signature) an hid_t; presumably the property list id - TODO confirm.
 * @throws BESInternalError ("Unable to open HDF5 dataset id."); presumably when
 *         H5Dget_create_plist() reports failure - TODO confirm the exact test.
 */
144hid_t create_h5plist(hid_t dataset){
147 plist_id = H5Dget_create_plist(dataset);
149 throw BESInternalError(
"Unable to open HDF5 dataset id.", __FILE__, __LINE__);
/**
 * Converts a BaseType pointer to a DmrppCommon pointer using dynamic_cast.
 *
 * @param btp The variable to convert.
 * @return (per the signature) a DmrppCommon pointer.
 * @throws BESInternalError naming the variable; presumably raised when the cast
 *         yields null - TODO confirm.
 */
158DmrppCommon *toDC(BaseType *btp){
159 auto *dc =
dynamic_cast<DmrppCommon *
>(btp);
162 msg <<
"ERROR: Expected a BaseType that was also a DmrppCommon instance.";
// The message uses btp->name() when btp is non-null, otherwise the text "unknown".
163 msg <<
"(variable_name: "<< ((btp)?btp->name():
"unknown") <<
").";
164 throw BESInternalError(msg.str(), __FILE__, __LINE__);
/**
 * Converts a BaseType pointer to a DmrppArray pointer using dynamic_cast.
 *
 * @param btp The variable to convert.
 * @return (per the signature) a DmrppArray pointer.
 * @throws BESInternalError naming the variable; presumably raised when the cast
 *         yields null - TODO confirm.
 */
174DmrppArray *toDA(BaseType *btp){
175 auto *da =
dynamic_cast<DmrppArray *
>(btp);
178 msg <<
"ERROR: Expected a BaseType that was also a DmrppArray instance.";
// The message uses btp->name() when btp is non-null, otherwise the text "unknown".
179 msg <<
"(variable_name: "<< ((btp)?btp->name():
"unknown") <<
").";
180 throw BESInternalError(msg.str(), __FILE__, __LINE__);
/**
 * Reads the filter pipeline of an HDF5 dataset from its creation property list and
 * records it on the given DmrppCommon object.
 *
 * Visible behavior: each filter found adds a space-terminated token ("deflate ",
 * "shuffle ", "fletcher32 ") to `filters`; deflate levels are collected and passed to
 * dc->set_deflate_levels().
 *
 * @param dataset_id HDF5 dataset id.
 * @param dc Object that receives the deflate levels.
 * @param disable_dio Its use is not visible in this excerpt.
 * @throws BESInternalError ("Unsupported HDF5 filter: ..."); presumably for filter
 *         types without a dedicated case - TODO confirm.
 */
192static void set_filter_information(hid_t dataset_id, DmrppCommon *dc,
bool disable_dio) {
194 hid_t plist_id = create_h5plist(dataset_id);
197 int numfilt = H5Pget_nfilters(plist_id);
198 VERBOSE(cerr << prolog <<
"Number of filters associated with dataset: " << numfilt << endl);
// Buffer handed to H5Pget_filter2() to receive a filter's client-data values.
201 unsigned int cd_values[20];
202 vector<unsigned int> deflate_levels;
204 for (
int filter = 0; filter < numfilt; filter++) {
206 H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts,
207 cd_values, 0,
nullptr,
nullptr);
208 VERBOSE(cerr << prolog <<
"Found H5 Filter Type: " << h5_filter_name(filter_type) <<
" (" << filter_type <<
")" << endl);
209 switch (filter_type) {
210 case H5Z_FILTER_DEFLATE:
211 filters.append(
"deflate ");
// For deflate, the first client-data value is logged and stored as the compression level.
212 VERBOSE(cerr << prolog <<
"Deflate compression level: " << cd_values[0] << endl);
213 deflate_levels.push_back(cd_values[0]);
215 case H5Z_FILTER_SHUFFLE:
216 filters.append(
"shuffle ");
218 case H5Z_FILTER_FLETCHER32:
219 filters.append(
"fletcher32 ");
222 ostringstream oss(
"Unsupported HDF5 filter: ", std::ios::ate);
224 oss <<
" (" << h5_filter_name(filter_type) <<
")";
225 throw BESInternalError(oss.str(), __FILE__, __LINE__);
// Drops the last character of `filters` (every appended token ends with a space).
231 filters = filters.substr(0, filters.size() - 1);
233 dc->set_deflate_levels(deflate_levels);
234 if (!filters.empty())
/**
 * Queries whether a fill value is defined for an HDF5 dataset, using
 * H5Pfill_value_defined() on the dataset's creation property list.
 *
 * @param dataset_id HDF5 dataset id.
 * @return The value produced for each status is not visible in this excerpt;
 *         `ret_value` starts at -1.
 * @throws BESInternalError when H5Pfill_value_defined() returns a negative value.
 */
250is_hdf5_fill_value_defined(hid_t dataset_id)
// Passing null handler arguments to H5Eset_auto2() turns off HDF5's automatic error printing.
254 H5Eset_auto2(H5E_DEFAULT,
nullptr,
nullptr);
256 auto plist_id = create_h5plist(dataset_id);
261 short ret_value = -1;
264 H5D_fill_value_t status;
265 if ((H5Pfill_value_defined(plist_id, &status)) < 0) {
267 throw BESInternalError(
"Unable to access HDF5 Fillvalue information.", __FILE__, __LINE__);
// Three-way dispatch on the reported status; the branch bodies are not visible here.
269 if (status == H5D_FILL_VALUE_DEFAULT)
271 else if (status == H5D_FILL_VALUE_USER_DEFINED)
273 else if (status == H5D_FILL_VALUE_UNDEFINED)
/**
 * Renders a raw fill value, held as bytes in `value`, as text according to the HDF5
 * datatype `h5_type_id`.
 *
 * Visible behavior: integer values are reinterpreted at the width given by
 * H5Tget_size() and converted with to_string(); floating-point values are written to
 * a stream as float or double; several type classes raise exceptions (see below).
 * The case labels are not visible in this excerpt, so the mapping of each branch to
 * a type class is inferred from the error texts - TODO confirm.
 *
 * @param h5_type_id HDF5 datatype id of the fill value.
 * @param value Buffer holding the fill value bytes.
 * @throws BESInternalError when the integer or float value cannot be extracted, or
 *         the fill value cannot be extracted at all.
 * @throws UnsupportedTypeException with messages about variable-length strings,
 *         H5T_ARRAY and compound fill values.
 */
291get_value_as_string(hid_t h5_type_id, vector<char> &value)
293 H5T_class_t class_type = H5Tget_class(h5_type_id);
294 switch (class_type) {
297 sign = H5Tget_sign(h5_type_id);
298 switch (H5Tget_size(h5_type_id)) {
// H5T_SGN_2 selects the signed interpretation; otherwise the unsigned type of the same width is used.
300 if (sign == H5T_SGN_2)
301 return to_string(*(int8_t *) (value.data()));
303 return to_string(*(uint8_t *) (value.data()));
306 if (sign == H5T_SGN_2)
307 return to_string(*(int16_t *) (value.data()));
309 return to_string(*(uint16_t *) (value.data()));
312 if (sign == H5T_SGN_2)
313 return to_string(*(int32_t *) (value.data()));
315 return to_string(*(uint32_t *) (value.data()));
318 if (sign == H5T_SGN_2)
319 return to_string(*(int64_t *) (value.data()));
321 return to_string(*(uint64_t *) (value.data()));
324 throw BESInternalError(
"Unable to extract integer fill value.", __FILE__, __LINE__);
331 switch (H5Tget_size(h5_type_id)) {
333 oss << *(
float *) (value.data());
337 oss << *(
double *) (value.data());
341 throw BESInternalError(
"Unable to extract float fill value.", __FILE__, __LINE__);
// Variable-length string fill values are reported as unsupported; the message includes the
// length of the raw bytes and each byte written in hexadecimal.
347 if (H5Tis_variable_str(h5_type_id)) {
350 string fv_str(value.begin(),value.end());
353 stringstream msg(prolog);
354 msg <<
"UnsupportedTypeException: Your data granule contains a variable length H5T_STRING ";
355 msg <<
"as a fillValue type. This is not yet supported by the dmr++ creation machinery. ";
356 msg <<
"The variable/dataset type screening code should have intercepted this prior. ";
357 msg <<
"fillValue(" + to_string(fv_str.length()) +
" chars): 0x";
358 for(
auto c : fv_str){
// The unary plus promotes the char to int so it is printed as a number, not a character.
359 msg << std::hex << +c ;
361 throw UnsupportedTypeException(msg.str());
364 string str_fv(value.begin(),value.end());
370 string msg(prolog +
"UnsupportedTypeException: Your data granule contains an H5T_ARRAY as a fillValue type. "
371 "This is not yet supported by the dmr++ creation machinery."
372 "The variable/dataset type screening code should intercepted this prior.");
373 throw UnsupportedTypeException(msg);
378 string msg(prolog +
"UnsupportedTypeException: The fill value of a compound datatype should not be obtained in this function. "
379 "get_compound_fv_as_string() is the right function to get the value.");
380 throw UnsupportedTypeException(msg);
385 throw BESInternalError(
"Unable to extract fill value from HDF5 file.", __FILE__, __LINE__);
/**
 * Renders one integer or floating-point value, located at `value_ptr`, as text
 * according to the HDF5 datatype `h5_type_id`.
 *
 * @param h5_type_id HDF5 datatype id of the value.
 * @param value_ptr Address of the first byte of the value.
 * @throws BESInternalError when the integer or float value cannot be extracted, or
 *         with a message saying a compound member with a user-defined value has to
 *         be integer or float.
 */
391get_compound_base_fill_value_as_string(hid_t h5_type_id,
char* value_ptr)
393 H5T_class_t class_type = H5Tget_class(h5_type_id);
394 switch (class_type) {
397 sign = H5Tget_sign(h5_type_id);
398 switch (H5Tget_size(h5_type_id)) {
// H5T_SGN_2 selects the signed interpretation; otherwise the unsigned type of the same width is used.
400 if (sign == H5T_SGN_2)
401 return to_string(*(int8_t *) value_ptr);
403 return to_string(*(uint8_t *) value_ptr);
406 if (sign == H5T_SGN_2)
407 return to_string(*(int16_t *) value_ptr);
409 return to_string(*(uint16_t *) value_ptr);
412 if (sign == H5T_SGN_2)
413 return to_string(*(int32_t *) value_ptr);
415 return to_string(*(uint32_t *) value_ptr);
418 if (sign == H5T_SGN_2)
419 return to_string(*(int64_t *) value_ptr);
421 return to_string(*(uint64_t *) value_ptr);
424 throw BESInternalError(
"Unable to extract integer fill value.", __FILE__, __LINE__);
430 switch (H5Tget_size(h5_type_id)) {
432 oss << *(
float *) value_ptr;
436 oss << *(
double *) value_ptr;
440 throw BESInternalError(
"Unable to extract float fill value.", __FILE__, __LINE__);
444 throw BESInternalError(
"The member of compound datatype that has user-defined datatype has to be either integer or float..", __FILE__, __LINE__);
/**
 * Builds a text form of a user-defined fill value of a compound datatype by walking
 * the members of the native (memory) type and converting each member's bytes with
 * get_compound_base_fill_value_as_string(). Converted values are joined with a
 * single space character.
 *
 * @param dtype_id HDF5 datatype id of the compound type.
 * @param h5_plist_id Property list id; it is closed with H5Pclose() on the visible
 *        error paths before the exception is thrown.
 * @param value Buffer holding the raw fill value bytes.
 * @return (per the signature) a string; `ret_value` accumulates the converted values.
 * @throws BESInternalError on the HDF5 query failures shown below, and when an array
 *         member's base class is neither integer nor float.
 */
449string obtain_compound_user_defined_fvalues(hid_t dtype_id, hid_t h5_plist_id, vector<char> &value) {
454 if ((memtype = H5Tget_native_type(dtype_id, H5T_DIR_ASCEND))<0) {
455 H5Pclose(h5_plist_id);
456 throw BESInternalError (
"Fail to obtain memory datatype.", __FILE__, __LINE__);
460 if ((nmembs = H5Tget_nmembers(memtype)) < 0) {
462 H5Pclose(h5_plist_id);
463 string err_msg =
"Fail to obtain number of HDF5 compound datatype.";
464 throw BESInternalError (err_msg, __FILE__, __LINE__);
470 for (
unsigned int u = 0; u < (unsigned)nmembs; u++) {
473 H5T_class_t memb_cls = H5T_NO_CLASS;
474 size_t memb_offset = 0;
477 if((memb_id = H5Tget_member_type(memtype, u)) < 0) {
479 H5Pclose(h5_plist_id);
480 string err_msg =
"Fail to obtain the datatype of an HDF5 compound datatype member.";
481 throw BESInternalError (err_msg, __FILE__, __LINE__);
485 if((memb_cls = H5Tget_member_class (memtype, u)) < 0) {
486 H5Pclose(h5_plist_id);
489 string err_msg =
"Fail to obtain the datatype class of an HDF5 compound datatype member.";
490 throw BESInternalError (err_msg, __FILE__, __LINE__);
// Byte offset of this member inside the compound value.
496 memb_offset= H5Tget_member_offset(memtype,u);
498 if (memb_cls == H5T_ARRAY) {
500 hid_t at_base_type = H5Tget_super(memb_id);
501 size_t at_base_type_size = H5Tget_size(at_base_type);
502 H5T_class_t array_cls = H5Tget_class(at_base_type);
504 if (array_cls != H5T_INTEGER && array_cls !=H5T_FLOAT) {
507 string err_msg =
"The base class of an HDF5 compound datatype member must be integer or float.";
508 throw BESInternalError (err_msg, __FILE__, __LINE__);
512 int at_ndims = H5Tget_array_ndims(memb_id);
514 H5Pclose(h5_plist_id);
516 H5Tclose(at_base_type);
518 string err_msg =
"Fail to obtain number of dimensions of the array datatype.";
519 throw BESInternalError (err_msg, __FILE__, __LINE__);
522 vector<hsize_t>at_dims_h(at_ndims,0);
525 if (H5Tget_array_dims(memb_id,at_dims_h.data())<0) {
526 H5Pclose(h5_plist_id);
528 H5Tclose(at_base_type);
530 string err_msg =
"Fail to obtain each imension size of the array datatype.";
531 throw BESInternalError (err_msg, __FILE__, __LINE__);
534 vector<hsize_t>at_dims_offset(at_ndims,0);
// Total element count of the array member: the product of all its dimension sizes.
535 size_t total_array_nums = 1;
536 for (
const auto & ad:at_dims_h)
537 total_array_nums *=ad;
540 for (
unsigned ar_index = 0; ar_index <total_array_nums; ar_index++) {
// Element address = buffer start + member offset + element index * base type size.
541 char *value_ptr = value.data() + memb_offset + ar_index *at_base_type_size;
542 string tmp_value = get_compound_base_fill_value_as_string(at_base_type,value_ptr);
// The very first value starts the result; the other visible assignment appends with a space separator.
543 if (u == 0 && ar_index== 0)
544 ret_value = tmp_value;
546 ret_value = ret_value +
' '+ tmp_value;
549 H5Tclose(at_base_type);
555 char *value_ptr = value.data() + memb_offset;
556 string tmp_value = get_compound_base_fill_value_as_string(memb_id,value_ptr);
558 ret_value = tmp_value;
560 ret_value = ret_value +
' '+ tmp_value;
/**
 * Inspects the members of a compound HDF5 datatype (via its native memory type) and
 * classifies the type.
 *
 * Visible behavior: `ret_value` starts at 1, and `has_string_memb_type` is set once a
 * string member, or an array member whose base class is string, is seen. The values
 * assigned in the other branches are not visible in this excerpt.
 *
 * @param h5_type HDF5 datatype id of the compound type.
 * @return (per the signature) an unsigned short classification code.
 * @throws InternalErr on the HDF5 query failures shown below.
 */
570unsigned short is_supported_compound_type(hid_t h5_type) {
572 unsigned short ret_value = 1;
573 bool has_string_memb_type =
false;
575 if ((memtype = H5Tget_native_type(h5_type, H5T_DIR_ASCEND)) < 0) {
576 throw InternalErr(__FILE__, __LINE__,
"Fail to obtain memory datatype.");
580 H5T_class_t memb_cls = H5T_NO_CLASS;
582 char *memb_name =
nullptr;
584 if ((nmembs = H5Tget_nmembers(memtype)) < 0) {
585 throw InternalErr(__FILE__, __LINE__,
"Fail to obtain number of HDF5 compound datatype.");
588 for (
unsigned int u = 0; u < (unsigned) nmembs; u++) {
590 if ((memb_id = H5Tget_member_type(memtype, u)) < 0)
591 throw InternalErr(__FILE__, __LINE__,
592 "Fail to obtain the datatype of an HDF5 compound datatype member.");
595 memb_cls = H5Tget_member_class(memtype, u);
598 memb_name = H5Tget_member_name(memtype, u);
599 if (memb_name ==
nullptr)
600 throw InternalErr(__FILE__, __LINE__,
"Fail to obtain the name of an HDF5 compound datatype member.");
// Per-member classification: nested compound, array, or scalar member.
602 if (memb_cls == H5T_COMPOUND)
604 else if (memb_cls == H5T_ARRAY) {
606 hid_t at_base_type = H5Tget_super(memb_id);
607 H5T_class_t array_cls = H5Tget_class(at_base_type);
// Array members are examined by base class: integer, float and string get special treatment.
608 if (array_cls != H5T_INTEGER && array_cls != H5T_FLOAT && array_cls != H5T_STRING)
610 else if (array_cls == H5T_STRING && has_string_memb_type ==
false)
611 has_string_memb_type =
true;
612 H5Tclose(at_base_type);
615 }
else if (memb_cls != H5T_INTEGER && memb_cls != H5T_FLOAT) {
616 if (memb_cls == H5T_STRING) {
617 if (has_string_memb_type ==
false)
618 has_string_memb_type =
true;
631 if (has_string_memb_type)
/**
 * Produces the text form of the fill value of a compound datatype, depending on the
 * fill value status reported by H5Pfill_value_defined().
 *
 * Visible behavior: the default status yields "0"; a user-defined fill value is
 * converted with obtain_compound_user_defined_fvalues(), unless
 * is_supported_compound_type() returns 2, in which case an exception is thrown.
 *
 * @param dtype_id HDF5 datatype id of the compound type.
 * @param h5_plist_id Property list id; closed with H5Pclose() on the visible
 *        BESInternalError paths.
 * @param value Buffer holding the raw fill value bytes.
 * @throws BESInternalError when H5Pfill_value_defined() fails, or the fill value is undefined.
 * @throws UnsupportedTypeException when is_supported_compound_type() returns 2.
 */
639get_compound_fv_as_string(hid_t dtype_id, hid_t h5_plist_id, vector<char> &value)
641 H5D_fill_value_t fill_value_status;
642 if (H5Pfill_value_defined(h5_plist_id, &fill_value_status)<0) {
643 H5Pclose(h5_plist_id);
644 throw BESInternalError(
"H5Pfill_value_defined failed.", __FILE__, __LINE__);
648 string H5_Default_fvalue =
"0";
649 if (fill_value_status == H5D_FILL_VALUE_DEFAULT)
650 ret_str = H5_Default_fvalue;
651 else if (fill_value_status == H5D_FILL_VALUE_USER_DEFINED) {
// The exception text ties the return value 2 to a compound type with a string member.
654 if (is_supported_compound_type(dtype_id) == 2) {
656 string msg(prolog +
"UnsupportedTypeException: Your data granule contains an H5T_COMPOUND as user-defined fillValue type"
657 "and one member of H5T_COMPOUND is a string. "
658 "This is not yet supported by the dmr++ creation machinery. ");
659 string str_fv(value.begin(),value.end());
660 throw UnsupportedTypeException(msg);
663 ret_str = obtain_compound_user_defined_fvalues(dtype_id, h5_plist_id, value);
665 else if (fill_value_status == H5D_FILL_VALUE_UNDEFINED) {
666 H5Pclose(h5_plist_id);
667 throw BESInternalError(
"The fill value is undefined, the dmrpp module cannot handle this case now.", __FILE__, __LINE__);
/**
 * Tests whether a variable-length dataset is of a supported kind.
 *
 * The visible condition requires a simple (H5S_SIMPLE) dataspace and a base type of
 * class integer or float; `ret_value` starts as false. The body of the condition is
 * not visible in this excerpt.
 *
 * @param dataset_id HDF5 dataset id, used to obtain the dataspace.
 * @param h5_type HDF5 datatype id whose base type is obtained with H5Tget_super().
 * @return (per the signature) a bool.
 */
673bool is_supported_vlen_type(hid_t dataset_id, hid_t h5_type) {
675 bool ret_value =
false;
676 hid_t base_type = H5Tget_super(h5_type);
677 hid_t dspace = H5Dget_space(dataset_id);
678 if (H5S_SIMPLE == H5Sget_simple_extent_type(dspace) &&
679 (H5Tget_class(base_type) == H5T_INTEGER || H5Tget_class(base_type) == H5T_FLOAT))
/**
 * Reads the fill value of an HDF5 dataset and returns it in text form.
 *
 * The raw bytes are fetched with H5Pget_fill_value() into a buffer sized by
 * H5Tget_size() of the dataset's type. Compound types are converted with
 * get_compound_fv_as_string(); the other visible path uses get_value_as_string().
 *
 * @param dataset_id HDF5 dataset id.
 * @return (per the signature) a string; `fvalue_str` holds the converted value.
 * @throws BESInternalError with the messages shown below; the tests guarding the
 *         first two throws are not visible in this excerpt.
 */
693string get_hdf5_fill_value_str(hid_t dataset_id)
// Passing null handler arguments to H5Eset_auto2() turns off HDF5's automatic error printing.
696 H5Eset_auto2(H5E_DEFAULT,
nullptr,
nullptr);
699 hid_t plist_id = create_h5plist(dataset_id);
701 throw BESInternalError(
"Unable to open HDF5 dataset id.", __FILE__, __LINE__);
704 hid_t dtype_id = H5Dget_type(dataset_id);
706 throw BESInternalError(
"Unable to get HDF5 dataset type id.", __FILE__, __LINE__);
// Zero-initialized buffer with one byte per byte of the dataset's datatype.
708 vector<char> value(H5Tget_size(dtype_id), 0);
709 if (H5Pget_fill_value(plist_id, dtype_id, value.data()) < 0)
710 throw BESInternalError(
"Unable to access HDF5 Fill Value.", __FILE__, __LINE__);
714 if (H5Tget_class(dtype_id) == H5T_COMPOUND)
715 fvalue_str = get_compound_fv_as_string(dtype_id,plist_id,value);
717 fvalue_str = get_value_as_string(dtype_id, value);
/**
 * Translates an HDF5 string padding value (H5T_str_t) into the dmrpp string_pad_type.
 *
 * Visible mapping: H5T_STR_SPACEPAD -> dmrpp::space_pad, H5T_STR_NULLTERM ->
 * dmrpp::null_term, H5T_STR_NULLPAD -> dmrpp::null_pad.
 *
 * @param str_pad The HDF5 padding value.
 * @return (per the signature) a string_pad_type.
 * @throws BESInternalError with an "unrecognized value" message; presumably for any
 *         other input - TODO confirm.
 */
736string_pad_type convert_h5_str_pad_type(
const H5T_str_t str_pad){
737 string_pad_type pad_type;
739 case H5T_STR_SPACEPAD:
740 pad_type = dmrpp::space_pad;
743 case H5T_STR_NULLTERM:
744 pad_type = dmrpp::null_term;
747 case H5T_STR_NULLPAD:
748 pad_type = dmrpp::null_pad;
754 msg <<
"ERROR: Received unrecognized value for H5T_str_t: " << str_pad << endl;
755 throw BESInternalError(msg.str(),__FILE__,__LINE__);
/**
 * Determines the string padding scheme of an HDF5 dataset: reads the dataset's type
 * with H5Dget_type(), its padding with H5Tget_strpad(), and converts the result with
 * convert_h5_str_pad_type().
 *
 * @param dataset HDF5 dataset id.
 * @return The converted string_pad_type.
 * @throws BESInternalError with messages reporting failure of H5Dget_type() or
 *         H5Tget_strpad(); the guarding tests are not visible in this excerpt.
 */
768string_pad_type get_pad_type(
const hid_t dataset) {
769 hid_t h5_type = H5Dget_type(dataset);
772 msg <<
"ERROR: H5Dget_type() failed. returned: " << h5_type;
773 throw BESInternalError(msg.str(),__FILE__, __LINE__);
775 H5T_str_t str_pad = H5Tget_strpad(h5_type);
778 msg <<
"ERROR: H5Tget_strpad() failed. returned: " << str_pad;
779 throw BESInternalError(msg.str(),__FILE__, __LINE__);
781 return convert_h5_str_pad_type(str_pad);
/**
 * Records fixed-length string metadata (padding scheme and string length) on a
 * DmrppArray.
 *
 * Visible behavior: for a variable-length string type a "skipping" notice is written
 * to cout. When the array's template variable is of type libdap::dods_str_c, the pad
 * type from get_pad_type() and the size from H5Tget_size() are stored on the array.
 *
 * @param dataset_id HDF5 dataset id.
 * @param array_var The array that receives the pad type and string length.
 */
792void add_fixed_length_string_array_state(
const hid_t dataset_id, DmrppArray *array_var){
794 hid_t h5_type = H5Dget_type(dataset_id);
795 if (H5Tis_variable_str(h5_type) > 0 ){
796 cout <<
"# The dataset '" << array_var->name() <<
"' is a variable length string array, skipping..." << endl;
800 VERBOSE( cerr << prolog <<
"Processing the array dariable: " << array_var->name() << endl);
// Type of the array's template (element) variable.
801 auto data_type = array_var->var()->type();
803 if(data_type == libdap::dods_str_c){
804 VERBOSE( cerr << prolog <<
"The array template variable has type libdap::dods_str_c" << endl);
808 auto pad_type = get_pad_type(dataset_id);
809 VERBOSE( cerr << prolog <<
"pad_type: " << pad_type << endl);
810 array_var->set_fixed_length_string_pad(pad_type);
// The HDF5 type size is stored as the fixed string length.
812 auto type_size = H5Tget_size(h5_type);
813 VERBOSE( cerr << prolog <<
"type_size: " << type_size << endl);
814 array_var->set_fixed_string_length(type_size);
/**
 * Adds string-array metadata to a variable when it is a DAP array of strings backed
 * by a non-scalar HDF5 string dataset.
 *
 * The visible checks log a "Skipping..." message for variables that are not DAP
 * arrays, arrays whose element type is not String, datasets whose HDF5 class is not
 * H5T_STRING, and scalar dataspaces. What follows each message is not visible in this
 * excerpt (presumably an early return - TODO confirm). Variable-length string arrays
 * are flagged with set_is_vlsa(true); fixed-length ones are handed to
 * add_fixed_length_string_array_state().
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable associated with the dataset.
 * @throws BESInternalError when H5Dget_type() returns H5I_INVALID_HID.
 */
826static void add_string_array_info(
const hid_t dataset, BaseType *btp){
828 Type dap_type = btp->type();
829 if(dap_type != dods_array_c){
831 VERBOSE( cerr << prolog <<
"Variable " << btp->name() <<
" is not a DAP Array. Skipping..." << endl);
834 auto dap_array = toDA(btp);
835 if (dap_array->var()->type() != dods_str_c) {
837 VERBOSE( cerr << prolog <<
"Variable " << dap_array->name() <<
" is an Array of " << dap_array->var()->type_name() <<
" not String. Skipping..." << endl);
841 auto h5_dataset_type = H5Dget_type(dataset);
842 if(h5_dataset_type == H5I_INVALID_HID){
843 throw BESInternalError(
"ERROR: H5Dget_type() failed for variable '" + dap_array->name() +
"'",
847 auto h5_type_class = H5Tget_class(h5_dataset_type);
848 if(h5_type_class != H5T_STRING){
849 VERBOSE( cerr << prolog <<
"H5Dataset " << dap_array->name() <<
" is not a String type (type: " << h5_type_class <<
"). Skipping..." << endl);
853 hid_t dspace = H5Dget_space(dataset);
854 if (H5S_SCALAR == H5Sget_simple_extent_type(dspace)){
855 VERBOSE( cerr << prolog <<
"H5Dataset " << dap_array->name() <<
" is a scalar type. Skipping..." << endl);
859 if (H5Tis_variable_str(h5_dataset_type) > 0) {
860 VERBOSE( cerr << prolog <<
"Found variable length string array: " << dap_array->name() << endl);
861 dap_array->set_is_vlsa(
true);
864 VERBOSE( cerr << prolog <<
"Found fixed length string array: " << dap_array->name() << endl);
865 add_fixed_length_string_array_state( dataset, dap_array);
/**
 * Returns the byte order of an HDF5 dataset's datatype as a short string.
 *
 * The visible assignments produce "LE" or "BE"; the case labels selecting them are
 * not visible in this excerpt (presumably little- and big-endian - TODO confirm).
 *
 * @param dataset HDF5 dataset id.
 * @return The byte order string.
 * @throws BESInternalError ("Unsupported HDF5 dataset byteOrder: ...") including the
 *         numeric order value.
 */
874string byte_order_str(hid_t dataset){
875 string byte_order_string;
876 hid_t dtypeid = H5Dget_type(dataset);
877 auto b_order = H5Tget_order(dtypeid);
880 byte_order_string =
"LE";
883 byte_order_string =
"BE";
889 ostringstream oss(
"Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
890 oss << b_order <<
".";
891 throw BESInternalError(oss.str(), __FILE__, __LINE__);
893 return byte_order_string;
/**
 * Collects the byte offset of every member of a dataset's compound datatype, followed
 * by the total size of the datatype, into `struct_offsets`.
 *
 * @param dataset HDF5 dataset id.
 * @param struct_offsets Output vector; values are appended with push_back(), so any
 *        existing content is kept.
 * @throws BESInternalError when the member count or a member type cannot be obtained,
 *         or when H5Tget_size() returns 0.
 */
896void obtain_structure_offset(hid_t dataset, vector<unsigned int>& struct_offsets) {
898 hid_t dtypeid = H5Dget_type(dataset);
901 size_t memb_offset = 0;
903 int nmembs = H5Tget_nmembers(dtypeid);
906 throw BESInternalError(
"Cannot get the number of base datatypes in a compound datatype.", __FILE__, __LINE__);
909 for (
unsigned int u = 0; u < (unsigned) nmembs; u++) {
911 if ((memb_id = H5Tget_member_type(dtypeid, u)) < 0) {
913 throw BESInternalError(
"Cannot get the number of base datatypes in a compound datatype.", __FILE__, __LINE__);
917 memb_offset = H5Tget_member_offset(dtypeid, u);
// memb_offset is a size_t and is stored into a vector of unsigned int.
919 struct_offsets.push_back(memb_offset);
// The last entry appended is the size of the whole datatype.
926 size_t type_size = H5Tget_size(dtypeid);
927 if (type_size == 0) {
929 throw BESInternalError(
"Cannot get the correct data type size.", __FILE__, __LINE__);
931 struct_offsets.push_back(type_size);
/**
 * Handles a dataset stored with contiguous layout: reads the data's file address
 * (H5Dget_offset), its storage size (H5Dget_storage_size) and its byte order, then
 * registers them with add_chunk() on `dc`.
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable associated with the dataset; the declaration of `dc` is
 *        not visible in this excerpt (presumably toDC(btp) - TODO confirm).
 */
943void process_contiguous_layout_dariable(hid_t dataset, BaseType *btp){
944 VERBOSE(cerr << prolog <<
" Storage: contiguous" << endl);
946 haddr_t cont_addr = H5Dget_offset(dataset);
947 hsize_t cont_size = H5Dget_storage_size(dataset);
948 string byte_order = byte_order_str(dataset);
950 VERBOSE(cerr << prolog <<
" Addr: " << cont_addr << endl);
951 VERBOSE(cerr << prolog <<
" Size: " << cont_size << endl);
952 VERBOSE(cerr << prolog <<
"byteOrder: " << byte_order << endl);
956 VERBOSE(cerr << prolog <<
" Before add_chunk: " <<btp->name() << endl);
// The fourth argument to add_chunk() is an empty string.
957 dc->
add_chunk(byte_order, cont_size, cont_addr,
"");
/**
 * Handles a dataset stored with chunked layout: records the filter information, then
 * queries each chunk with H5Dget_chunk_info() and registers its byte order, size,
 * address, filter mask and coordinates with add_chunk().
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable associated with the dataset.
 * @param disable_dio Forwarded to set_filter_information().
 * @throws BESInternalError when the chunk count or a chunk's storage info cannot be
 *         obtained (the guarding tests are not visible in this excerpt).
 * @throws BESNotFoundError when the chunk rank differs from the dataset rank.
 */
966void process_chunked_layout_dariable(hid_t dataset, BaseType *btp,
bool disable_dio) {
968 DmrppCommon *dc = toDC(btp);
969 hid_t fspace_id = H5Dget_space(dataset);
970 int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
971 string byte_order = byte_order_str(dataset);
973 hsize_t num_chunks = 0;
974 herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
976 throw BESInternalError(
"Could not get the number of chunks for variable "+ btp->name(), __FILE__, __LINE__);
979 VERBOSE(cerr << prolog <<
"Storage: chunked." << endl);
980 VERBOSE(cerr << prolog <<
"Number of chunks is: " << num_chunks << endl);
982 set_filter_information(dataset, dc, disable_dio);
985 vector<hsize_t> chunk_dims(dataset_rank, 0);
// NOTE(review): chunk_rank is unsigned but is compared with the int dataset_rank below; a
// negative (failure) return from H5Pget_chunk() would wrap to a large value - confirm intended.
987 unsigned int chunk_rank = 0;
988 hid_t plist_id = create_h5plist(dataset);
990 chunk_rank = H5Pget_chunk(plist_id, dataset_rank, chunk_dims.data());
998 if (chunk_rank != dataset_rank)
999 throw BESNotFoundError(
1000 "Found a chunk with rank different than the dataset's (aka variables') rank", __FILE__,
// NOTE(review): the loop index is unsigned int while num_chunks is hsize_t.
1005 for (
unsigned int i = 0; i < num_chunks; ++i) {
1006 vector<hsize_t> chunk_coords(dataset_rank, 0);
1010 unsigned filter_mask = 0;
1012 status = H5Dget_chunk_info(dataset, fspace_id, i, chunk_coords.data(),
1013 &filter_mask, &addr, &size);
1015 VERBOSE(cerr <<
"ERROR" << endl);
1016 throw BESInternalError(
"Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
1019 VERBOSE(cerr << prolog <<
"chk_idk: " << i <<
", addr: " << addr <<
", size: " << size << endl);
1020 dc->
add_chunk(byte_order, size, addr, filter_mask, chunk_coords);
/**
 * Reads the storage layout of an HDF5 dataset from its creation property list using
 * H5Pget_layout().
 *
 * @param dataset HDF5 dataset id.
 * @return (per the signature) an H5D_layout_t; presumably `layout_type` - TODO confirm.
 */
1024H5D_layout_t get_h5_storage_layout(hid_t dataset){
1025 H5D_layout_t layout_type;
1026 hid_t plist_id = create_h5plist(dataset);
1028 layout_type = H5Pget_layout(plist_id);
/**
 * Handles a scalar variable stored with compact layout: the value is read from the
 * dataset and stored directly on the DAP variable.
 *
 * Visible behavior: numeric values are read with get_data() into a byte buffer and
 * loaded with val2buf(); variable-length strings are read with read_vlen_string();
 * fixed-length strings are read with get_data() and set with set_value(). In every
 * visible path the variable is marked as read with set_read_p(true).
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable that receives the value.
 * @throws BESInternalError when the compact storage size is 0, when it does not match
 *         the computed size for a non-variable-length type, or (per the message) for
 *         an unsupported variable type.
 */
1037void process_compact_layout_scalar(hid_t dataset, BaseType *btp)
1042 VERBOSE(cerr << prolog <<
"Processing scalar dariable. Storage: compact" << endl);
1044 hid_t dtypeid = H5Dget_type(dataset);
1045 VERBOSE(cerr << prolog <<
" H5Dget_type(): " << dtypeid << endl);
1047 auto type_size = H5Tget_size(dtypeid);
1048 VERBOSE(cerr << prolog <<
" H5Tget_size(): " << type_size <<
" (The size of the datatype in bytes)" << endl);
1050 size_t compact_storage_size = H5Dget_storage_size(dataset);
1051 VERBOSE(cerr << prolog <<
" H5Dget_storage_size(): " << compact_storage_size <<
" (The amount of storage space, in bytes, or 0.)" << endl);
1052 if (compact_storage_size == 0) {
1053 throw BESInternalError(
"Cannot obtain the compact storage size.", __FILE__, __LINE__);
// Byte count to read: the type size on the string path, otherwise length() * type size.
1056 Type dap_type = btp->type();
1057 unsigned long long memRequired = 0;
1058 if (dap_type == dods_str_c)
1061 memRequired = type_size;
1063 memRequired = btp->length() * type_size;
// The size consistency check applies only when the type is not a variable-length string.
1067 if (H5Tis_variable_str(dtypeid) == 0) {
1068 if (compact_storage_size != memRequired)
1069 throw BESInternalError(
"Compact storage size does not match D4Array or scalar.", __FILE__,
1083 case dods_float32_c:
1084 case dods_float64_c:
1088 vector<uint8_t> values(memRequired, 0);
1089 get_data(dataset,
reinterpret_cast<void *
>(values.data()));
1090 btp->set_read_p(
true);
1091 btp->val2buf(
reinterpret_cast<void *
>(values.data()));
1098 auto str =
dynamic_cast<libdap::Str *
>(btp);
1099 if (H5Tis_variable_str(dtypeid) > 0) {
1100 vector<string> finstrval;
1102 finstrval.emplace_back(
"");
1103 read_vlen_string(dataset, 1,
nullptr,
nullptr,
nullptr, finstrval);
1104 string vlstr = finstrval[0];
1105 str->set_value(vlstr);
1106 str->set_read_p(
true);
1110 vector<uint8_t> values(memRequired, 0);
1111 get_data(dataset,
reinterpret_cast<void *
>(values.data()));
1112 string fstr(values.begin(), values.end());
1113 str->set_value(fstr);
1114 str->set_read_p(
true);
1120 throw BESInternalError(
"Unsupported compact storage variable type.", __FILE__, __LINE__);
/**
 * Handles a fixed-length string array stored with compact layout: adds the string
 * array metadata, then reads the raw string bytes into the array's compact string
 * buffer and marks the array as read.
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable; converted to a DmrppArray with toDA().
 */
1125void process_compact_flsa(hid_t dataset, BaseType *btp){
1127 add_string_array_info(dataset, btp);
1129 auto pad_type = get_pad_type(dataset);
1130 VERBOSE( cerr << prolog <<
"pad_type: " << pad_type << endl);
1132 auto h5_type = H5Dget_type(dataset);
1133 VERBOSE( cerr << prolog <<
"H5Dget_type(): " << h5_type << endl);
1137 auto fls_length = H5Tget_size(h5_type);
1138 VERBOSE( cerr << prolog <<
"fls_length: " << fls_length << endl);
// Buffer size = length_ll() of the variable times the fixed string length.
1140 auto memRequired = btp->length_ll() * fls_length;
1142 auto array = toDA(btp);
1143 auto &string_buf = array->compact_str_buffer();
1144 string_buf.resize(memRequired);
1145 get_data(dataset,
reinterpret_cast<void *
>(string_buf.data()));
1146 array->set_read_p(
true);
/**
 * Handles an array variable stored with compact layout: the values are read from the
 * dataset and stored directly on the DmrppArray.
 *
 * Visible behavior: numeric arrays are read with get_data() into a byte buffer and
 * loaded with val2buf(); variable-length string arrays are read with
 * read_vlen_string() and set with set_value(); fixed-length string arrays are
 * delegated to process_compact_flsa().
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable; converted to a DmrppArray with toDA().
 * @throws BESInternalError when the compact storage size is 0, when it does not match
 *         the computed size for a non-variable-length type, or (per the message) for
 *         an unsupported element type.
 */
1149void process_compact_layout_array(hid_t dataset, BaseType *btp) {
1151 VERBOSE(cerr << prolog <<
"BEGIN (" << btp->type_name() <<
" " << btp->name() <<
")" << endl);
1153 hid_t dtypeid = H5Dget_type(dataset);
1154 VERBOSE(cerr << prolog <<
" H5Dget_type(): " << dtypeid << endl);
1156 auto type_size = H5Tget_size(dtypeid);
1157 VERBOSE(cerr << prolog <<
" H5Tget_size(): " << type_size <<
" (The size of the datatype in bytes)" << endl);
1159 size_t compact_storage_size = H5Dget_storage_size(dataset);
1160 VERBOSE(cerr << prolog <<
" H5Dget_storage_size(): " << compact_storage_size <<
" (The amount of storage space, in bytes, or 0.)" << endl);
1161 if (compact_storage_size == 0) {
1162 throw BESInternalError(
"Cannot obtain the compact storage size.", __FILE__, __LINE__);
// Byte count to read: the type size on the string path, otherwise length() * type size.
1165 Type dap_type = btp->type();
1166 unsigned long long memRequired = 0;
1167 if (dap_type == dods_str_c)
1170 memRequired = type_size;
1172 memRequired = btp->length() * type_size;
// The size consistency check applies only when the type is not a variable-length string.
1176 if (H5Tis_variable_str(dtypeid) == 0) {
1177 if (compact_storage_size != memRequired)
1178 throw BESInternalError(
"Compact storage size does not match D4Array or scalar.", __FILE__,
// Dispatch on the type of the array's template (element) variable.
1182 auto array = toDA(btp);
1183 switch (array->var()->type()) {
1192 case dods_float32_c:
1193 case dods_float64_c:
1197 vector<uint8_t> values(memRequired, 0);
1198 get_data(dataset,
reinterpret_cast<void *
>(values.data()));
1199 array->set_read_p(
true);
1200 array->val2buf(
reinterpret_cast<void *
>(values.data()));
1207 if (H5Tis_variable_str(dtypeid) > 0) {
1209 vector<string> finstrval;
1211 finstrval.emplace_back(
"");
1212 read_vlen_string(dataset, 1,
nullptr,
nullptr,
nullptr, finstrval);
1213 array->set_value(finstrval, (
int) finstrval.size());
1214 array->set_read_p(
true);
1218 process_compact_flsa(dataset, btp);
1224 throw BESInternalError(
"Unsupported compact storage variable type.", __FILE__, __LINE__);
/**
 * Entry point for variables stored with HDF5 compact layout: rejects structure,
 * sequence and grid variables, verifies that the dataset's layout is H5D_COMPACT,
 * then dispatches to process_compact_layout_array() for DAP arrays or to
 * process_compact_layout_scalar() on the other visible path.
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable associated with the dataset.
 * @throws BESInternalError for structure/sequence/grid variables and when the dataset
 *         is not stored with compact layout.
 */
1234void process_compact_layout_dariable(hid_t dataset, BaseType *btp){
1236 VERBOSE(cerr << prolog <<
"Processing Compact Storage Layout Dariable" << endl);
1240 auto dc = toDC(btp);
1242 Type dap_type = btp->type();
1243 if ( dap_type == dods_structure_c
1244 || dap_type == dods_sequence_c
1245 || dap_type == dods_grid_c) {
1247 msg <<
"The variable " << btp->FQN() <<
" is an instance of " << btp->type_name() <<
", and utilizes ";
1248 msg <<
"the hdf5 compact storage layout (H5D_COMPACT). ";
1249 msg <<
"Only arrays of string and numeric data types are supported for the compact storage layout.";
1250 throw BESInternalError(msg.str(), __FILE__, __LINE__);
1253 auto layout_type = get_h5_storage_layout(dataset);
1254 if (layout_type != H5D_COMPACT)
1255 throw BESInternalError(
string(
"ERROR: The dataset is not stored with compact layout."), __FILE__, __LINE__);
1263 if (dap_type == dods_array_c) {
1264 process_compact_layout_array(dataset, btp);
1267 process_compact_layout_scalar(dataset, btp);
/**
 * When is_hdf5_fill_value_defined() reports a positive value for the dataset, fetches
 * the fill value text with get_hdf5_fill_value_str() and obtains the variable's
 * DmrppCommon view. How the value is then stored is not visible in this excerpt.
 *
 * @param dataset HDF5 dataset id.
 * @param btp The DAP variable associated with the dataset.
 */
1277void set_fill_value(hid_t dataset, BaseType *btp){
1278 short fill_value_defined = is_hdf5_fill_value_defined(dataset);
1279 if (fill_value_defined >0) {
1280 string fill_value = get_hdf5_fill_value_str(dataset);
1281 auto dc = toDC(btp);
/**
 * Serializes the elements of a compound (structure) dataset into
 * `encoded_struct_value`, member by member.
 *
 * Visible behavior: for every element and every member, string data (scalar string
 * members and string arrays, variable- or fixed-length) is Base64-encoded with a
 * trailing ";" and appended byte by byte; integer/float members and arrays are
 * appended as their raw bytes.
 *
 * @param memtype HDF5 memory datatype id of the compound type.
 * @param ty_size Size in bytes of one compound element.
 * @param num_elms Number of compound elements held in `struct_value`.
 * @param encoded_struct_value Output buffer; data is appended with push_back().
 * @param struct_value Raw bytes of the dataset as read from HDF5.
 * @param err_msg Receives a description when an HDF5 query fails.
 * @return (per the signature) a bool; `ret_value` starts as true. How failures change
 *         it is not visible in this excerpt.
 */
1288bool obtain_structure_string_value(hid_t memtype,
size_t ty_size, hssize_t num_elms, vector<char>& encoded_struct_value,
const vector<char>& struct_value,
string & err_msg) {
1290 bool ret_value =
true;
1291 size_t values_offset = 0;
1294 for (int64_t element = 0; element < num_elms; ++element) {
// Byte offset of the current element inside struct_value.
1297 size_t struct_elem_offset = ty_size*element;
1299 if ((nmembs = H5Tget_nmembers(memtype)) < 0) {
1300 err_msg =
"Fail to obtain number of HDF5 compound datatype.";
1308 for (
unsigned int u = 0; u < (unsigned)nmembs; u++) {
1311 H5T_class_t memb_cls = H5T_NO_CLASS;
1312 size_t memb_offset = 0;
1315 if((memb_id = H5Tget_member_type(memtype, u)) < 0) {
1316 err_msg =
"Fail to obtain the datatype of an HDF5 compound datatype member.";
1322 if((memb_cls = H5Tget_member_class (memtype, u)) < 0) {
1324 err_msg =
"Fail to obtain the datatype class of an HDF5 compound datatype member.";
1329 size_t memb_size = H5Tget_size(memb_id);
1334 memb_offset= H5Tget_member_offset(memtype,u);
// Absolute offset of this member's bytes = element offset + member offset.
1337 values_offset = struct_elem_offset + memb_offset;
1338 if (memb_cls == H5T_ARRAY) {
1340 hid_t at_base_type = H5Tget_super(memb_id);
1341 size_t at_base_type_size = H5Tget_size(at_base_type);
1342 H5T_class_t array_cls = H5Tget_class(at_base_type);
1348 int at_ndims = H5Tget_array_ndims(memb_id);
1349 if (at_ndims <= 0) {
1350 H5Tclose(at_base_type);
1352 err_msg =
"Fail to obtain number of dimensions of the array datatype.";
1357 vector<hsize_t>at_dims_h(at_ndims,0);
1360 if (H5Tget_array_dims(memb_id,at_dims_h.data())<0) {
1361 H5Tclose(at_base_type);
1363 err_msg =
"Fail to obtain each imension size of the array datatype.";
1368 vector<hsize_t>at_dims_offset(at_ndims,0);
// Total element count of the array member: the product of all its dimension sizes.
1369 size_t total_array_nums = 1;
1370 for (
const auto & ad:at_dims_h)
1371 total_array_nums *=ad;
1373 if (array_cls == H5T_STRING) {
1375 vector<string> str_val;
1376 str_val.resize(total_array_nums);
1378 if (H5Tis_variable_str(at_base_type) >0){
1379 auto src = (
void*)(struct_value.data()+values_offset);
1380 auto temp_bp =(
char*)src;
// Variable-length strings: one string is fetched per slot, stepping by the base type size.
1381 for (int64_t i = 0;i <total_array_nums; i++){
1383 get_vlen_str_data(temp_bp,tempstrval);
1384 str_val[i] = tempstrval;
1385 temp_bp += at_base_type_size;
// Fixed-length strings: the whole block is copied, then cut into pieces of the base type size.
1389 auto src = (
void*)(struct_value.data()+values_offset);
1390 vector<char> fix_str_val;
1391 fix_str_val.resize(total_array_nums*at_base_type_size);
1392 memcpy((
void*)fix_str_val.data(),src,total_array_nums*at_base_type_size);
1393 string total_in_one_string(fix_str_val.begin(),fix_str_val.end());
1394 for (int64_t i = 0; i<total_array_nums;i++)
1395 str_val[i] = total_in_one_string.substr(i*at_base_type_size,at_base_type_size);
1397 vector<string> encoded_str_val;
1398 encoded_str_val.resize(str_val.size());
// Each string is Base64-encoded and terminated with ';'.
1401 for (
int i = 0; i < str_val.size(); i++) {
1402 string temp_str = str_val[i];
1403 vector<u_int8_t>temp_val(temp_str.begin(),temp_str.end());
1404 encoded_str_val[i] = base64::Base64::encode(temp_val.data(), temp_str.size()) +
";";
1408 for (
const auto &es_val:encoded_str_val) {
1409 string temp_str = es_val;
1410 for(
const auto &ts:temp_str)
1411 encoded_struct_value.push_back(ts);
// Array path that copies the raw bytes of all array elements unchanged.
1416 vector<char> int_float_array;
1417 int_float_array.resize(total_array_nums*at_base_type_size);
1418 memcpy((
void*)int_float_array.data(),struct_value.data()+values_offset,total_array_nums*at_base_type_size);
1419 for (
const auto &int_float:int_float_array)
1420 encoded_struct_value.push_back(int_float);
1422 H5Tclose(at_base_type);
1425 else if (memb_cls == H5T_STRING) {
1429 if (H5Tis_variable_str(memb_id) >0){
1430 auto src = (
void*)(struct_value.data()+values_offset);
1431 auto temp_bp =(
char*)src;
1433 get_vlen_str_data(temp_bp,tempstrval);
1434 vector<u_int8_t>temp_val(tempstrval.begin(),tempstrval.end());
1435 encoded_str = base64::Base64::encode(temp_val.data(), tempstrval.size()) +
";";
1439 auto src = (
void*)(struct_value.data()+values_offset);
1440 vector<char> fix_str_val;
1441 fix_str_val.resize(memb_size);
1442 memcpy((
void*)fix_str_val.data(),src,memb_size);
1443 string fix_str_value(fix_str_val.begin(),fix_str_val.end());
1444 vector<u_int8_t>temp_val(fix_str_value.begin(),fix_str_value.end());
1445 encoded_str = base64::Base64::encode(temp_val.data(), fix_str_value.size()) +
";";
1447 for (
const auto &es:encoded_str)
1448 encoded_struct_value.push_back(es);
// Remaining member path: the member's raw bytes are copied unchanged.
1452 vector<char> int_float;
1453 int_float.resize(memb_size);
1454 memcpy((
void*)int_float.data(),struct_value.data()+values_offset,memb_size);
// NOTE(review): the next two statements repeat the resize/memcpy directly above with identical arguments.
1455 int_float.resize(memb_size);
1456 memcpy((
void*)int_float.data(),struct_value.data()+values_offset,memb_size);
1457 for (
const auto &int_f:int_float)
1458 encoded_struct_value.push_back(int_f);
1463 if (ret_value ==
false)
/**
 * @brief Read a compound dataset whose members include strings and stash its encoded
 * values in the DAP variable.
 *
 * The dataset is read in full using the native form of type_id. Depending on whether the
 * dataspace is scalar, btp is treated as a DmrppStructure or as a DmrppArray; its string
 * buffer is filled by obtain_structure_string_value() and the variable is flagged as a
 * "special structure" that has already been read.
 *
 * NOTE(review): the embedded line numbers skip values, so short statements (declarations,
 * braces, probably HDF5 handle cleanup) are missing from this listing. Confirm details
 * against the full file.
 *
 * @param dataset Open HDF5 dataset id.
 * @param type_id HDF5 datatype id of the dataset.
 * @param btp The DAP variable to populate.
 * @throws InternalErr if an HDF5 call fails or the values cannot be encoded.
 */
1471void process_string_in_structure(hid_t dataset, hid_t type_id, BaseType *btp) {
1474 size_t ty_size = -1;
1476 bool is_scalar =
false;
// Resolve the native (in-memory) datatype and its size in bytes.
1478 if ((memtype = H5Tget_native_type(type_id, H5T_DIR_ASCEND))<0)
1479 throw InternalErr (__FILE__, __LINE__,
"Fail to obtain memory datatype.");
1481 ty_size = H5Tget_size(memtype);
1484 if ((dspace = H5Dget_space(dataset))<0) {
1486 throw InternalErr (__FILE__, __LINE__,
"Cannot obtain data space.");
1489 hssize_t num_elms = H5Sget_simple_extent_npoints(dspace);
1493 throw InternalErr (__FILE__, __LINE__,
"Cannot obtain the number of elements of the data space.");
// One contiguous buffer holds every element of the dataset (H5S_ALL selection).
1496 vector<char> struct_value;
1497 struct_value.resize(num_elms*ty_size);
1498 if (H5Dread(dataset,memtype, H5S_ALL,H5S_ALL,H5P_DEFAULT,(
void*)struct_value.data())<0) {
1501 throw InternalErr (__FILE__, __LINE__,
"Cannot read the dataset.");
// Scalar dataspace vs. array dataspace selects which of the two branches below runs
// (presumably via is_scalar; the assignment is not visible here).
1504 if (H5S_SCALAR == H5Sget_simple_extent_type(dspace))
1509 bool ret_value =
false;
// Structure case. NOTE(review): no null check on the dynamic_cast result is visible.
1512 auto ds =
dynamic_cast<DmrppStructure *
>(btp);
1513 vector<char> & ds_buffer = ds->get_structure_str_buffer();
1514 ret_value = obtain_structure_string_value(memtype,ty_size,num_elms,ds_buffer,struct_value,err_msg);
1515 ds->set_special_structure_flag(
true);
1516 ds->set_read_p(
true);
// Array-of-structure case. NOTE(review): same remark about the unchecked cast.
1519 auto da =
dynamic_cast<DmrppArray *
>(btp);
1520 vector<char> &da_buffer = da->get_structure_array_str_buffer();
1521 ret_value = obtain_structure_string_value(memtype,ty_size,num_elms,da_buffer,struct_value,err_msg);
1522 da->set_special_structure_flag(
true);
1523 da->set_read_p(
true);
// A false return means encoding failed; err_msg carries the reason.
1527 if (ret_value ==
false)
1528 throw InternalErr (__FILE__, __LINE__, err_msg);
/**
 * @brief Load an HDF5 variable-length dataset of integers or floats into the array btp.
 *
 * Every vlen element is copied to the start of its own row in a zero-filled buffer whose
 * row width is the size of btp's last dimension; the buffer is then handed to the array,
 * which is marked as "missing data" and as read.
 *
 * NOTE(review): the embedded line numbers skip values, so short statements (the other
 * case labels of the switch, braces, cleanup before each throw, the return) are missing
 * from this listing.
 *
 * @param dset_id Open HDF5 dataset id; its type is expected to be a vlen.
 * @param btp The DAP variable; must be a DmrppArray.
 * @return Not visible in this listing - TODO confirm.
 * @throws InternalErr for an unsupported base type, a non-simple dataspace or a read failure.
 * @throws BESInternalError if btp is not a DmrppArray.
 */
1533bool handle_vlen_float_int_internal(hid_t dset_id, BaseType *btp) {
// Only vlen sequences whose base type is integer or float are accepted.
1535 hid_t vlen_type = H5Dget_type(dset_id);
1536 hid_t vlen_basetype = H5Tget_super(vlen_type);
1537 if (H5Tget_class(vlen_basetype) != H5T_INTEGER && H5Tget_class(vlen_basetype) != H5T_FLOAT) {
1539 throw InternalErr(__FILE__, __LINE__,
"Only support float or intger variable-length datatype.");
// Build the in-memory vlen type from the native form of the base type.
1542 hid_t vlen_base_memtype = H5Tget_native_type(vlen_basetype, H5T_DIR_ASCEND);
1543 hid_t vlen_memtype = H5Tvlen_create(vlen_base_memtype);
1546 hid_t vlen_space = H5Dget_space(dset_id);
1547 if (H5Sget_simple_extent_type(vlen_space) != H5S_SIMPLE) {
1549 throw InternalErr(__FILE__, __LINE__,
"Only support array of float or intger variable-length datatype.");
// Read every hvl_t descriptor (length + pointer) of the dataset.
1552 hssize_t vlen_number_elements = H5Sget_simple_extent_npoints(vlen_space);
1553 vector<hvl_t> vlen_data(vlen_number_elements);
1554 if (H5Dread(dset_id, vlen_memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, vlen_data.data()) <0) {
1556 throw InternalErr(__FILE__, __LINE__,
"Cannot read variable-length datatype data.");
1559 auto da =
dynamic_cast<DmrppArray *
>(btp);
1561 string err_msg =
"Expected to find a DmrppArray instance but did not in handle_vlen_float_int_internal().";
1562 throw BESInternalError(err_msg, __FILE__, __LINE__);
1564 switch (da->var()->type()) {
1575 case dods_float32_c:
1576 case dods_float64_c: {
// The output buffer is sized for the whole DAP array and pre-filled with zeros.
1578 libdap::Array::Dim_iter last_dim_iter = da->dim_end()-1;
1579 int64_t last_dim_size = da->dimension_size(last_dim_iter);
1580 size_t bytes_per_element = da->var()->width_ll();
1581 size_t total_data_buf_size = da->get_size(
false)*bytes_per_element;
1582 vector<char> data_buf(total_data_buf_size,0);
1583 char *temp_data_buf_ptr = data_buf.data();
1585 for (ssize_t i = 0; i < vlen_number_elements; i++) {
1587 size_t vlen_element_size = vlen_data[i].len * bytes_per_element;
// NOTE(review): temp_buf is not referenced by any visible line.
1588 vector<char> temp_buf(vlen_element_size);
// NOTE(review): no visible check that vlen_data[i].len fits within last_dim_size, nor that
// the number of vlen elements matches the number of rows - confirm the caller guarantees it.
1591 memcpy(temp_data_buf_ptr,vlen_data[i].p,vlen_element_size);
// Advance by a full row so the next element starts at the next row.
1595 temp_data_buf_ptr += last_dim_size*bytes_per_element;
1598 da->val2buf(data_buf.data());
1599 da->set_missing_data(
true);
1600 da->set_read_p(
true);
1605 throw InternalErr(__FILE__, __LINE__,
"Vector::val2buf: bad type");
// Give the library-allocated vlen buffers back, then close every handle opened above.
1608 H5Dvlen_reclaim(vlen_memtype, vlen_space, H5P_DEFAULT, (
void*)(vlen_data.data()));
1609 H5Sclose(vlen_space);
1610 H5Tclose(vlen_base_memtype);
1611 H5Tclose(vlen_basetype);
1612 H5Tclose(vlen_type);
1613 H5Tclose(vlen_memtype);
/**
 * @brief Handle the dataset as a variable-length int/float array if its HDF5 class is H5T_VLEN.
 *
 * @param dataset Open HDF5 dataset id.
 * @param btp The DAP variable that would receive the values.
 * @return The result of handle_vlen_float_int_internal() for a vlen dataset; ret_value
 * starts as false for anything else. NOTE(review): the closing lines (presumably closing
 * type_id and returning ret_value) are not visible in this listing.
 */
1619bool handle_vlen_float_int(hid_t dataset, BaseType *btp) {
1621 bool ret_value =
false;
1622 hid_t type_id = H5Dget_type(dataset);
1623 if (H5Tget_class(type_id) == H5T_VLEN)
1624 ret_value = handle_vlen_float_int_internal(dataset,btp);
1629void handle_vlen_float_int_index(hid_t file, BaseType *btp) {
1631 string vlen_index_name = btp->FQN();
1632 size_t vlen_name_pos = vlen_index_name.rfind(
"_vlen_index");
1633 if (vlen_name_pos == string::npos) {
1634 string err_msg = vlen_index_name +
" is not a variable length index variable name.";
1636 throw BESInternalError(err_msg, __FILE__, __LINE__);
1639 string vlen_name = vlen_index_name.substr(0,vlen_name_pos);
1641 H5Eset_auto2(H5E_DEFAULT,
nullptr,
nullptr);
1642 hid_t dset_id = H5Dopen2(file, vlen_name.c_str(), H5P_DEFAULT);
1644 throw BESInternalError(
"HDF5 vlen dataset '" + vlen_name +
"' cannot be opened.", __FILE__, __LINE__);
1646 hid_t vlen_type = H5Dget_type(dset_id);
1647 hid_t vlen_basetype = H5Tget_super(vlen_type);
1648 if (H5Tget_class(vlen_basetype) != H5T_INTEGER && H5Tget_class(vlen_basetype) != H5T_FLOAT) {
1650 throw InternalErr(__FILE__, __LINE__,
"Only support float or intger variable-length datatype.");
1653 hid_t vlen_base_memtype = H5Tget_native_type(vlen_basetype, H5T_DIR_ASCEND);
1654 hid_t vlen_memtype = H5Tvlen_create(vlen_base_memtype);
1657 hid_t vlen_space = H5Dget_space(dset_id);
1658 if (H5Sget_simple_extent_type(vlen_space) != H5S_SIMPLE) {
1660 throw InternalErr(__FILE__, __LINE__,
"Only support array of float or intger variable-length datatype.");
1663 hssize_t vlen_number_elements = H5Sget_simple_extent_npoints(vlen_space);
1664 vector<hvl_t> vlen_data(vlen_number_elements);
1665 if (H5Dread(dset_id, vlen_memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, vlen_data.data()) <0) {
1667 throw InternalErr(__FILE__, __LINE__,
"Cannot read variable-length datatype data.");
1670 auto da =
dynamic_cast<DmrppArray *
>(btp);
1673 string err_msg =
"Expected to find a DmrppArray instance but did not in handle_vlen_float_int_internal().";
1674 throw BESInternalError(err_msg, __FILE__, __LINE__);
1676 if (da->var()->type() != dods_int32_c) {
1678 string err_msg =
"vlen_index datatype must be 32-bit integer.";
1679 throw BESInternalError(err_msg, __FILE__, __LINE__);
1681 vector<int> vlen_index_data;
1682 for (ssize_t i = 0; i<vlen_number_elements; i++)
1683 vlen_index_data.push_back(vlen_data[i].len);
1684 da->set_value_ll(vlen_index_data.data(),vlen_number_elements);
1685 da->set_missing_data(
true);
1686 da->set_read_p(
true);
1688 H5Dvlen_reclaim(vlen_memtype, vlen_space, H5P_DEFAULT, (
void*)(vlen_data.data()));
1689 H5Sclose(vlen_space);
1690 H5Tclose(vlen_base_memtype);
1691 H5Tclose(vlen_basetype);
1692 H5Tclose(vlen_type);
1693 H5Tclose(vlen_memtype);
/**
 * @brief Collect the storage information for one HDF5 dataset and attach it to its DAP variable.
 *
 * Variable-length int/float datasets are tried first via handle_vlen_float_int(). Otherwise
 * the fill value is recorded, compound types get their string values or member offsets
 * processed, and the dataset is dispatched on its storage layout (contiguous, chunked, compact).
 *
 * NOTE(review): the embedded line numbers skip values, so short statements (braces, early
 * returns, case labels, guards) are missing from this listing.
 *
 * @param dataset Open HDF5 dataset id.
 * @param btp The DAP variable that corresponds to the dataset.
 * @param disable_dio Forwarded unchanged to the chunked-layout handler.
 * @throws BESInternalError if the datatype cannot be obtained or the layout is unsupported.
 */
1707static void get_variable_chunk_info(hid_t dataset, BaseType *btp,
bool disable_dio) {
// For arrays, report the element type name rather than the array's own type name.
1710 string type_name = btp->type_name();
1711 if (btp->type() == dods_array_c) {
1712 auto array = toDA(btp);
1713 type_name = array->var()->type_name();
1715 cerr << prolog <<
"Processing dataset/variable: " << type_name <<
" " << btp->name() << endl;
// Vlen int/float datasets are handled by handle_vlen_float_int(); what happens on a true
// result is not visible here (presumably an early return).
1718 if (
true == handle_vlen_float_int(dataset,btp))
1722 set_fill_value(dataset, btp);
1725 hid_t type_id = H5Dget_type(dataset);
1727 string err_msg =
"Cannot obtain the HDF5 data type of the dataset: " + btp->name() ;
1728 throw BESInternalError(err_msg, __FILE__, __LINE__);
1730 if (H5T_COMPOUND == H5Tget_class(type_id)) {
// is_supported_compound_type() result: 2 goes through the string-in-structure path,
// 1 records member offsets. The precise meaning of the codes is defined by that helper.
1732 unsigned short supported_compound_type = is_supported_compound_type(type_id);
1733 if (supported_compound_type ==2) {
1735 process_string_in_structure(dataset,type_id, btp);
1739 else if (supported_compound_type ==1) {
1741 auto layout_type = get_h5_storage_layout(dataset);
// Member offsets are only gathered when the data are not stored compactly.
1744 if (layout_type != H5D_COMPACT) {
1746 vector<unsigned int> struct_offsets;
1747 obtain_structure_offset(dataset,struct_offsets);
1748 VERBOSE(cerr << prolog <<
"struct_offsets[0]: " << struct_offsets[0]<< endl);
1750 auto dc = toDC(btp);
1751 dc->set_struct_offsets(struct_offsets);
// Dispatch on the storage layout.
1758 auto layout_type = get_h5_storage_layout(dataset);
1760 switch (layout_type) {
1761 case H5D_CONTIGUOUS: {
1762 process_contiguous_layout_dariable(dataset, btp);
1766 process_chunked_layout_dariable(dataset, btp, disable_dio);
1770 process_compact_layout_dariable(dataset,btp);
// std::ios::ate keeps the initial text and appends the layout code after it.
1774 ostringstream oss(
"Unsupported HDF5 dataset layout type: ", std::ios::ate);
1775 oss << layout_type <<
".";
1776 throw BESInternalError(oss.str(), __FILE__, __LINE__);
/**
 * @brief Build a human-readable declaration string for a variable, used in log and error text.
 *
 * For arrays the result starts with the element type name and the FQN, followed by one
 * entry per dimension holding the optional "name=" and the size, closed by "]".
 * For any other variable it is "<type name> <FQN>".
 *
 * NOTE(review): the statement that emits the opening bracket of each dimension is not
 * visible in this listing (the embedded line numbers skip 1793).
 *
 * @param btp The variable to describe.
 * @return The declaration text.
 */
1786string get_type_decl(BaseType *btp){
1787 stringstream type_decl;
1788 if(btp->type() == libdap::dods_array_c){
1789 auto array = toDA(btp);
1790 type_decl << array->var()->type_name() <<
" " << btp->FQN();
1791 for(
auto dim_itr = array->dim_begin(); dim_itr!=array->dim_end(); dim_itr++){
1792 auto dim = *dim_itr;
// Anonymous dimensions contribute only their size.
1794 if(!dim.name.empty()){
1795 type_decl << dim.name <<
"=";
1797 type_decl << dim.size <<
"]";
1801 type_decl << btp->type_name() <<
" " << btp->FQN();
1803 return type_decl.str();
/**
 * @brief Decide whether the HDF5 datatype of a dataset cannot be represented in dmr++.
 *
 * The decision is made on the HDF5 type class. For each rejected case an explanatory
 * message is assembled in a local stream (presumably copied into msg by lines that are
 * not visible here).
 *
 * NOTE(review): the embedded line numbers skip values, so several case labels, the
 * stringstream declarations, `break` statements and closing braces are missing from this
 * listing. No H5Tclose of h5_type_id is visible either - confirm in the full file.
 *
 * @param dataset_id Open HDF5 dataset id.
 * @param btp The DAP variable bound to the dataset.
 * @param msg Receives the explanation when one is produced.
 * @return true if the type is unsupported, false otherwise.
 */
1807bool is_unsupported_type(hid_t dataset_id, BaseType *btp,
string &msg){
1808 VERBOSE(cerr << prolog <<
"BEGIN " << get_type_decl(btp) << endl);
1810 bool is_unsupported =
false;
1811 hid_t h5_type_id = H5Dget_type(dataset_id);
1812 H5T_class_t class_type = H5Tget_class(h5_type_id);
1814 bool isArray = btp->type() == dods_array_c;
1816 switch (class_type) {
// Arrays of variable-length strings.
1818 if (H5Tis_variable_str(h5_type_id) && isArray) {
1820 msgs <<
"UnsupportedTypeException: Your data contains the dataset/variable: ";
1821 msgs << get_type_decl(btp) <<
" ";
1822 msgs <<
"which the underlying HDF5/NetCDF-4 file has stored as a";
1823 msgs << (isArray?
"n array of ":
" ");
1824 msgs <<
"variable length string";
1825 msgs << (isArray?
"s (AVLS). ":
". ");
1826 msgs <<
"This data architecture is not currently supported by ";
1827 msgs <<
"the dmr++ creation machinery. One solution available to you is to rewrite the granule ";
1828 msgs <<
"so that these arrays are represented as arrays of fixed length strings (AFLS). While ";
1829 msgs <<
"these may not be as 'elegant' as AVLS, the ragged ends of the AFLS compress well, so ";
1830 msgs <<
"the storage penalty is minimal.";
// NOTE(review): the message above says "not currently supported", yet the flag is set to
// false, so this case is NOT reported as unsupported. Looks deliberate (this file also has
// process_variable_length_string_array()), but confirm.
1832 is_unsupported =
false;
// H5T_ARRAY stored inside an array.
1838 msgs <<
"UnsupportedTypeException: Your data contains the dataset/variable: ";
1839 msgs << get_type_decl(btp) <<
" ";
1840 msgs <<
"which the underlying HDF5/NetCDF-4 file has stored as an array of H5T_ARRAY. ";
1841 msgs <<
"This is not yet supported by the dmr++ creation machinery.";
1843 is_unsupported =
true;
1846 case H5T_COMPOUND: {
// A result of 0 from is_supported_compound_type() marks the compound type as unsupported.
1847 unsigned short supported_compound_type = is_supported_compound_type(h5_type_id);
1848 if (supported_compound_type == 0) {
1850 msgs <<
"UnsupportedTypeException: Your data contains the dataset/variable: ";
1851 msgs << get_type_decl(btp) <<
" ";
1852 msgs <<
"which the underlying HDF5/NetCDF-4 file has stored as an HDF5 compound datatype and ";
1853 msgs <<
"the basetype of the compound datatype is not integer or float. ";
1854 msgs <<
"This is not yet supported by the dmr++ creation machinery.";
1856 is_unsupported =
true;
// Variable-length (non-string) datatypes.
1862 bool supported_vlen_type = is_supported_vlen_type(dataset_id,h5_type_id);
1863 if (supported_vlen_type ==
false) {
1865 msgs <<
"UnsupportedTypeException: Your data contains the dataset/variable: ";
1866 msgs << get_type_decl(btp) <<
" ";
1867 msgs <<
"which the underlying HDF5/NetCDF-4 file has stored as an HDF5 vlen datatype and ";
1868 msgs <<
"the basetype of the vlen datatype is not integer or float. ";
1869 msgs <<
"This is not yet supported by the dmr++ creation machinery.";
1871 is_unsupported =
true;
1881 VERBOSE(cerr << prolog <<
"END is_unsupported: " << (is_unsupported?
"true":
"false") << endl);
1882 return is_unsupported;
/**
 * @brief If the dataset is a scalar variable-length string, read its value into the DAP Str.
 *
 * The function only applies when btp is a dods_str_c variable and the HDF5 type is a
 * variable-length string; the value is read with read_vlen_string(), stored in the Str,
 * and the variable is marked as read.
 *
 * NOTE(review): the embedded line numbers skip values; the return statements (and whatever
 * is done with `dc`) are not visible in this listing.
 *
 * @param dataset Open HDF5 dataset id.
 * @param btp The DAP variable bound to the dataset.
 * @return Presumably true when the variable was handled here, false otherwise - TODO confirm.
 */
1893bool process_variable_length_string_scalar(
const hid_t dataset, BaseType *btp){
// Not a DAP string: nothing to do here.
1897 if(btp->type() != dods_str_c) {
// Not a variable-length HDF5 string (or the query failed): nothing to do here.
1901 auto h5_type_id = H5Dget_type(dataset);
1902 if(H5Tis_variable_str(h5_type_id) <= 0) {
1906 VERBOSE(cerr << prolog <<
"Processing VLSS: " << btp->FQN() <<
"\n");
// read_vlen_string() fills a caller-sized vector; a scalar needs exactly one slot.
1908 vector<string> vls_values;
1909 vls_values.emplace_back(
"");
1912 read_vlen_string(dataset, 1,
nullptr,
nullptr,
nullptr, vls_values);
1913 string vlss = vls_values[0];
1914 VERBOSE(cerr << prolog <<
" read_vlen_string(): " << vlss << endl);
1918 auto dc = toDC(btp);
// NOTE(review): no null check on the dynamic_cast result is visible.
1922 auto str =
dynamic_cast<libdap::Str *
>(btp);
1923 str->set_value(vlss);
1924 str->set_read_p(
true);
/**
 * @brief If the dataset is an array of variable-length strings, read all values into the DAP array.
 *
 * Applies only when btp is an array whose prototype is dods_str_c and the HDF5 type is a
 * variable-length string. The array is flagged as a VLSA, every element is read with
 * read_vlen_string(), the values are stored in the array and it is marked as read.
 *
 * NOTE(review): the embedded line numbers skip values; return statements, the remaining
 * read_vlen_string() arguments, some guards and handle cleanup are not visible in this listing.
 *
 * @param dataset Open HDF5 dataset id.
 * @param btp The DAP variable bound to the dataset.
 * @return Presumably true when the variable was handled here, false otherwise - TODO confirm.
 * @throws BESInternalError if the cast to DmrppArray fails or an HDF5 dataspace query fails.
 */
1938bool process_variable_length_string_array(
const hid_t dataset, BaseType *btp){
1940 if(btp->type() != dods_array_c) {
1943 auto dap_array = toDA(btp);
1945 throw BESInternalError(
"Malformed DAP object " + btp->FQN() +
1946 " Identifies as dods_array_c but cast to DmrppArray fails!", __FILE__, __LINE__);
// Only string arrays backed by variable-length HDF5 strings are handled here.
1949 if(dap_array->prototype()->type() != dods_str_c){
1953 hid_t h5_type_id = H5Dget_type(dataset);
1954 if(H5Tis_variable_str(h5_type_id) <= 0) {
1957 VERBOSE(cerr << prolog <<
"h5_type_id: " << h5_type_id <<
"\n");
1959 dap_array->set_is_vlsa(
true);
1960 VERBOSE(cerr << prolog <<
"Processing VLSA: " << dap_array->FQN() <<
"\n");
// Query the rank and the extent of every dimension.
1962 auto dspace = H5Dget_space(dataset);
1964 int ndims = H5Sget_simple_extent_ndims(dspace);
1965 VERBOSE(cerr << prolog <<
"ndims: " << ndims <<
"\n");
1967 vector<hsize_t>count(ndims,0);
1968 if(H5Sget_simple_extent_dims(dspace, count.data(),
nullptr) < 0){
1970 H5Tclose(h5_type_id);
1972 throw BESInternalError(
"Failed to get hdf5 count for variable: " + btp->FQN(), __FILE__, __LINE__);
1977 for(
int i=0; i<ndims; i++) {
1981 VERBOSE(cerr << prolog << msg.str() <<
"\n");
// NOTE(review): offset is constructed with ndims zeros and then ndims more zeros are
// appended, leaving 2*ndims entries. Harmless only if consumers use just the first ndims.
1983 vector<hsize_t> offset(ndims,0);
1984 for(
int i=0; i<ndims; i++)
1985 offset.emplace_back(0);
1993 hssize_t num_elements = H5Sget_simple_extent_npoints(dspace);
1994 if (num_elements < 0) {
1996 H5Tclose(h5_type_id);
1998 throw BESInternalError(
"Failed to obtain the number of elements for the variable : " + btp->FQN(), __FILE__, __LINE__);
2001 VERBOSE(cerr << prolog <<
"num_elements: " << num_elements <<
"\n");
// Pre-size the result so read_vlen_string() can fill one string per element.
2003 vector<string> vls_values(num_elements,
"");
2004 read_vlen_string(dataset,
2012 VERBOSE(cerr << prolog <<
" vls_values.size(): " << vls_values.size() <<
"\n");
2014 for (
const auto &sval: vls_values) {
2015 VERBOSE(cerr << prolog <<
" vls_values[" << to_string(indx++) <<
"]: '" << sval <<
"'\n");
// NOTE(review): the element count is narrowed to int here.
2019 dap_array->set_value(vls_values,(
int) vls_values.size());
2020 dap_array->set_read_p(
true);
/**
 * @brief Test whether a variable looks like a candidate for a generated ("fake") CF
 * coordinate variable.
 *
 * The visible conditions are: the only '/' in FQN is the leading one, btp is a
 * one-dimensional array of float32, it has no attributes, and its dimension carries the
 * variable's own name.
 *
 * NOTE(review): the embedded line numbers skip values; the statement executed when all
 * conditions hold (presumably setting ret_value to true) and the return are not visible.
 *
 * @param btp The variable to test.
 * @param FQN The fully qualified name of the variable.
 * @return ret_value, which starts as false.
 * @throws BESInternalError if btp claims to be an array but is not a DmrppArray.
 */
2025bool check_enable_cf_fake_cv(BaseType *btp,
const string& FQN) {
2027 bool ret_value =
false;
// find_last_of('/') == 0 means the last '/' is the first character of FQN.
2028 if (FQN.find_last_of(
'/')==0) {
2029 if (btp->type() == dods_array_c) {
2030 auto da =
dynamic_cast<DmrppArray *
>(btp);
2032 string err_msg =
"Expected to find a DmrppArray instance but did not in check_enable_cf_fake_cv().";
2033 throw BESInternalError(err_msg, __FILE__, __LINE__);
2036 if (btp->var()->type() == dods_float32_c && da->dimensions() == 1) {
2038 const D4Attributes *d4_attrs = btp->attributes();
2040 if (d4_attrs->empty()) {
// The single dimension must be named after the variable itself.
2042 Array::Dim_iter da_dim = da->dim_begin();
2043 if (da->dimension_name(da_dim) == btp->name())
/**
 * @brief Open the HDF5 dataset that corresponds to a DAP variable.
 *
 * Two lookup strategies are visible: one uses the "fullnamepath" attribute (falling back
 * to the variable's plain name), the other uses btp->FQN() and, for names found in
 * nc4_non_coord_candidate, first tries the "_nc4_non_coord_"-prefixed name in the same group.
 *
 * NOTE(review): the embedded line numbers skip values, so the conditions that select
 * between these paths (possibly preprocessor guards), the null/validity checks and the
 * return statement are not visible in this listing.
 *
 * @param file Open HDF5 file id.
 * @param btp The DAP variable to look up.
 * @param nc4_non_coord_candidate Names of variables that share a name with a dimension
 * of their group.
 * @return The dataset id - TODO confirm what is returned when the dataset cannot be opened.
 * @throws BESInternalError on the visible failure paths (missing attribute table, dataset
 * cannot be opened).
 */
2060hid_t get_h5_dataset_id(hid_t file, BaseType *btp,
const unordered_set<string> &nc4_non_coord_candidate) {
2061 D4Attributes *d4_attrs = btp->attributes();
2063 throw BESInternalError(
"Expected to find an attribute table for " + btp->name() +
" but did not.",
2064 __FILE__, __LINE__);
// Path 1: take the dataset path from the "fullnamepath" attribute when it has one value.
2069 const D4Attribute *attr = d4_attrs->get(
"fullnamepath");
2080 if (attr->num_values() == 1)
2081 FQN = attr->value(0);
2085 VERBOSE(cerr << prolog <<
"Working on: " << FQN << endl);
// Silence the HDF5 error stack so a failed open can be handled by the code below.
2092 H5Eset_auto2(H5E_DEFAULT,
nullptr,
nullptr);
2093 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// Second attempt with the variable's plain name.
2096 dataset = H5Dopen2(file,btp->name().c_str(),H5P_DEFAULT);
2098 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
// Path 2: derive the dataset path from the variable's FQN.
2109 H5Eset_auto2(H5E_DEFAULT,
nullptr,
nullptr);
2110 string FQN = btp->FQN();
2111 if (nc4_non_coord_candidate.find(btp->name()) != nc4_non_coord_candidate.end()) {
// Same group path, with the name prefixed by "_nc4_non_coord_".
2112 string real_name_candidate =
"_nc4_non_coord_" + btp->name();
2113 size_t fqn_last_fslash_pos = btp->FQN().find_last_of(
'/');
2114 string real_path_candidate = btp->FQN().substr(0, fqn_last_fslash_pos + 1) + real_name_candidate;
2115 dataset = H5Dopen2(file, real_path_candidate.c_str(), H5P_DEFAULT);
2124 VERBOSE(cerr << prolog <<
"Working on: " << FQN << endl);
2126 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// On this path a failed open is only reported as a warning in verbose mode.
2128 VERBOSE(cerr << prolog <<
"WARNING: HDF5 dataset '" << FQN <<
"' cannot be opened." << endl);
2130 else if(check_enable_cf_fake_cv(btp, FQN) ==
true)
/**
 * @brief Collect the names of a group's variables that share a name with one of the
 * group's dimensions.
 *
 * Only the variables and dimensions of this group itself are inspected in the visible code.
 *
 * NOTE(review): the embedded line numbers skip values; closing braces and comments are
 * not visible in this listing.
 *
 * @param group The group to inspect.
 * @param nc4_non_coord_candidate Receives the matching variable names (names are inserted;
 * nothing visible removes existing entries).
 */
2150void mk_nc4_non_coord_candidates(D4Group *group, unordered_set<string> &nc4_non_coord_candidate){
// First gather every dimension name declared in this group.
2153 unordered_set<string> dimname_list;
2154 D4Dimensions *grp_dims = group->dims();
2157 for (
auto di = grp_dims->dim_begin(), de = grp_dims->dim_end(); di != de; ++di)
2158 dimname_list.insert((*di)->name());
// Then record each variable whose name equals one of those dimension names.
2161 if (!dimname_list.empty()) {
2163 for (
auto btp = group->var_begin(), ve = group->var_end(); btp != ve; ++btp) {
2164 if (dimname_list.find((*btp)->name())!=dimname_list.end())
2165 nc4_non_coord_candidate.insert((*btp)->name());
/**
 * @brief Walk a DAP group, and recursively its child groups, adding storage information
 * to every variable.
 *
 * For each variable the matching HDF5 dataset is opened. When that works, unsupported
 * types raise UnsupportedTypeException, variable-length strings are read directly, and
 * everything else goes through get_variable_chunk_info() and add_string_array_info().
 * When the dataset cannot be opened, the visible code synthesises values for some
 * variables instead (see the comments below).
 *
 * NOTE(review): the embedded line numbers skip values, so guards, braces, the bodies of the
 * value-filling loops and any dataset cleanup are not visible in this listing.
 *
 * @param file Open HDF5 file id.
 * @param group The group to process.
 * @param disable_dio Forwarded to get_variable_chunk_info() and to the recursive calls.
 * @throws UnsupportedTypeException if a variable's HDF5 type is unsupported.
 * @throws BESInternalError if a variable is not the expected Dmrpp type.
 */
2179void get_chunks_for_all_variables(hid_t file, D4Group *group,
bool disable_dio) {
// Variables that share a name with a dimension need special name handling when opened.
2181 unordered_set<string> nc4_non_coord_candidate;
2182 mk_nc4_non_coord_candidates(group,nc4_non_coord_candidate);
2186 for(
auto btp : group->variables()) {
2187 VERBOSE(cerr << prolog <<
"-------------------------------------------------------" << endl);
2188 VERBOSE(cerr << prolog);
2189 VERBOSE(btp->print_decl(cerr,
"",
false,
false,
false) );
2190 VERBOSE(cerr << endl);
2192 auto dataset = get_h5_dataset_id(file, btp, nc4_non_coord_candidate);
// Dataset opened: reject unsupported types, then gather chunk/string information.
2200 if (is_unsupported_type(dataset, btp, msg)) {
2201 throw UnsupportedTypeException(msg);
// Variable-length strings (scalar or array) are read directly; everything else gets chunk info.
2204 if (!process_variable_length_string_scalar(dataset, btp) && !process_variable_length_string_array(dataset,btp)) {
2206 VERBOSE(cerr << prolog <<
"Building chunks for: " << get_type_decl(btp) << endl);
2207 get_variable_chunk_info(dataset, btp, disable_dio);
2209 VERBOSE(cerr << prolog <<
"Annotating String Arrays as needed for: " << get_type_decl(btp) << endl);
2210 add_string_array_info(dataset, btp);
// Dataset could not be opened: decide whether values must be embedded instead.
2220 VERBOSE(cerr << prolog <<
"Unable to open " << btp->FQN()
2221 <<
" with the hdf5 api. Skipping chunk production. "
2222 <<
"Need to check if we need to embed the data to the dmrpp file." << endl);
2225 D4Attributes *d4_attrs = btp->attributes();
2226 if (d4_attrs==
nullptr)
2229 if (d4_attrs->empty() ==
false) {
// Case: the variable has a "units" attribute whose first value is "level".
2231 D4Attribute *attr = d4_attrs->find(
"units");
2233 string attr_value = attr->value(0);
2234 if (attr_value ==
"level") {
2235 auto dc =
dynamic_cast<DmrppCommon *
>(btp);
2237 string err_msg =
"Expected to find a DmrppCommon instance but did not in get_chunks_for_all_variables().";
2238 throw BESInternalError(err_msg, __FILE__, __LINE__);
2240 auto da =
dynamic_cast<DmrppArray *
>(btp);
2242 string err_msg =
"Expected to find a DmrppArray instance but did not in get_chunks_for_all_variables().";
2243 throw BESInternalError(err_msg, __FILE__, __LINE__);
// One-dimensional int32 arrays get generated values; the loop body that computes each
// value is not visible here.
2246 if (da->dimensions() ==1 && btp->var()->type() == dods_int32_c){
2248 vector<int> level_value;
2249 level_value.resize((
size_t)(da->length()));
2250 for (int32_t i = 0; i <da->length(); i++)
2253 da->set_value(level_value.data(),da->length());
2254 da->set_missing_data(
true);
2255 da->set_read_p(
true);
// Case: an "orig_datatype" attribute of "VLEN_INDEX" marks a vlen index variable.
2259 attr = d4_attrs->find(
"orig_datatype");
2261 string attr_value = attr->value(0);
2262 if (attr_value ==
"VLEN_INDEX") {
2264 handle_vlen_float_int_index(file,btp);
// Array case handled separately from the attribute-driven cases above (the governing
// condition is not visible here).
2270 if (btp->type() == dods_array_c) {
2272 auto da =
dynamic_cast<DmrppArray *
>(btp);
2274 string err_msg =
"Expected to find a DmrppArray instance but did not in get_chunks_for_all_variables().";
2275 throw BESInternalError(err_msg, __FILE__, __LINE__);
// One-dimensional float32 arrays get generated values; the loop body is not visible here.
2278 if (da->dimensions() ==1 && btp->var()->type() == dods_float32_c){
2280 da->set_missing_data(
true);
2282 vector<float> level_value;
2283 level_value.resize((
size_t)(da->length()));
2284 for (int32_t i = 0; i <da->length(); i++)
2287 da->set_value(level_value.data(),da->length());
2288 da->set_missing_data(
true);
2289 da->set_read_p(
true);
// Recurse into the child groups.
2298 for(
auto g:group->groups()) {
2299 get_chunks_for_all_variables(file, g,disable_dio);
/**
 * @brief Open the HDF5 file and add storage information to every variable of the DMR++.
 *
 * NOTE(review): the embedded line numbers skip values; the guard around the error branch
 * and whatever follows the traversal (presumably closing the file) are not visible here.
 *
 * @param h5_file_name Path of the HDF5 file to open read-only.
 * @param dmrpp The DMR++ document whose variables are annotated, starting at its root group.
 * @param disable_dio Forwarded to get_chunks_for_all_variables().
 * @throws BESNotFoundError if the file cannot be opened.
 */
2309void add_chunk_information(
const string &h5_file_name, DMRpp *dmrpp,
bool disable_dio)
2312 hid_t file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
2315 msg <<
"Error: HDF5 file '" << h5_file_name <<
"' cannot be opened." << endl;
2316 throw BESNotFoundError(msg.str(), __FILE__, __LINE__);
// Annotate every variable, starting from the root group.
2321 get_chunks_for_all_variables(file, dmrpp->root(), disable_dio);
2346void qc_input_file(
const string &file_fqn)
2351 if (file_fqn.empty()) {
2353 msg <<
"HDF5 input file name must be provided (-f <input>) and be a fully qualified path name." << endl;
2354 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
2357 std::ifstream file(file_fqn, ios::binary);
2358 auto errnum = errno;
2362 msg <<
"Encountered a Read/writing error when attempting to open the file: " << file_fqn << endl;
2363 msg <<
"* strerror(errno): " << strerror(errnum) << endl;
2364 msg <<
"* failbit: " << (((file.rdstate() & std::ifstream::failbit) != 0) ?
"true" :
"false") << endl;
2365 msg <<
"* badbit: " << (((file.rdstate() & std::ifstream::badbit) != 0) ?
"true" :
"false") << endl;
2366 msg <<
"Things to check:" << endl;
2367 msg <<
"* Does the file exist at expected location?" << endl;
2368 msg <<
"* Does your user have permission to read the file?" << endl;
2369 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
2373 const char netcdf3Signature[] = {
'C',
'D',
'F'};
2377 signature.resize(8);
2378 file.read(&signature[0], signature.size());
2380 htri_t temp_is_hdf5 = H5Fis_hdf5(file_fqn.c_str());
2382 bool isHDF5 = (temp_is_hdf5>0)?
true:false;
2389 char newSignature[3];
2390 file.read(&signature[0], signature.size());
2392 bool isNetCDF3 = memcmp(newSignature, netcdf3Signature,
sizeof(netcdf3Signature)) == 0;
2395 msg <<
"The file submitted, " << file_fqn <<
", ";
2396 msg <<
"is a NetCDF-3 classic file and is not compatible with dmr++ production at this time." << endl;
2397 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
2401 msg <<
"The provided file: " << file_fqn <<
" - ";
2402 msg <<
"is neither an HDF5 or a NetCDF-4 file, currently only HDF5 and NetCDF-4 files ";
2403 msg <<
"are supported for dmr++ production" << endl;
2404 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
/**
 * @brief Rebuild a command-line string from argc/argv.
 *
 * NOTE(review): only the signature and the loop header are visible in this listing (the
 * embedded line numbers skip the rest), so the separator and quoting used are not known
 * from here - confirm in the full file.
 *
 * @param argc Number of entries in argv.
 * @param argv The argument vector as passed to main().
 * @return The reconstructed command line.
 */
2417static string recreate_cmdln_from_args(
int argc,
char *argv[])
// Visits every argument, including argv[0].
2420 for(
int i=0; i<argc; i++) {
/**
 * @brief Produce the current time as text in the form "YYYY-MM-DDTHH:MM:SSZ".
 *
 * The system clock is converted with gmtime_r(), so the fields are UTC, matching the
 * trailing 'Z' in the format.
 *
 * NOTE(review): the declaration of tbuf and the return statement are not visible in this
 * listing (the embedded line numbers skip them).
 *
 * @return The formatted timestamp.
 */
2433std::string what_time_is_it(){
2435 auto now = std::chrono::system_clock::now();
2438 auto time_t_now = std::chrono::system_clock::to_time_t(now);
// gmtime_r writes into the caller-supplied tbuf rather than a shared static buffer.
2442 const std::tm* gmt_time = gmtime_r(&time_t_now, &tbuf);
2445 std::stringstream ss;
2446 ss << std::put_time(gmt_time,
"%Y-%m-%dT%H:%M:%SZ");
/**
 * @brief Add a "build_dmrpp_metadata" container attribute to the root group of the DMR++.
 *
 * The container records the creation time, the build_dmrpp and bes versions (both CVER),
 * the libdap name and version, and optionally the BES configuration and the invocation
 * string. The DMR++ version is set to CVER as well.
 *
 * Each attribute is allocated with `new` and handed over with add_attribute_nocopy();
 * presumably the receiving attribute table takes ownership - confirm against libdap.
 *
 * NOTE(review): the embedded line numbers skip values; braces and short declarations
 * (for example the stream `ldv` and the string `line`) are not visible in this listing.
 *
 * @param dmrpp The DMR++ document to annotate.
 * @param bes_conf_doc BES configuration text; skipped when empty.
 * @param invocation How the build was invoked; skipped when empty.
 */
2459void inject_build_dmrpp_metadata_worker( DMRpp *dmrpp,
const string &bes_conf_doc,
const string &invocation)
2461 dmrpp->set_version(CVER);
// The container that all of the entries below are added to.
2464 auto version =
new D4Attribute(
"build_dmrpp_metadata", StringToD4AttributeType(
"container"));
2466 auto creation_date =
new D4Attribute(
"created", StringToD4AttributeType(
"string"));
2467 creation_date->add_value(what_time_is_it());
2468 version->attributes()->add_attribute_nocopy(creation_date);
2470 auto build_dmrpp_version =
new D4Attribute(
"build_dmrpp", StringToD4AttributeType(
"string"));
2471 build_dmrpp_version->add_value(CVER);
2472 version->attributes()->add_attribute_nocopy(build_dmrpp_version);
2474 auto bes_version =
new D4Attribute(
"bes", StringToD4AttributeType(
"string"));
2475 bes_version->add_value(CVER);
2476 version->attributes()->add_attribute_nocopy(bes_version);
// libdap is reported as "<name>-<version>".
2479 ldv << libdap_name() <<
"-" << libdap_version();
2480 auto libdap4_version =
new D4Attribute(
"libdap", StringToD4AttributeType(
"string"));
2481 libdap4_version->add_value(ldv.str());
2482 version->attributes()->add_attribute_nocopy(libdap4_version);
2484 if(!bes_conf_doc.empty()) {
2485 stringstream ss(bes_conf_doc);
2487 string new_bes_conf;
// Copy the configuration line by line, keeping only lines without "BES.module.".
2490 while (getline(ss, line)) {
2492 if (line.find(
"BES.module.") == string::npos) {
2494 new_bes_conf += line +
"\n";
2499 auto config =
new D4Attribute(
"configuration", StringToD4AttributeType(
"string"));
2500 config->add_value(new_bes_conf);
2501 version->attributes()->add_attribute_nocopy(config);
2504 if(!invocation.empty()) {
2506 auto invoke =
new D4Attribute(
"invocation", StringToD4AttributeType(
"string"));
2507 invoke->add_value(invocation);
2508 version->attributes()->add_attribute_nocopy(invoke);
// Attach the finished container to the root group.
2511 dmrpp->root()->attributes()->add_attribute_nocopy(version);
/**
 * @brief Command-line variant: gather the BES configuration and the invocation string,
 * then inject the build metadata into the DMR++.
 *
 * NOTE(review): the statements that load bes_configuration when a configuration file name
 * is given are not visible in this listing (the embedded line numbers skip them).
 *
 * @param argc Argument count of the calling program.
 * @param argv Argument vector of the calling program; used to rebuild the command line.
 * @param bes_conf_file_used_to_create_dmr Name of the BES configuration file; may be empty.
 * @param dmrpp The DMR++ document to annotate.
 */
2528 void inject_build_dmrpp_metadata(
int argc,
char **argv,
const string &bes_conf_file_used_to_create_dmr, DMRpp *dmrpp)
2530 string bes_configuration;
2532 if(!bes_conf_file_used_to_create_dmr.empty()) {
// The invocation is the command line as reconstructed from argc/argv.
2538 invocation = recreate_cmdln_from_args(argc, argv);
2540 inject_build_dmrpp_metadata_worker(dmrpp, bes_configuration, invocation);
/**
 * @brief Server-side variant: take the invocation string from the BES context manager,
 * then inject the build metadata into the DMR++.
 *
 * NOTE(review): how bes_configuration is filled, and the declarations of `invocation` and
 * `found`, are not visible in this listing (the embedded line numbers skip them).
 *
 * @param dmrpp The DMR++ document to annotate.
 */
2554void inject_build_dmrpp_metadata(DMRpp *dmrpp)
2558 string bes_configuration;
// The invocation comes from the BES context named by INVOCATION_CONTEXT.
2566 invocation = BESContextManager::TheManager()->
get_context(INVOCATION_CONTEXT, found);
2569 inject_build_dmrpp_metadata_worker(dmrpp, bes_configuration, invocation);
/**
 * @brief Build a DMR++ document from a DMR file plus its HDF5 data file and print it to stdout.
 *
 * The DMR is parsed into a DMRpp object that uses DmrppTypeFactory, chunk information is
 * added from the HDF5 file, production metadata is optionally injected, and the resulting
 * XML document is written to cout.
 *
 * NOTE(review): the embedded line numbers skip values; the declarations of `dmrpp` and
 * `writer` and the call that fills `writer` (where dmrpp_href_value is presumably used)
 * are not visible in this listing.
 *
 * @param dmrpp_href_value Not referenced by any visible line - TODO confirm its use.
 * @param dmr_filename Path of the DMR file to parse.
 * @param h5_file_fqn Path of the HDF5 file the DMR describes.
 * @param add_production_metadata When true, build metadata is injected into the document.
 * @param bes_conf_file_used_to_create_dmr Forwarded to inject_build_dmrpp_metadata().
 * @param disable_dio Forwarded to add_chunk_information().
 * @param argc Forwarded to inject_build_dmrpp_metadata().
 * @param argv Forwarded to inject_build_dmrpp_metadata().
 */
2585void build_dmrpp_from_dmr_file(
const string &dmrpp_href_value,
const string &dmr_filename,
const string &h5_file_fqn,
2586 bool add_production_metadata,
const string &bes_conf_file_used_to_create_dmr,
bool disable_dio,
int argc,
char *argv[])
// The factory makes the parser create Dmrpp* variable types.
2590 DmrppTypeFactory dtf;
2591 dmrpp.set_factory(&dtf);
2593 ifstream in(dmr_filename.c_str());
2594 D4ParserSax2 parser;
2595 parser.intern(in, &dmrpp,
false);
2597 add_chunk_information(h5_file_fqn, &dmrpp, disable_dio);
2599 if (add_production_metadata) {
2600 inject_build_dmrpp_metadata(argc, argv, bes_conf_file_used_to_create_dmr, &dmrpp);
2605 cout << writer.get_doc();
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
std::string get_as_config() const
static TheBESKeys * TheKeys()
Access to the singleton.
static std::string ConfigFile
virtual void print_dmrpp(libdap::XMLWriter &xml, const std::string &href="", bool constrained=false, bool print_chunks=true)
Print the DMR++ response.
void set_is_flsa(bool state)
Marks the array as a Fixed length string array, or not, depending on state.
virtual void set_fill_value_string(const std::string &fv)
Set the fill value (using a string)
void set_disable_dio(bool value)
Set the value of the disable_dio property.
virtual void set_uses_fill_value(bool ufv)
Set the uses_fill_value property.
void set_chunk_dimension_sizes(const std::vector< unsigned long long > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Adds a chunk to the vector of chunk refs (byteStreams) and returns the size of the chunks internal ve...
void set_filter(const std::string &value)
Set the value of the filters property.
void set_compact(bool value)
Set the value of the compact property.
void get_data(hid_t dset, void *buf)