bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
build_dmrpp_util_h4.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the Hyrax data server.
4
5// Copyright (c) 2022 OPeNDAP, Inc.
6// Author: James Gallagher <jgallagher@opendap.org>
7// Copyright (c) The HDF Group
8// Author: Kent Yang <myang6@hdfgroup.org>
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include <iostream>
26#include <memory>
27#include <iterator>
28#include <unordered_set>
29#include <iomanip> // std::put_time()
30#include <ctime> // std::gmtime_r()
31
32#include "config.h"
33#include "hdf.h" // HDF4 header file
34#include "mfhdf.h" // Include the HDF4 header file
35#include "HdfEosDef.h"
36
37#include <libdap/Str.h>
38#include <libdap/Structure.h>
39#include <libdap/util.h>
40#include <libdap/D4Attributes.h>
41
42#include <BESNotFoundError.h>
43#include <BESInternalError.h>
44#include <BESInternalFatalError.h>
45
46#include <TheBESKeys.h>
47#include <BESContextManager.h>
48
49#include "DMRpp.h"
50#include "DmrppTypeFactory.h"
51#include "DmrppD4Group.h"
52#include "DmrppArray.h"
53#include "DmrppByte.h"
54#include "D4ParserSax2.h"
55
56#define COMP_INFO 512
57
58#define ERROR(x) do { cerr << "ERROR: " << x << " " << __FILE__ << ":" << __LINE__ << endl; } while(false)
59
60/*
61 * Hold mapping information for SDS objects.
62 */
63using SD_mapping_info_t = struct {
64 int32 nblocks;
65 int32 *offsets;
66 int32 *lengths;
67 int32 id;
68 int32 data_type;
69 intn is_empty;
70 VOIDP fill_value;
71};
72
73using namespace std;
74using namespace libdap;
75using namespace dmrpp;
76
77namespace build_dmrpp_util_h4 {
78
79bool verbose = false; // Optionally set by build_dmrpp's main().
80
81#define VERBOSE(x) do { if (verbose) (x); } while(false)
82#define prolog std::string("build_dmrpp_h4::").append(__func__).append("() - ")
83
84constexpr auto INVOCATION_CONTEXT = "invocation";
85
86// This function is adapted from H4mapper implemented by the HDF group.
87// h4mapper can be found from https://docs.hdfgroup.org/archive/support/projects/h4map/h4map_writer.html
88int SDfree_mapping_info(SD_mapping_info_t *map_info)
89{
90 intn ret_value = SUCCEED;
91
92 /* nothing to free */
93 if (map_info == nullptr)
94 return SUCCEED;
95
96 map_info->nblocks = 0;
97
98 if (map_info->offsets != nullptr) {
99 HDfree(map_info->offsets);
100 map_info->offsets = nullptr;
101 }
102
103 if (map_info->lengths) {
104 HDfree(map_info->lengths);
105 map_info->lengths = nullptr;
106 }
107
108 return ret_value;
109}
110
111void close_hdf4_file_ids(int32 sd_id, int32 file_id) {
112 Vend(file_id);
113 Hclose(file_id);
114 SDend(sd_id);
115}
116
117// Try to combine the adjacent linked blocks to one block. KY 2024-03-12
118size_t combine_linked_blocks(const SD_mapping_info_t &map_info, vector<int> & merged_lengths, vector<int> &merged_offsets) {
119
120 int num_eles = map_info.nblocks;
121
122#if 0
123 for (int i = 0; i<offset.size(); i++) {
124 cout<<"offset["<<i<<"]= "<<offset[i] <<endl;
125 cout<<"length["<<i<<"]= "<<length[i] <<endl;
126 }
127#endif
128
129 // The first element offset should always be fixed.
130 merged_offsets.push_back(map_info.offsets[0]);
131
132 int temp_length = map_info.lengths[0];
133
134 for (int i = 0; i <(num_eles-1); i++) {
135
136 // If not contiguous, push back the i's new length;
137 // push back the i+1's unchanged offset;
138 // save the i+1's length to the temp_length variable.
139 if (map_info.offsets[i+1] !=(map_info.offsets[i] + map_info.lengths[i])) {
140 merged_lengths.push_back(temp_length);
141 merged_offsets.push_back(map_info.offsets[i+1]);
142 temp_length = map_info.lengths[i+1];
143 }
144 else { // contiguous, just update the temp length variable.
145 temp_length +=map_info.lengths[i+1];
146 }
147
148 }
149
150 // Update the last length.
151 merged_lengths.push_back(temp_length);
152
153 return merged_lengths.size();
154
155}
156
157
165vector<unsigned long long>
166write_chunk_position_in_array(int rank, const unsigned long long *lengths, const int32_t *strides)
167{
168 vector<unsigned long long> chunk_pos;
169
170 for(int i = 0; i < rank; i++){
171 unsigned long long index = lengths[i] * (unsigned long long)strides[i];
172 chunk_pos.push_back(index);
173 }
174
175 return chunk_pos;
176}
177
191
192int read_chunk(int sdsid, SD_mapping_info_t *map_info, int *origin)
193{
194 intn ret_value = SUCCEED;
195
196#if 0
197 /* Free any info before resetting. */
198 SDfree_mapping_info(map_info);
199#endif
200 /* Reset map_info. */
201 /* HDmemset(map_info, 0, sizeof(SD_mapping_info_t)); */
202
203 /* Save SDS id since HDmemset reset it. map_info->id will be reused. */
204 /* map_info->id = sdsid; */
205
206 // First check if this chunk/data stream has any block of data.
207 intn info_count = SDgetdatainfo(sdsid, origin, 0, 0, nullptr, nullptr);
208 if (info_count == FAIL) {
209 ERROR("SDgetedatainfo() failed in read_chunk().\n");
210 return FAIL;
211 }
212
213 // If we find it has data, retrieve the offsets and length information for this chunk or data stream.
214 if (info_count > 0) {
215 map_info->nblocks = (int32) info_count;
216 map_info->offsets = (int32 *)HDmalloc(sizeof(int32)*map_info->nblocks);
217 if (map_info->offsets == nullptr) {
218 ERROR("HDmalloc() failed: Out of Memory");
219 return FAIL;
220 }
221 map_info->lengths = (int32 *)HDmalloc(sizeof(int32)*map_info->nblocks);
222 if (map_info->lengths == nullptr) {
223 HDfree(map_info->offsets);
224 ERROR("HDmalloc() failed: Out of Memory");
225 return FAIL;
226 }
227
228 ret_value = SDgetdatainfo(sdsid, origin, 0, info_count, map_info->offsets, map_info->lengths);
229 return ret_value;
230 }
231
232 return ret_value;
233
234} /* read_chunk */
235
236string get_sds_fill_value_str(int32 sdsid, int32 datatype) {
237
238 intn emptySDS = 0;
239
240 if (SDcheckempty(sdsid,&emptySDS) == FAIL) {
241 SDendaccess(sdsid);
242 throw BESInternalError("SDcheckempty fails. ",__FILE__,__LINE__);
243 }
244
245 string ret_value;
246 switch (datatype) {
247
248 case DFNT_UINT8:
249 case DFNT_UCHAR: {
250 uint8_t fvalue;
251 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
252 ret_value = to_string(fvalue);
253 // If the SDS is empty, we return the default netCDF fill value since SDS is following netCDF.
254 else if(emptySDS == 1)
255 ret_value = to_string(255);
256 }
257 break;
258 case DFNT_INT8:
259 case DFNT_CHAR: {
260 int8_t fvalue;
261 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
262 ret_value = to_string(fvalue);
263 else if(emptySDS == 1)
264 ret_value = to_string(FILL_BYTE);
265 }
266 break;
267
268 case DFNT_INT16: {
269 int16_t fvalue;
270 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
271 ret_value = to_string(fvalue);
272 else if(emptySDS == 1)
273 ret_value = to_string(FILL_SHORT);
274 }
275 break;
276
277 case DFNT_UINT16: {
278 uint16_t fvalue;
279 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
280 ret_value = to_string(fvalue);
281 else if(emptySDS == 1)
282 ret_value = to_string(65535);
283 }
284 break;
285
286 case DFNT_INT32: {
287 int32_t fvalue;
288 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
289 ret_value = to_string(fvalue);
290 else if(emptySDS == 1)
291 ret_value = to_string(FILL_LONG);
292 }
293 break;
294
295 case DFNT_UINT32: {
296 uint32_t fvalue;
297 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
298 ret_value = to_string(fvalue);
299 else if(emptySDS == 1)
300 ret_value = to_string(4294967295);
301 }
302 break;
303
304 case DFNT_FLOAT: {
305 float fvalue;
306 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
307 ret_value = to_string(fvalue);
308 else if(emptySDS == 1)
309 ret_value = to_string(FILL_FLOAT);
310 }
311 break;
312
313 case DFNT_DOUBLE: {
314 double fvalue;
315 if (SDgetfillvalue(sdsid, &fvalue) == SUCCEED)
316 ret_value = to_string(fvalue);
317 else if(emptySDS == 1)
318 ret_value = to_string(FILL_DOUBLE);
319
320 }
321 break;
322 default:
323 break;
324
325 }
326 return ret_value;
327}
328bool SD_set_fill_value(int32 sdsid, int32 datatype, BaseType *btp) {
329
330 string fill_value = get_sds_fill_value_str(sdsid,datatype);
331 if (!fill_value.empty()) {
332 auto dc = dynamic_cast<DmrppCommon *>(btp);
333 if (!dc) {
334 ERROR("Expected to find a DmrppCommon instance but did not.");
335 return false;
336 }
337 dc->set_uses_fill_value(true);
338 dc->set_fill_value_string(fill_value);
339 }
340 return true;
341}
342bool ingest_sds_info_to_chunk(int file, int32 obj_ref, BaseType *btp) {
343
344 int32 sds_index = SDreftoindex(file, obj_ref);
345 if (sds_index == FAIL) {
346 ERROR("SDreftoindex() failed");
347 return false;
348 }
349
350 int sdsid = SDselect(file, sds_index);
351 if (sdsid == FAIL) {
352 ERROR("SDselect() failed");
353 return false;
354 }
355
356 VERBOSE(cerr << "Name: " << btp->name() << endl);
357 VERBOSE(cerr << "DMR FQN: " << btp->FQN() << endl);
358 VERBOSE(cerr << "sdsid: " << sdsid << endl);
359
360 char obj_name[H4_MAX_NC_NAME]; /* name of the object */
361 int32 rank = -1; /* number of dimensions */
362 int32 dimsizes[H4_MAX_VAR_DIMS]; /* dimension sizes */
363 int32 data_type = -1; /* data type */
364 int32 num_attrs = -1; /* number of global attributes */
365
366 int status = SDgetinfo(sdsid, obj_name, &rank, dimsizes, &data_type, &num_attrs);
367 if (status == FAIL) {
368 ERROR("SDgetinfo() failed.");
369 SDendaccess(sdsid);
370 return false;
371 }
372
373 HDF_CHUNK_DEF cdef;
374 int32 chunk_flag = -1; /* chunking flag */
375
376 status = SDgetchunkinfo(sdsid, &cdef, &chunk_flag);
377 if (status == FAIL) {
378 ERROR("SDgetchunkinfo() failed.");
379 SDendaccess(sdsid);
380 return false;
381 }
382
383 switch (chunk_flag) {
384 case HDF_NONE: /* No chunking. */
385 VERBOSE(cerr << "No chunking.\n");
386 break;
387 case HDF_CHUNK: /* Chunking. */
388 case HDF_COMP: /* Compression. */
389 if (rank <= 0) {
390 ERROR("Invalid rank.");
391 SDendaccess(sdsid);
392 return false;
393 }
394 VERBOSE(cerr << "HDF_CHUNK or HDF_COMP.\n");
395 break;
396 case HDF_NBIT:
397 /* NBIT compression. */
398 ERROR("NBit Compression chunking not supported.");
399 SDendaccess(sdsid);
400 return false;
401 default:
402 ERROR("Unknown chunking flag.");
403 SDendaccess(sdsid);
404 return false;
405 }
406
407 string endian_name;
408 hdf_ntinfo_t info; /* defined in hdf.h near line 142. */
409 int result = Hgetntinfo(data_type, &info);
410 if (result == FAIL) {
411 ERROR("Hgetntinfo() failed.");
412 SDendaccess(sdsid);
413 return false;
414 }
415 else {
416 if (strncmp(info.byte_order, "bigEndian", 9) == 0)
417 endian_name = "BE";
418 else if (strncmp(info.byte_order, "littleEndian", 12) == 0)
419 endian_name = "LE";
420 else
421 endian_name = "UNKNOWN";
422 }
423
424 auto dc = dynamic_cast<DmrppCommon *>(btp);
425 if (!dc){
426 ERROR("Expected to find a DmrppCommon instance but did not.");
427 SDendaccess(sdsid);
428 return false;
429 }
430
431 // Need to check SDS compression info. Unlike HDF5, HDF4 can be compressed without using chunks.
432 // So we need to cover both cases. KY 02/12/2024
433
434 comp_coder_t comp_coder_type = COMP_CODE_NONE;
435 comp_info c_info;
436
437 if (SDgetcompinfo(sdsid, &comp_coder_type, &c_info) == FAIL) {
438 ERROR("SDgetcompinfo() failed.");
439 SDendaccess(sdsid);
440 return false;
441 }
442
443 // Also need to consider the case when the variable is compressed but not chunked.
444 if (chunk_flag == HDF_CHUNK || chunk_flag == HDF_COMP ) {
445 vector<unsigned long long> chunk_dimension_sizes(rank, 0);
446 for (int i = 0; i < rank; i++) {
447 if (chunk_flag == HDF_CHUNK) {
448 chunk_dimension_sizes[i] = cdef.chunk_lengths[i];
449 }
450 else { // chunk_flag is HDF_COMP
451 chunk_dimension_sizes[i] = cdef.comp.chunk_lengths[i];
452 }
453 }
454 if (chunk_flag == HDF_COMP) {
455 // Add the deflated-compression level. KY 02/20/24
456 if (comp_coder_type == COMP_CODE_DEFLATE) {
457 dc->ingest_compression_type("deflate");
458 vector<unsigned int> deflate_levels;
459 deflate_levels.push_back(c_info.deflate.level);
460 dc->set_deflate_levels(deflate_levels);
461 } else {
462 SDendaccess(sdsid);
463 ERROR("Encounter unsupported compression method. Currently only support deflate compression.");
464 return false;
465 }
466
467 }
468 dc->set_chunk_dimension_sizes(chunk_dimension_sizes);
469
470 // Leave the following code for the time being in case there are issues in the optimization
471#if 0
472 SD_mapping_info_t map_info;
473 map_info.is_empty = 0;
474 map_info.nblocks = 0;
475 map_info.offsets = nullptr;
476 map_info.lengths = nullptr;
477 map_info.data_type = data_type;
478
479#endif
480
481 vector<unsigned long long> position_in_array(rank, 0);
482
483 /* Initialize steps. */
484 vector<int> steps(rank, 0);
485 vector<int32_t> strides(rank, 0);
486 int number_of_chunks = 1;
487 for (int i = 0; i < rank; i++) {
488 // The following line considers the un-even chunk case, otherwise we will miss the extra chunk(s).
489 steps[i] = 1 + ((dimsizes[i] - 1) / chunk_dimension_sizes[i]);
490 number_of_chunks = number_of_chunks * steps[i];
491 strides[i] = 0;
492 }
493
494 VERBOSE(cerr << "number_of_chunks: " << number_of_chunks << endl);
495 VERBOSE(cerr << "rank: " << rank << endl);
496 VERBOSE(cerr << "chunk dimension sizes: ");
497 VERBOSE(copy(chunk_dimension_sizes.begin(), chunk_dimension_sizes.end(),
498 ostream_iterator<int32>(cerr, " ")));
499 VERBOSE(cerr<<endl);
500
501 // Obtain the offset/length of all chunks.
502 vector<uint32> info_count(number_of_chunks);
503 intn max_num_blocks = SDgetallchunkdatainfo(sdsid,number_of_chunks,rank,steps.data(),0,info_count.data(),
504 nullptr,nullptr);
505 if (max_num_blocks == FAIL) {
506 ERROR("SDgetallchunkdatainfo failed.");
507 SDendaccess(sdsid);
508 return false;
509 }
510
511 vector<int32>offsetarray(max_num_blocks*number_of_chunks);
512 vector<int32>lengtharray(max_num_blocks*number_of_chunks);
513
514 max_num_blocks = SDgetallchunkdatainfo(sdsid,number_of_chunks,rank,steps.data(),max_num_blocks,info_count.data(),
515 offsetarray.data(),lengtharray.data());
516 if (max_num_blocks == FAIL) {
517 ERROR("SDgetallchunkdatainfo failed.");
518 SDendaccess(sdsid);
519 return false;
520 }
521 bool LBChunk = (max_num_blocks>1);
522
523 intn ol_counter = 0;
524 for (int k = 0; k < number_of_chunks; ++k) {
525
526 auto pia = write_chunk_position_in_array(rank, chunk_dimension_sizes.data(), strides.data());
527
528 // When we find this chunk contains multiple blocks.
529 if (info_count[k] >1) {
530
531 VERBOSE(cerr << "number of blocks in a chunk is: " << info_count[k]<< endl);
532 for (unsigned int i = 0; i < info_count[k]; i++) {
533 VERBOSE(cerr << "offsets[" << k << ", " << i << "]: " << offsetarray[ol_counter] << endl);
534 VERBOSE(cerr << "lengths[" << k << ", " << i << "]: " << lengtharray[ol_counter] << endl);
535 // add the block index for this chunk.
536 dc->add_chunk(endian_name, lengtharray[ol_counter], offsetarray[ol_counter], pia,true,i);
537 ol_counter++;
538 }
539 ol_counter +=max_num_blocks-info_count[k];
540
541 }
542
543 else if (info_count[k] == 1) {
544
545 dc->add_chunk(endian_name, lengtharray[ol_counter], offsetarray[ol_counter], pia);
546 ol_counter +=max_num_blocks;
547 }
548 else if (info_count[k] == 0)
549 ol_counter +=max_num_blocks;
550
551 // Increase strides for each dimension. The fastest varying dimension is rank-1.
552 int scale = 1;
553 for(int i = rank-1; i >= 0 ; i--) {
554 if ((k+1) % scale == 0) {
555 strides[i] = ++strides[i] % steps[i];
556 }
557 scale = scale * steps[i];
558 }
559 }
560 if (LBChunk)
561 dc->set_multi_linked_blocks_chunk(LBChunk);
562 }
563
564 // Leave the following code for the time being in case there are issues in the optimization.
565#if 0
566 // Also need to consider the case when the variable is compressed but not chunked.
567 if (chunk_flag == HDF_CHUNK || chunk_flag == HDF_COMP ) {
568 vector<unsigned long long> chunk_dimension_sizes(rank, 0);
569 for (int i = 0; i < rank; i++) {
570 if (chunk_flag == HDF_CHUNK) {
571 chunk_dimension_sizes[i] = cdef.chunk_lengths[i];
572 }
573 else { // chunk_flag is HDF_COMP
574 chunk_dimension_sizes[i] = cdef.comp.chunk_lengths[i];
575 }
576 }
577 if (chunk_flag == HDF_COMP) {
578 // Add the deflated-compression level. KY 02/20/24
579 if (comp_coder_type == COMP_CODE_DEFLATE) {
580 dc->ingest_compression_type("deflate");
581 vector<unsigned int> deflate_levels;
582 deflate_levels.push_back(c_info.deflate.level);
583 dc->set_deflate_levels(deflate_levels);
584 } else {
585 SDendaccess(sdsid);
586 ERROR("Encounter unsupported compression method. Currently only support deflate compression.");
587 return false;
588 }
589
590 }
591 dc->set_chunk_dimension_sizes(chunk_dimension_sizes);
592
593 SD_mapping_info_t map_info;
594 map_info.is_empty = 0;
595 map_info.nblocks = 0;
596 map_info.offsets = nullptr;
597 map_info.lengths = nullptr;
598 map_info.data_type = data_type;
599
600 vector<unsigned long long> position_in_array(rank, 0);
601
602 /* Initialize steps. */
603 vector<int> steps(rank, 0);
604 vector<int32_t> strides(rank, 0);
605 int number_of_chunks = 1;
606 for(int i = 0; i < rank; i++) {
607 steps[i] = 1 + ((dimsizes[i] - 1) / chunk_dimension_sizes[i]);
608 number_of_chunks = number_of_chunks * steps[i];
609 strides[i] = 0;
610 }
611
612 VERBOSE(cerr << "number_of_chunks: " << number_of_chunks << endl);
613 VERBOSE(cerr << "rank: " << rank << endl);
614 VERBOSE(cerr << "chunk dimension sizes: ");
615 VERBOSE(copy(chunk_dimension_sizes.begin(), chunk_dimension_sizes.end(),
616 ostream_iterator<int32>(cerr, " ")));
617 VERBOSE(cerr<<endl);
618
619 bool LBChunk = false;
620
621 for (int k = 0; k < number_of_chunks; ++k) {
622 auto info_count = read_chunk(sdsid, &map_info, strides.data());
623 if (info_count == FAIL) {
624 ERROR("read_chunk() failed.");
625 SDendaccess(sdsid);
626 return false;
627 }
628
629 auto pia = write_chunk_position_in_array(rank, chunk_dimension_sizes.data(), strides.data());
630
631 // When we find this chunk contains multiple blocks.
632 if (map_info.nblocks >1) {
633
634 if (!LBChunk)
635 LBChunk = true;
636 VERBOSE(cerr << "number of blocks in a chunk is: " << map_info.nblocks << endl);
637 for (unsigned int i = 0; i < map_info.nblocks; i++) {
638 VERBOSE(cerr << "offsets[" << k << ", " << i << "]: " << map_info.offsets[i] << endl);
639 VERBOSE(cerr << "lengths[" << k << ", " << i << "]: " << map_info.lengths[i] << endl);
640 // add the block index for this chunk.
641 dc->add_chunk(endian_name, map_info.lengths[i], map_info.offsets[i], pia,true,i);
642
643 }
644 }
645
646 else if (map_info.nblocks == 1) {
647 dc->add_chunk(endian_name, map_info.lengths[0], map_info.offsets[0], pia);
648 }
649
650 // Increase strides for each dimension. The fastest varying dimension is rank-1.
651 int scale = 1;
652 for(int i = rank-1; i >= 0 ; i--) {
653 if ((k+1) % scale == 0) {
654 strides[i] = ++strides[i] % steps[i];
655 }
656 scale = scale * steps[i];
657 }
658 SDfree_mapping_info(&map_info);
659 }
660 if (LBChunk)
661 dc->set_multi_linked_blocks_chunk(LBChunk);
662 }
663#endif
664 else if (chunk_flag == HDF_NONE) {
665 SD_mapping_info_t map_info;
666 map_info.is_empty = 0;
667 map_info.nblocks = 0;
668 map_info.offsets = nullptr;
669 map_info.lengths = nullptr;
670 map_info.data_type = data_type;
671
672 // Also need to consider the case when the variable is compressed but not chunked.
673 if (comp_coder_type != COMP_CODE_NONE) {
674 if (comp_coder_type == COMP_CODE_DEFLATE) {
675 dc->ingest_compression_type("deflate");
676 vector<unsigned int> deflate_levels;
677 deflate_levels.push_back(c_info.deflate.level);
678 dc->set_deflate_levels(deflate_levels);
679 }
680 else {
681 ERROR("Encounter unsupported compression method. Currently only support deflate compression.");
682 SDendaccess(sdsid);
683 return false;
684 }
685 }
686
687 vector<int> origin(rank, 0);
688 auto info_count = read_chunk(sdsid, &map_info, origin.data());
689 if (info_count == FAIL) {
690 ERROR("read_chunk() failed in the middle of ingest_sds_info_to_chunk().");
691 SDendaccess(sdsid);
692 return false;
693 }
694
695 vector<unsigned long long> position_in_array(rank, 0);
696 if (map_info.nblocks ==1) {
697 dc->add_chunk(endian_name, map_info.lengths[0], map_info.offsets[0], position_in_array);
698 }
699 else if (map_info.nblocks>1) {
700 // Here we will see if we can combine the number of contiguous blocks to a bigger one.
701 // This is necessary since HDF4 may store small size data in large number of contiguous linked blocks.
702 // KY 2024-02-22
703 vector<int> merged_lengths;
704 vector<int> merged_offsets;
705 size_t merged_number_blocks = combine_linked_blocks(map_info, merged_lengths, merged_offsets);
706 for (unsigned i = 0; i < merged_number_blocks; i++) {
707 VERBOSE(cerr << "offsets[" << i << "]: " << map_info.offsets[i] << endl);
708 VERBOSE(cerr << "lengths[" << i << "]: " << map_info.lengths[i] << endl);
709
710 // This has linked blocks. Add this information to the dmrpp:chunks.
711 dc->add_chunk(endian_name, merged_lengths[i], merged_offsets[i], true,i);
712
713 }
714 }
715 SDfree_mapping_info(&map_info);
716 }
717 else {
718 // TODO Handle the other cases. jhrg 12/7/23
719 ERROR("Unknown chunking type.");
720 SDendaccess(sdsid);
721 return false;
722 }
723
724 // Add the fillvalue support
725 if (false == SD_set_fill_value(sdsid,data_type, btp)) {
726 ERROR("SD_set_fill_value failed");
727 SDendaccess(sdsid);
728 return false;
729 }
730
731 // Need to close the SDS interface. KY 2024-02-22
732 SDendaccess(sdsid);
733 return true;
734}
735
736size_t combine_linked_blocks_vdata( const vector<int>& lengths, const vector<int>& offsets, vector<int> & merged_lengths, vector<int> &merged_offsets) {
737
738 size_t num_eles = lengths.size();
739
740#if 0
741 for (int i = 0; i<offset.size(); i++) {
742 cout<<"offset["<<i<<"]= "<<offset[i] <<endl;
743 cout<<"length["<<i<<"]= "<<length[i] <<endl;
744 }
745#endif
746
747 // The first element offset should always be fixed.
748 merged_offsets.push_back(offsets[0]);
749 int temp_length = lengths[0];
750
751 for (size_t i = 0; i <(num_eles-1); i++) {
752
753 // If not contiguous, push back the i's new length;
754 // push back the i+1's unchanged offset.
755 // save the i+1's length to the temp length variable.
756 if (offsets[i+1] !=(offsets[i] + lengths[i])) {
757 merged_lengths.push_back(temp_length);
758 merged_offsets.push_back(offsets[i+1]);
759 temp_length = lengths[i+1];
760 }
761 else { // contiguous, just update the temp length variable.
762 temp_length +=lengths[i+1];
763 }
764
765 }
766
767 // Update the last length.
768 merged_lengths.push_back(temp_length);
769
770#if 0
771for (int i = 0; i <merged_lengths.size(); i++) {
772cout <<"merged_lengths["<<i<<"]= "<<merged_lengths[i]<<endl;
773cout <<"merged_offsets["<<i<<"]= "<<merged_offsets[i]<<endl;
774
775}
776#endif
777 return merged_lengths.size();
778
779}
780
781
782bool ingest_vdata_info_to_chunk(int32 file_id, int32 obj_ref, BaseType *btp) {
783
784 int32 vdata_id = VSattach(file_id, obj_ref, "r");
785 if (vdata_id == FAIL){
786 ERROR("VSattach() failed.");
787 return false;
788 }
789
790 // Retrieve endianess
791 int32 field_datatype = VFfieldtype(vdata_id,0);
792 if (field_datatype == FAIL){
793 ERROR("VFfieldtype() failed.");
794 VSdetach(vdata_id);
795 return false;
796 }
797
798 string endian_name;
799 hdf_ntinfo_t info; /* defined in hdf.h near line 142. */
800 int result = Hgetntinfo(field_datatype, &info);
801 if (result == FAIL) {
802 ERROR("Hgetntinfo() failed in ingest_vdata_info_to_chunk.");
803 VSdetach(vdata_id);
804 return false;
805 }
806 else {
807 if (strncmp(info.byte_order, "bigEndian", 9) == 0)
808 endian_name = "BE";
809 else if (strncmp(info.byte_order, "littleEndian", 12) == 0)
810 endian_name = "LE";
811 else
812 endian_name = "UNKNOWN";
813 }
814
815 int32 num_linked_blocks = VSgetdatainfo(vdata_id,0,0,nullptr,nullptr);
816
817 if (num_linked_blocks == FAIL){
818 ERROR("VSgetdatainfo() failed in ingest_vdata_info_to_chunk.");
819 VSdetach(vdata_id);
820 return false;
821 }
822 if (num_linked_blocks == 1) {// most time.
823
824 int32 offset = 0;
825 int32 length = 0;
826 if (VSgetdatainfo(vdata_id,0,1,&offset,&length) == FAIL){
827 ERROR("VSgetdatainfo() failed, cannot obtain offset and length info.");
828 VSdetach(vdata_id);
829 return false;
830 }
831
832 auto dc = dynamic_cast<DmrppCommon *>(btp);
833 if (!dc) {
834 ERROR("Expected to find a DmrppCommon instance but did not in ingested_vdata_info_to_chunk");
835 VSdetach(vdata_id);
836 return false;
837 }
838 auto da = dynamic_cast<DmrppArray *>(btp);
839 if (!da) {
840 ERROR("Expected to find a DmrppArray instance but did not in ingested_vdata_info_to_chunk");
841 VSdetach(vdata_id);
842 return false;
843 }
844
845 if (da->width_ll() != (int64_t)length) {
846 ERROR("The retrieved vdata size is no equal to the original size.");
847 VSdetach(vdata_id);
848 return false;
849 }
850 dc->add_chunk(endian_name, (unsigned long long)length, (unsigned long long)offset,"");
851
852#if 0
853 int32 total_num_fields = VFnfields(vdata_id);
854 if (total_num_fields == 1) {
855 int32 offset = 0;
856 int32 length = 0;
857 VSgetdatainfo(vdata_id,0,1,&offset,&length);
858cout <<"offset is: "<<offset <<endl;
859cout <<"length is: "<<length <<endl;
860
861 }
862#endif
863
864 }
865 else if (num_linked_blocks >1) {
866
867 //merging the linked blocks.
868 vector<int> lengths;
869 vector<int> offsets;
870 lengths.resize(num_linked_blocks);
871 offsets.resize(num_linked_blocks);
872 if (VSgetdatainfo(vdata_id, 0, num_linked_blocks, offsets.data(), lengths.data()) == FAIL) {
873 ERROR("VSgetdatainfo() failed, cannot obtain offset and length info when the num_linked_blocks is >1.");
874 VSdetach(vdata_id);
875 return false;
876 }
877 vector<int>merged_lengths;
878 vector<int>merged_offsets;
879 size_t merged_number_blocks = combine_linked_blocks_vdata(lengths,offsets,merged_lengths,merged_offsets);
880 auto dc = dynamic_cast<DmrppCommon *>(btp);
881 if (!dc) {
882 ERROR("Expected to find a DmrppArray instance but did not when num_linked_blocks is >1.");
883 VSdetach(vdata_id);
884 return false;
885 }
886
887 for (unsigned i = 0; i < merged_number_blocks; i++)
888 dc->add_chunk(endian_name, merged_lengths[i], merged_offsets[i],true,i);
889
890 }
891
892 VSdetach(vdata_id);
893 return true;
894
895}
896
897#if 0
898bool obtain_compress_encode_data(string &encoded_str, const Bytef*source_data,size_t source_data_size, string &err_msg) {
899
900 uLong ssize = (uLong)source_data_size;
901 uLongf csize = (uLongf)ssize*2;
902 vector<Bytef> compressed_src;
903 compressed_src.resize(source_data_size*2);
904
905 int retval = compress(compressed_src.data(), &csize, source_data, ssize);
906 if (retval != 0) {
907 err_msg = "Fail to compress the data";
908 return false;
909 }
910
911 encoded_str = base64::Base64::encode(compressed_src.data(),(int)csize);
912
913 return true;
914
915}
916#endif
917
918bool add_missing_cf_grid(const string &filename,BaseType *btp, const D4Attribute *eos_cf_attr, string &err_msg) {
919
920 VERBOSE(cerr<<"Coming to add_missing_cf_grid"<<endl);
921
922 string eos_cf_attr_value = eos_cf_attr->value(0);
923
924 VERBOSE(cerr<<"eos_cf_attr_value: "<<eos_cf_attr_value <<endl);
925
926 size_t space_pos = eos_cf_attr_value.find_last_of(' ');
927 if (space_pos ==string::npos) {
928 err_msg = "Attribute eos_latlon must have space inside";
929 return false;
930 }
931
932 string grid_name = eos_cf_attr_value.substr(0,space_pos);
933 string cf_name = eos_cf_attr_value.substr(space_pos+1);
934
935 VERBOSE(cerr<<"grid_name: "<<grid_name <<endl);
936 VERBOSE(cerr<<"cf_name: "<<cf_name<<endl);
937 bool is_xdim = true;
938 if (cf_name =="YDim")
939 is_xdim = false;
940
941 int32 gridfd = GDopen(const_cast < char *>(filename.c_str()), DFACC_READ);
942 if (gridfd <0) {
943 err_msg = "HDF-EOS: GDopen failed";
944 return false;
945 }
946 int32 gridid = GDattach(gridfd, const_cast<char *>(grid_name.c_str()));
947 if (gridid <0) {
948 err_msg = "HDF-EOS: GDattach failed to attach " + grid_name;
949 GDclose(gridfd);
950 return false;
951 }
952
953 int32 xdim = 0;
954 int32 ydim = 0;
955
956 float64 upleft[2];
957 float64 lowright[2];
958
959 // Retrieve dimensions and X-Y coordinates of corners
960 if (GDgridinfo(gridid, &xdim, &ydim, upleft,
961 lowright) == -1) {
962 GDdetach(gridid);
963 GDclose(gridfd);
964 err_msg = "GDgridinfo failed for grid name: " + grid_name;
965 return false;
966 }
967
968 auto dc = dynamic_cast<DmrppCommon *>(btp);
969 if (!dc) {
970 GDdetach(gridid);
971 GDclose(gridfd);
972 err_msg = "Expected to find a DmrppCommon instance but did not in add_missing_cf_grid";
973 return false;
974 }
975 auto da = dynamic_cast<DmrppArray *>(btp);
976 if (!da) {
977 GDdetach(gridid);
978 GDclose(gridfd);
979 err_msg = "Expected to find a DmrppArray instance but did not in add_missing_cf_grid";
980 return false;
981 }
982
983 da->set_missing_data(true);
984
985 int total_num = is_xdim?xdim:ydim;
986 vector<double>val(total_num);
987 double evalue = 0;
988 double svalue = 0;
989
990 if (is_xdim) {
991 svalue = upleft[0];
992 evalue = lowright[0];
993 }
994 else {
995 svalue = upleft[1];
996 evalue = lowright[1];
997 }
998
999 double step_v = (evalue - svalue)/total_num;
1000
1001 val[0] = svalue;
1002 for(int i = 1;i<total_num; i++)
1003 val[i] = val[i-1] + step_v;
1004
1005 da->set_value(val.data(),total_num);
1006 da->set_read_p(true);
1007
1008
1009 GDdetach(gridid);
1010 GDclose(gridfd);
1011
1012 return true;
1013
1014}
1015
1016bool add_missing_eos_latlon(const string &filename,BaseType *btp, const D4Attribute *eos_ll_attr, string &err_msg) {
1017
1018 VERBOSE(cerr<<"Coming to add_missing_eos_latlon"<<endl);
1019
1020 string eos_ll_attr_value = eos_ll_attr->value(0);
1021
1022 VERBOSE(cerr<<"eos_ll_attr_value: "<<eos_ll_attr_value <<endl);
1023
1024 size_t space_pos = eos_ll_attr_value.find_last_of(' ');
1025 if (space_pos ==string::npos) {
1026 err_msg = "Attribute eos_latlon must have space inside";
1027 return false;
1028 }
1029
1030 string grid_name = eos_ll_attr_value.substr(0,space_pos);
1031 string ll_name = eos_ll_attr_value.substr(space_pos+1);
1032
1033 VERBOSE(cerr<<"grid_name: "<<grid_name <<endl);
1034 VERBOSE(cerr<<"ll_name: "<<ll_name<<endl);
1035 bool is_lat = true;
1036 if (ll_name =="lon")
1037 is_lat = false;
1038
1039 int32 gridfd = GDopen(const_cast < char *>(filename.c_str()), DFACC_READ);
1040 if (gridfd <0) {
1041 err_msg = "HDF-EOS: GDopen failed";
1042 return false;
1043 }
1044 int32 gridid = GDattach(gridfd, const_cast<char *>(grid_name.c_str()));
1045 if (gridid <0) {
1046 err_msg = "HDF-EOS: GDattach failed to attach " + grid_name;
1047 GDclose(gridfd);
1048 return false;
1049 }
1050
1051 // Declare projection code, zone, etc grid parameters.
1052
1053 int32 projcode = -1;
1054 int32 zone = -1;
1055 int32 sphere = -1;
1056 float64 params[16];
1057
1058 int32 xdim = 0;
1059 int32 ydim = 0;
1060
1061 float64 upleft[2];
1062 float64 lowright[2];
1063
1064 if (GDprojinfo (gridid, &projcode, &zone, &sphere, params) <0) {
1065 GDdetach(gridid);
1066 GDclose(gridfd);
1067 err_msg = "GDprojinfo failed for grid name: " + grid_name;
1068 return false;
1069 }
1070
1071 // Retrieve dimensions and X-Y coordinates of corners
1072 if (GDgridinfo(gridid, &xdim, &ydim, upleft,
1073 lowright) == -1) {
1074 GDdetach(gridid);
1075 GDclose(gridfd);
1076 err_msg = "GDgridinfo failed for grid name: " + grid_name;
1077 return false;
1078 }
1079
1080
1081 // Retrieve pixel registration information
1082 int32 pixreg = 0;
1083 intn r = GDpixreginfo (gridid, &pixreg);
1084 if (r != 0) {
1085 GDdetach(gridid);
1086 GDclose(gridfd);
1087 err_msg = "GDpixreginfo failed for grid name: " + grid_name;
1088 return false;
1089 }
1090
1091 //Retrieve grid pixel origin
1092 int32 origin = 0;
1093 r = GDorigininfo (gridid, &origin);
1094 if (r != 0) {
1095 GDdetach(gridid);
1096 GDclose(gridfd);
1097 err_msg = "GDoriginfo failed for grid name: " + grid_name;
1098 return false;
1099 }
1100
1101 vector<int32>rows;
1102 vector<int32>cols;
1103 vector<float64>lon;
1104 vector<float64>lat;
1105 rows.resize(xdim*ydim);
1106 cols.resize(xdim*ydim);
1107 lon.resize(xdim*ydim);
1108 lat.resize(xdim*ydim);
1109
1110
1111 int i = 0;
1112 int j = 0;
1113 int k = 0;
1114
1115 /* Fill two arguments, rows and columns */
1116 // rows cols
1117 // /- xdim -/ /- xdim -/
1118 // 0 0 0 ... 0 0 1 2 ... x
1119 // 1 1 1 ... 1 0 1 2 ... x
1120 // ... ...
1121 // y y y ... y 0 1 2 ... x
1122
1123 for (k = j = 0; j < ydim; ++j) {
1124 for (i = 0; i < xdim; ++i) {
1125 rows[k] = j;
1126 cols[k] = i;
1127 ++k;
1128 }
1129 }
1130
1131
1132 r = GDij2ll (projcode, zone, params, sphere, xdim, ydim, upleft, lowright,
1133 xdim * ydim, rows.data(), cols.data(), lon.data(), lat.data(), pixreg, origin);
1134
1135 if (r != 0) {
1136 GDdetach(gridid);
1137 GDclose(gridfd);
1138 err_msg = "cannot calculate grid latitude and longitude for grid name: " + grid_name;
1139 return false;
1140 }
1141
1142 VERBOSE(cerr<<"The first value of longtitude: "<<lon[0]<<endl);
1143 VERBOSE(cerr<<"The first value of latitude: "<<lat[0]<<endl);
1144
1145 auto dc = dynamic_cast<DmrppCommon *>(btp);
1146 if (!dc) {
1147 GDdetach(gridid);
1148 GDclose(gridfd);
1149 err_msg = "Expected to find a DmrppCommon instance but did not in add_missing_eos_latlon";
1150 return false;
1151 }
1152 auto da = dynamic_cast<DmrppArray *>(btp);
1153 if (!da) {
1154 GDdetach(gridid);
1155 GDclose(gridfd);
1156 err_msg = "Expected to find a DmrppArray instance but did not in add_missing_eos_latlon";
1157 return false;
1158 }
1159
1160 da->set_missing_data(true);
1161
1162 if (is_lat) {
1163 // Need to check if CEA or GEO
1164 if (projcode == GCTP_CEA || projcode == GCTP_GEO) {
1165 vector<float64>out_lat;
1166 out_lat.resize(ydim);
1167 j=0;
1168 for (i =0; i<xdim*ydim;i = i+xdim){
1169 out_lat[j] = lat[i];
1170 j++;
1171 }
1172 da->set_value(out_lat.data(),ydim);
1173
1174 }
1175 else
1176 da->set_value(lat.data(),xdim*ydim);
1177 }
1178 else {
1179 if (projcode == GCTP_CEA || projcode == GCTP_GEO) {
1180 vector<float64>out_lon;
1181 out_lon.resize(xdim);
1182 for (i =0; i<xdim;i++)
1183 out_lon[i] = lon[i];
1184 da->set_value(out_lon.data(),xdim);
1185
1186 }
1187 else
1188 da->set_value(lon.data(),xdim*ydim);
1189
1190 }
1191 da->set_read_p(true);
1192
1193#if 0
1194 size_t orig_buf_size = tmp_data.size()*sizeof(double);
1195 uLong ssize = (uLong)orig_buf_size;
1196 uLongf csize = (uLongf)ssize*2;
1197 vector<Bytef> compressed_src;
1198 compressed_src.resize(orig_buf_size*2);
1199
1200 int retval = compress(compressed_src.data(), &csize, (const Bytef*)tmp_data.data(), ssize);
1201 if (retval != 0) {
1202 cout<<"Fail to compress the data"<<endl;
1203 exit(1);
1204 }
1205
1206 string encoded = base64::Base64::encode(compressed_src.data(),(int)csize);
1207
1208#endif
1209
1210 GDdetach(gridid);
1211 GDclose(gridfd);
1212
1213 return true;
1214
1215}
1216
1217bool add_missing_sp_latlon(BaseType *btp, const D4Attribute *sp_ll_attr, string &err_msg) {
1218
1219 VERBOSE(cerr<<"Coming to add_missing_sp_latlon"<<endl);
1220
1221 string sp_ll_attr_value = sp_ll_attr->value(0);
1222
1223 VERBOSE(cerr<<"sp_ll_attr_value: "<<sp_ll_attr_value <<endl);
1224 size_t space_pos = sp_ll_attr_value.find(' ');
1225 if (space_pos ==string::npos) {
1226 err_msg = "Attribute sp_h4_ll must have space inside";
1227 return false;
1228 }
1229 string ll_name = sp_ll_attr_value.substr(0,space_pos);
1230 VERBOSE(cerr<<"ll_name: "<<ll_name<<endl);
1231
1232 size_t sec_space_pos = sp_ll_attr_value.find(' ',space_pos+1);
1233 if (sec_space_pos ==string::npos) {
1234 err_msg = "Attribute sp_h4_ll must have two spaces inside";
1235 return false;
1236 }
1237 string ll_start_str = sp_ll_attr_value.substr(space_pos+1,sec_space_pos-space_pos-1);
1238 VERBOSE(cerr<<"ll_start: "<<ll_start_str<<endl);
1239
1240 string ll_res_str = sp_ll_attr_value.substr(sec_space_pos);
1241 VERBOSE(cerr<<"ll_res: "<<ll_res_str<<endl);
1242
1243 float ll_start = stof(ll_start_str);
1244 float ll_res = stof(ll_res_str);
1245
1246 auto dc = dynamic_cast<DmrppCommon *>(btp);
1247 if (!dc) {
1248 err_msg = "Expected to find a DmrppCommon instance but did not in add_missing_sp_latlon";
1249 return false;
1250 }
1251 auto da = dynamic_cast<DmrppArray *>(btp);
1252 if (!da) {
1253 err_msg = "Expected to find a DmrppArray instance but did not in add_missing_sp_latlon";
1254 return false;
1255 }
1256
1257 if (da->dimensions() !=1){
1258 err_msg = "The number of dimensions of the array should be 1.";
1259 return false;
1260 }
1261
1262 da->set_missing_data(true);
1263
1264 vector<float32> ll_value;
1265 ll_value.resize((size_t)(da->length()));
1266 for (int64_t i = 0; i <da->length(); i++) {
1267 ll_value[i] = ll_start+ll_res/2+i*ll_res;
1268 }
1269
1270 da->set_value(ll_value.data(),da->length());
1271 return true;
1272
1273
1274}
1280bool get_chunks_for_an_array(const string& filename, int32 sd_id, int32 file_id, BaseType *btp, bool disable_missing_data) {
1281
1282 VERBOSE(cerr<<"var name: "<<btp->name() <<endl);
1283 // Here we need to retrieve the attribute value dmr_sds_ref of btp.
1284 D4Attributes *d4_attrs = btp->attributes();
1285 if (!d4_attrs) {
1286 close_hdf4_file_ids(sd_id,file_id);
1287 throw BESInternalError("Expected to find an DAP4 attribute list for " + btp->name() + " but did not.",
1288 __FILE__, __LINE__);
1289 }
1290
1291 // Look for the full name path for this variable
1292 // If one was not given via an attribute, use BaseType::FQN() which
1293 // relies on the variable's position in the DAP dataset hierarchy.
1294 D4Attribute *attr = d4_attrs->find("dmr_sds_ref");
1295 int32 obj_ref = 0;
1296 bool is_sds = false;
1297 if (attr)
1298 is_sds = true;
1299 else
1300 attr = d4_attrs->find("dmr_vdata_ref");
1301
1302 if (attr) {
1303 obj_ref = stoi(attr->value(0));
1304 if (is_sds) {
1305 if (false == ingest_sds_info_to_chunk(sd_id, obj_ref,btp)) {
1306 close_hdf4_file_ids(sd_id,file_id);
1307 throw BESInternalError("Cannot retrieve SDS information correctly for " + btp->name() ,
1308 __FILE__, __LINE__);
1309 }
1310 }
1311
1312 else {
1313 if (false == ingest_vdata_info_to_chunk(file_id, obj_ref,btp)) {
1314 close_hdf4_file_ids(sd_id,file_id);
1315 throw BESInternalError("Cannot retrieve vdata information correctly for " + btp->name() ,
1316 __FILE__, __LINE__);
1317 }
1318 }
1319 }
1320 else if (disable_missing_data == false){
1321
1322 VERBOSE(cerr<<"coming to eos_latlon block"<<endl);
1323 // Here we will check if the eos_latlon exists. Add dmrpp::missingdata
1324 attr = d4_attrs->find("eos_latlon");
1325 if (attr) {
1326 string err_msg;
1327 bool ret_value = add_missing_eos_latlon(filename, btp, attr,err_msg);
1328 if (ret_value == false) {
1329 close_hdf4_file_ids(sd_id,file_id);
1330 throw BESInternalError(err_msg,__FILE__,__LINE__);
1331 }
1332
1333 }
1334 else {
1335 attr = d4_attrs->find("sp_h4_ll");
1336 if (attr) {
1337 string err_msg;
1338 bool ret_value = add_missing_sp_latlon(btp, attr,err_msg);
1339 if (ret_value == false) {
1340 close_hdf4_file_ids(sd_id,file_id);
1341 throw BESInternalError(err_msg,__FILE__,__LINE__);
1342 }
1343 }
1344 else {
1345 attr = d4_attrs->find("eos_cf_grid");
1346 if (attr) {
1347 string err_msg;
1348 bool ret_value = add_missing_cf_grid(filename, btp, attr,err_msg);
1349 if (ret_value == false) {
1350 close_hdf4_file_ids(sd_id,file_id);
1351 throw BESInternalError(err_msg,__FILE__,__LINE__);
1352 }
1353
1354 }
1355 else {
1356 close_hdf4_file_ids(sd_id,file_id);
1357 string error_msg = "Expected to find an attribute that stores either HDF4 SDS reference or HDF4 Vdata reference or eos lat/lon or special HDF4 lat/lon or special cf grid for ";
1358 throw BESInternalError(error_msg + btp->name() + " but did not.",__FILE__,__LINE__);
1359 }
1360 }
1361
1362 }
1363 }
1364
1365
1366 return true;
1367}
1368
1369bool handle_chunks_for_none_array(BaseType *btp, bool disable_missing_data, string &err_msg) {
1370
1371 bool ret_value = false;
1372
1373 VERBOSE(cerr<<"For none_array: var name: "<<btp->name() <<endl);
1374
1375 D4Attributes *d4_attrs = btp->attributes();
1376 if (d4_attrs->empty() == false && btp->type() == dods_byte_c) {
1377
1378 auto attr = d4_attrs->find("eos_cf_grid_mapping");
1379
1380 if (disable_missing_data == false) {
1381
1382 // Here we don't bother to check the attribute value since this CF grid variable value is dummy.
1383 if (attr) {
1384
1385 auto dc = dynamic_cast<DmrppCommon *>(btp);
1386 if (!dc) {
1387 err_msg = "Expected to find a DmrppCommon instance but did not in handle_chunks_for_none_array";
1388 return false;
1389 }
1390 auto db = dynamic_cast<DmrppByte *>(btp);
1391 if (!db) {
1392 err_msg = "Expected to find a DmrppByte instance but did not in handle_chunks_for_none_array";
1393 return false;
1394 }
1395
1396 VERBOSE(cerr<<"For none_array cf dummy grid variable: var name: "<<btp->name() <<endl);
1397
1398 char buf='p';
1399 db->set_missing_data(true);
1400 db->set_value((dods_byte)buf);
1401 db->set_read_p(true);
1402
1403 }
1404 }
1405 ret_value = true;
1406 }
1407
1408 return ret_value;
1409
1410}
1411
1412bool get_chunks_for_a_variable(const string& filename,int32 sd_id, int32 file_id, BaseType *btp, bool disable_missing_data) {
1413
1414 switch (btp->type()) {
1415 case dods_structure_c: {
1416 //TODO: this needs to be re-written since the data of the whole structure is retrieved. KY-2024-02-26
1417 // Handle this later. KY 2024-03-07
1418 // Comment about the above "TODO", the current HDF4 to DMR direct mapping will never go here. So
1419 // we may not need to handle this at all. KY 2024-03-12
1420#if 0
1421 auto sp = dynamic_cast<Structure *>(btp);
1422 //for_each(sp->var_begin(), sp->var_end(), [file](BaseType *btp) { get_chunks_for_a_variable(file, btp); });
1423#endif
1424 close_hdf4_file_ids(sd_id,file_id);
1425 throw BESInternalError("Structure scalar is not supported by DAP4 for HDF4 at this time.\n", __FILE__, __LINE__);
1426 }
1427 case dods_sequence_c:
1428 VERBOSE(cerr << btp->FQN() << ": Sequence is not supported by DMR++ for HDF4 at this time.\n");
1429 return false;
1430 case dods_grid_c: {
1431 close_hdf4_file_ids(sd_id,file_id);
1432 throw BESInternalError("Grids are not supported by DAP4.", __FILE__, __LINE__);
1433 }
1434 case dods_array_c:
1435 return get_chunks_for_an_array(filename,sd_id,file_id, btp,disable_missing_data);
1436 default: {
1437 string err_msg;
1438 bool ret_value = handle_chunks_for_none_array(btp,disable_missing_data,err_msg);
1439 if (ret_value == false) {
1440 if (err_msg.empty() == false) {
1441 close_hdf4_file_ids(sd_id,file_id);
1442 throw BESInternalError(err_msg, __FILE__, __LINE__);
1443 }
1444 else
1445 VERBOSE(cerr << btp->FQN() << ": " << btp->type_name() << " is not supported by DMR++ for HDF4 at this time.\n");
1446 }
1447 return ret_value;
1448 }
1449 }
1450}
1451
1459void get_chunks_for_all_variables(const string& filename, int32 sd_id, int32 file_id, D4Group *group, bool disable_missing_data) {
1460
1461 // variables in the group
1462 for(auto btp : group->variables()) {
1463 if (btp->type() != dods_group_c) {
1464 // if this is not a group, it is a variable
1465 // This is the part where we find out if a variable can be used with DMR++
1466 if (!get_chunks_for_a_variable(filename,sd_id,file_id, btp, disable_missing_data)) {
1467 ERROR("Could not include DMR++ metadata for variable " << btp->FQN());
1468 }
1469 }
1470 else {
1471 // child group
1472 auto g = dynamic_cast<D4Group*>(btp);
1473 if (!g)
1474 throw BESInternalError("Expected " + btp->name() + " to be a D4Group but it is not.", __FILE__, __LINE__);
1475 get_chunks_for_all_variables(filename,sd_id,file_id, g, disable_missing_data);
1476 }
1477 }
1478 // all groups in the group
1479 for (auto g = group->grp_begin(), ge = group->grp_end(); g != ge; ++g) {
1480 get_chunks_for_all_variables(filename,sd_id,file_id, *g, disable_missing_data);
1481 }
1482
1483}
1484
1490void add_chunk_information(const string &h4_file_name, DMRpp *dmrpp, bool disable_missing_data)
1491{
1492 // Open the hdf4 file
1493 int32 sd_id = SDstart(h4_file_name.c_str(), DFACC_READ);
1494 if (sd_id < 0) {
1495 stringstream msg;
1496 msg << "Error: HDF4 file '" << h4_file_name << "' cannot be opened." << endl;
1497 throw BESNotFoundError(msg.str(), __FILE__, __LINE__);
1498 }
1499
1500 int32 file_id = Hopen(h4_file_name.c_str(),DFACC_READ,0);
1501 if (file_id < 0) {
1502 stringstream msg;
1503 msg << "Error: HDF4 file '" << h4_file_name << "' Hopen failed." << endl;
1504 throw BESNotFoundError(msg.str(), __FILE__, __LINE__);
1505 }
1506
1507 if (Vstart(file_id)<0) {
1508 stringstream msg;
1509 msg << "Error: HDF4 file '" << h4_file_name << "' Vstart failed." << endl;
1510 throw BESNotFoundError(msg.str(), __FILE__, __LINE__);
1511 }
1512
1513
1514 // iterate over all the variables in the DMR
1515 get_chunks_for_all_variables(h4_file_name, sd_id,file_id, dmrpp->root(),disable_missing_data);
1516
1517 close_hdf4_file_ids(sd_id,file_id);
1518}
1519
1535void qc_input_file(const string &file_fqn)
1536{
1537 //Use an ifstream file to run a check on the provided file's signature
1538 // to see if it is an HDF4 file. - kln 11/20/23
1539
1540 if (file_fqn.empty()) {
1541 stringstream msg;
1542 msg << "HDF4 input file name must be provided (-f <input>) and be a fully qualified path name." << endl;
1543 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
1544 }
1545
1546 std::ifstream file(file_fqn, ios::binary);
1547 auto errnum = errno;
1548 if (!file) // This is same as if(file.fail()){...}
1549 {
1550 stringstream msg;
1551 msg << "Encountered a Read/writing error when attempting to open the file: " << file_fqn << endl;
1552 msg << "* strerror(errno): " << strerror(errnum) << endl;
1553 msg << "* failbit: " << (((file.rdstate() & std::ifstream::failbit) != 0) ? "true" : "false") << endl;
1554 msg << "* badbit: " << (((file.rdstate() & std::ifstream::badbit) != 0) ? "true" : "false") << endl;
1555 msg << "Things to check:" << endl;
1556 msg << "* Does the file exist at expected location?" << endl;
1557 msg << "* Does your user have permission to read the file?" << endl;
1558 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
1559 }
1560
1561 //HDF4 signature:
1562 int status = Hishdf(file_fqn.c_str());
1563
1564 //Check if file is NOT an HDF4 file
1565 if (status != 1) {
1566 stringstream msg;
1567 msg << "The provided file: " << file_fqn << " - ";
1568 msg << "is not an HDF4 file" << endl;
1569 throw BESInternalFatalError(msg.str(), __FILE__, __LINE__);
1570 }
1571}
1572
1573
1581static string recreate_cmdln_from_args(int argc, char *argv[])
1582{
1583 stringstream ss;
1584 for(int i=0; i<argc; i++) {
1585 if (i > 0)
1586 ss << " ";
1587 ss << argv[i];
1588 }
1589 return ss.str();
1590}
1591
1597std::string what_time_is_it(){
1598 // Get current time as a time_point
1599 auto now = std::chrono::system_clock::now();
1600
1601 // Convert to system time (time_t)
1602 auto time_t_now = std::chrono::system_clock::to_time_t(now);
1603
1604 // Convert to tm structure (GMT time)
1605 struct tm tbuf{};
1606 const std::tm* gmt_time = gmtime_r(&time_t_now, &tbuf);
1607
1608 // Format the time using a stringstream
1609 std::stringstream ss;
1610 ss << std::put_time(gmt_time, "%Y-%m-%dT%H:%M:%SZ");
1611
1612 return ss.str();
1613}
1614
1615
1624void inject_version_and_configuration_worker( DMRpp *dmrpp, const string &bes_conf_doc, const string &invocation)
1625{
1626 dmrpp->set_version(CVER);
1627
1628 // Build the version attributes for the DMR++
1629 auto version_unique = make_unique<D4Attribute>("build_dmrpp_metadata", StringToD4AttributeType("container"));
1630 auto version = version_unique.get();
1631
1632 auto creation_date_unique = make_unique<D4Attribute>("created", StringToD4AttributeType("string"));
1633 auto creation_date = creation_date_unique.get();
1634 creation_date->add_value(what_time_is_it());
1635 version->attributes()->add_attribute_nocopy(creation_date_unique.release());
1636
1637 auto build_dmrpp_version_unique = make_unique<D4Attribute>("build_dmrpp", StringToD4AttributeType("string"));
1638 auto build_dmrpp_version = build_dmrpp_version_unique.get();
1639 build_dmrpp_version->add_value(CVER);
1640 version->attributes()->add_attribute_nocopy(build_dmrpp_version_unique.release());
1641
1642 auto bes_version_unique = make_unique<D4Attribute>("bes", StringToD4AttributeType("string"));
1643 auto bes_version = bes_version_unique.get();
1644 bes_version->add_value(CVER);
1645 version->attributes()->add_attribute_nocopy(bes_version_unique.release());
1646
1647 stringstream ldv;
1648 ldv << libdap_name() << "-" << libdap_version();
1649 auto libdap4_version_unique = make_unique<D4Attribute>("libdap", StringToD4AttributeType("string"));
1650 auto libdap4_version = libdap4_version_unique.get();
1651 libdap4_version->add_value(ldv.str());
1652 version->attributes()->add_attribute_nocopy(libdap4_version_unique.release());
1653
1654 if(!bes_conf_doc.empty()) {
1655 // Add the BES configuration used to create the base DMR
1656 auto config_unique = make_unique<D4Attribute>("configuration", StringToD4AttributeType("string"));
1657 auto config = config_unique.get();
1658 config->add_value(bes_conf_doc);
1659 version->attributes()->add_attribute_nocopy(config_unique.release());
1660 }
1661
1662 if(!invocation.empty()) {
1663 // How was build_dmrpp invoked?
1664 auto invoke_unique = make_unique<D4Attribute>("invocation", StringToD4AttributeType("string"));
1665 auto invoke = invoke_unique.get();
1666 invoke->add_value(invocation);
1667 version->attributes()->add_attribute_nocopy(invoke_unique.release());
1668 }
1669 // Inject version and configuration attributes into DMR here.
1670 dmrpp->root()->attributes()->add_attribute_nocopy(version_unique.release());
1671}
1672
1673
1687 void inject_version_and_configuration(int argc, char **argv, const string &bes_conf_file_used_to_create_dmr, DMRpp *dmrpp)
1688{
1689 string bes_configuration;
1690 string invocation;
1691 if(!bes_conf_file_used_to_create_dmr.empty()) {
1692 // Add the BES configuration used to create the base DMR
1693 TheBESKeys::ConfigFile = bes_conf_file_used_to_create_dmr;
1694 bes_configuration = TheBESKeys::TheKeys()->get_as_config();
1695 }
1696
1697 invocation = recreate_cmdln_from_args(argc, argv);
1698
1699 inject_version_and_configuration_worker(dmrpp, bes_configuration, invocation);
1700
1701}
1702
1713void inject_version_and_configuration(DMRpp *dmrpp)
1714{
1715 bool found;
1716
1717 string bes_configuration;
1718 string invocation;
1719 if(!TheBESKeys::ConfigFile.empty()) {
1720 // Add the BES configuration used to create the base DMR
1721 bes_configuration = TheBESKeys::TheKeys()->get_as_config();
1722 }
1723
1724 // How was build_dmrpp invoked?
1725 invocation = BESContextManager::TheManager()->get_context(INVOCATION_CONTEXT, found);
1726
1727 // Do the work now...
1728 inject_version_and_configuration_worker(dmrpp, bes_configuration, invocation);
1729}
1730
1743void build_dmrpp_from_dmr_file(const string &dmrpp_href_value, const string &dmr_filename, const string &h4_file_fqn,
1744 bool add_production_metadata, bool disable_missing_data, const string &bes_conf_file_used_to_create_dmr, int argc, char *argv[])
1745{
1746 // Get dmr:
1747 DMRpp dmrpp;
1748 DmrppTypeFactory dtf;
1749 dmrpp.set_factory(&dtf);
1750
1751 ifstream in(dmr_filename.c_str());
1752 D4ParserSax2 parser;
1753 parser.intern(in, &dmrpp, false);
1754
1755 add_chunk_information(h4_file_fqn, &dmrpp,disable_missing_data);
1756
1757 if (add_production_metadata) {
1758 inject_version_and_configuration(argc,argv,bes_conf_file_used_to_create_dmr,&dmrpp);
1759 }
1760
1761 XMLWriter writer;
1762 dmrpp.print_dmrpp(writer, dmrpp_href_value);
1763 cout << writer.get_doc();
1764
1765}
1766
1767
1768} // namespace build_dmrpp_util
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
std::string get_as_config() const
static TheBESKeys * TheKeys()
Access to the singleton.
Definition TheBESKeys.cc:85
static std::string ConfigFile
Definition TheBESKeys.h:117
virtual void print_dmrpp(libdap::XMLWriter &xml, const std::string &href="", bool constrained=false, bool print_chunks=true)
Print the DMR++ response.
Definition DMRpp.cc:75