bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
DmrppCommon.h
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of the BES
5
6// Copyright (c) 2016 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#ifndef _dmrpp_common_h
26#define _dmrpp_common_h 1
27
#include <cstddef>
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <pthread.h>

#define PUGIXML_NO_XPATH
#define PUGIXML_HEADER_ONLY
#include <pugixml.hpp>

#include <libdap/Type.h>
38
// Forward declarations of the libdap types named in this header; the full
// definitions are only needed by the implementation files.
namespace libdap {
class DMR;
class BaseType;
class D4BaseTypeFactory;
class D4Group;
class D4Attributes;
class D4EnumDef;
class D4Dimension;
class XMLWriter;
}
49
// Forward declaration of http::url, which the add_chunk() overloads take
// through a std::shared_ptr.
namespace http {
class url;
}
53
// Forward declaration of pugi::xml_node. DmrppCommon stores an xml_node by
// value, so the complete type still has to come from <pugixml.hpp>.
namespace pugi {
class xml_node;
}
57
58namespace dmrpp {
59
class DMZ;
class Chunk;
class DmrppArray;
class SuperChunk;

/**
 * @brief Declared here, defined elsewhere.
 * @note NOTE(review): presumably joins the first @p num_threads entries of
 * @p threads - confirm against the definition.
 * @param threads Array of thread handles
 * @param num_threads Number of entries in @p threads
 */
void join_threads(pthread_t threads[], unsigned int num_threads);
66
/**
 * @brief Hold the value used to fill empty chunks.
 *
 * One member per numeric type; all members share the same storage.
 */
union fill_value_union {
    int8_t int8;
    int16_t int16;
    int32_t int32;
    int64_t int64;

    uint8_t uint8;
    uint16_t uint16;
    uint32_t uint32;
    uint64_t uint64;

    float f;
    double d;
};
84
97class DmrppCommon {
98
99 friend class DmrppCommonTest;
100 friend class DmrppParserTest;
101 friend class DMZTest;
102
103 bool d_compact = false;
104 bool d_missing_data = false;
105 bool d_disable_dio = false;
106
107 std::string d_filters;
108 std::string d_byte_order;
109 std::vector<unsigned long long> d_chunk_dimension_sizes;
110 std::vector<std::shared_ptr<Chunk>> d_chunks;
111 bool d_twiddle_bytes = false;
112
113 // These indicate that the chunks or attributes have been loaded into the
114 // variable when the DMR++ handler is using lazy-loading of this data.
115 bool d_chunks_loaded = false;
116 bool d_attributes_loaded = false;
117
118 bool d_uses_fill_value {false};
119 // Convert fill_value to the correct numeric datatype at the time of use. jhrg 4/24/22
120 std::string d_fill_value_str;
121 libdap::Type d_fill_value_type{libdap::dods_null_c};
122 fill_value_union d_fill_value;
123
124 // We need this parameter to handle the case when the variable only contains one chunk
125 // and the data in that chunk are all fill_value. It is OK to compress this filled chunk,
126 // However, for some data format like HDF5, it figures out that there is only one filled
127 // chunk data for this variable, so the chunk is not compressed at all. If Dmrpp applies
128 // de-compression to this variable, an error will occur. So we need this parameter to
129 // turn off the de-compression filter for this case. KY 07/28/22
130 bool d_one_chunk_fill_value {false};
131
132 // Each instance of DmrppByte, ..., holds a shared pointer to the DMZ so that
133 // it can fetch more information from the XML if needed - this is how the lazy-load
134 // feature is implemented. The xml_node object is used to simplify finding where
135 // in the XML information about a variable is stored - to limit searching the
136 // document, the code caches the XML node.
137 std::shared_ptr<DMZ> d_dmz;
138 pugi::xml_node d_xml_node;
139
140 // var_chunks_storage_size only applies to the chunking storage.
141 // For compact and contiguous storages, this variable is always 0 since there are no chunks.
142 unsigned long long var_chunks_storage_size = 0;
143 std::vector<unsigned int> deflate_levels;
144 bool processing_fv_chunks = false;
145 bool using_linked_block = false;
146 unsigned int total_linked_blocks =0;
147
148 bool multi_linked_blocks_chunk = false;
149
150 // Structure offset
151 std::vector<unsigned int> struct_offsets;
152protected:
153 virtual char *read_atomic(const std::string &name);
154 virtual char *read_atomic(const std::string &name, size_t & buf_size);
155 // This declaration allows code in the SuperChunky program to use the protected method.
156 // jhrg 10/25/21
157 friend void compute_super_chunks(dmrpp::DmrppArray *array, bool only_constrained, std::vector<dmrpp::SuperChunk *> &super_chunks);
158
159public:
160 static bool d_print_chunks;
161 static std::string d_dmrpp_ns;
162 static std::string d_ns_prefix;
163
164 DmrppCommon() = default;
165
166 explicit DmrppCommon(std::shared_ptr<DMZ> dmz) : d_dmz(std::move(dmz)) { }
167
168 DmrppCommon(const DmrppCommon &) = default;
169
170 virtual ~DmrppCommon()= default;
171
173 virtual std::string get_filters() const {
174 return d_filters;
175 }
176
177 void set_filter(const std::string &value);
178
179 const std::vector<unsigned int> & get_deflate_levels() const { return deflate_levels;}
180 void set_deflate_levels(const std::vector<unsigned int>& def_levels) {
181 for(const auto &def_level:def_levels)
182 deflate_levels.push_back(def_level);
183 }
184 void set_struct_offsets(const std::vector<unsigned int>& s_offs) {
185 for(const auto &s_o:s_offs)
186 struct_offsets.push_back(s_o);
187 }
188 const std::vector<unsigned int> & get_struct_offsets() const { return struct_offsets;}
189
190 void set_processing_fv_chunks() { processing_fv_chunks = true;}
191 bool get_processing_fv_chunks() const { return processing_fv_chunks; }
192
193 void set_using_linked_block() { using_linked_block = true;}
194 bool get_using_linked_block() const { return using_linked_block; }
195
196 void set_total_linked_blocks(unsigned tlbs) { total_linked_blocks = tlbs;}
197 unsigned int get_total_linked_blocks() const { return total_linked_blocks; }
198
199 virtual bool is_filters_empty() const {
200 return d_filters.empty();
201 }
202
204 virtual bool is_compact_layout() const {
205 return d_compact;
206 }
207
209 void set_compact(bool value) {
210 d_compact = value;
211 }
212
214 virtual bool is_missing_data() const {
215 return d_missing_data;
216 }
217
219 void set_missing_data(bool value) {
220 d_missing_data = value;
221 }
222
224 virtual bool is_multi_linked_blocks_chunk() const {
225 return multi_linked_blocks_chunk;
226 }
227
230 multi_linked_blocks_chunk = value;
231 }
232
233
235 virtual bool is_disable_dio() const {
236 return d_disable_dio;
237 }
238
240 void set_disable_dio(bool value) {
241 d_disable_dio = value;
242 }
243
245 virtual bool twiddle_bytes() const { return d_twiddle_bytes; }
246
247 // @brief Provide access to the DMZ instance bound to this variable
248 // virtual const std::shared_ptr<DMZ> &get_dmz() const { return d_dmz; }
249
251 virtual bool get_chunks_loaded() const { return d_chunks_loaded; }
252 virtual void set_chunks_loaded(bool state) { d_chunks_loaded = state; }
253
255 virtual bool get_attributes_loaded() const { return d_attributes_loaded; }
256 virtual void set_attributes_loaded(bool state) { d_attributes_loaded = state; }
257
258 virtual const pugi::xml_node &get_xml_node() const { return d_xml_node; }
259 virtual void set_xml_node(pugi::xml_node node) { d_xml_node = node; }
260
262 virtual const std::vector<std::shared_ptr<Chunk>> &get_immutable_chunks() const {
263 return d_chunks;
264 }
265
266 std::vector<std::shared_ptr<Chunk>> get_chunks() const { return d_chunks; }
267
270 virtual size_t get_chunks_size() const { return d_chunks.size(); }
271
274 virtual const std::vector<unsigned long long> &get_chunk_dimension_sizes() const {
275 return d_chunk_dimension_sizes;
276 }
277
280 virtual unsigned long long get_chunk_size_in_elements() const {
281 unsigned long long elements = 1;
282 for (auto d_chunk_dimension_size : d_chunk_dimension_sizes) {
283 elements *= d_chunk_dimension_size;
284 }
285
286 return elements;
287 }
288
290 virtual void set_uses_fill_value(bool ufv) { d_uses_fill_value = ufv; }
291
293 virtual void set_fill_value_string(const std::string &fv) { d_fill_value_str = fv; }
294
296 virtual void set_fill_value_type(libdap::Type t) { d_fill_value_type = t; }
297
299 virtual bool get_uses_fill_value() const { return d_uses_fill_value; }
300
302 virtual std::string get_fill_value() const { return d_fill_value_str; }
303
305 virtual libdap::Type get_fill_value_type() const { return d_fill_value_type; }
306
308 virtual void set_one_chunk_fill_value(bool ufv) { d_one_chunk_fill_value = ufv; }
309
311 virtual bool get_one_chunk_fill_value() const { return d_one_chunk_fill_value; }
312
313 void print_chunks_element(libdap::XMLWriter &xml, const std::string &name_space = "");
314
315 void print_compact_element(libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;
316 void print_missing_data_element(const libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;
317 void print_missing_data_element(const libdap::XMLWriter &xml, const std::string &name_space, const char *data, int length) const;
318 void print_special_structure_element(const libdap::XMLWriter &xml, const std::string &name_space = "", const std::string &encoded = "") const;
319
320 void print_dmrpp(libdap::XMLWriter &writer, bool constrained = false);
321
323 void set_chunk_dimension_sizes(const std::vector<unsigned long long> &chunk_dims) {
324 d_chunk_dimension_sizes.clear();
325 //d_chunk_dimension_sizes.resize(chunk_dims.size());
326 for (auto chunk_dim : chunk_dims) {
327 d_chunk_dimension_sizes.emplace_back(chunk_dim);
328 }
329 }
330
331 // These two functions duplicate code in DMZ but provides access to the DMZ::load_chunks()
332 // method without having to cast a BaseType to a DmrppCommon in order to use it. jhrg 11/12/21
333 virtual void load_chunks(libdap::BaseType *btp);
334 virtual void load_attributes(libdap::BaseType *btp);
335
336 virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string);
337
338 virtual void ingest_compression_type(const std::string &compression_type_string);
339
340 virtual void ingest_byte_order(const std::string &byte_order_string);
341 virtual std::string get_byte_order() const { return d_byte_order; }
342
343 // There are two main versions of add_chunk: One that takes a size and offset
344 // and one that takes a fill value. However, for each of those, there are versions
345 // that take a data URL (or not) and versions that take the 'chunk position in
346 // array' information as a string or as a vector< uint64_t >. Thus, there are
347 // a total of eight of these 'add_chunk()' functions. jhrg 4/22/22
348 virtual unsigned long add_chunk(
349 std::shared_ptr<http::url> d_data_url,
350 const std::string &byte_order,
351 unsigned long long size,
352 unsigned long long offset,
353 const std::string &position_in_array);
354
355 virtual unsigned long add_chunk(
356 std::shared_ptr<http::url> d_data_url,
357 const std::string &byte_order,
358 unsigned long long size,
359 unsigned long long offset,
360 unsigned int filter_mask,
361 const std::string &position_in_array);
362
363 virtual unsigned long add_chunk(
364 std::shared_ptr<http::url> d_data_url,
365 const std::string &byte_order,
366 unsigned long long size,
367 unsigned long long offset,
368 bool linked_block,
369 unsigned int linked_block_index);
370
371 virtual unsigned long add_chunk(
372 std::shared_ptr<http::url> d_data_url,
373 const std::string &byte_order,
374 unsigned long long size,
375 unsigned long long offset,
376 const std::vector<unsigned long long> &position_in_array, bool multi_linked_blocks, unsigned int multi_linked_block_index_in_dmrpp_file);
377
378
379 virtual unsigned long add_chunk(
380 std::shared_ptr<http::url> d_data_url,
381 const std::string &byte_order,
382 unsigned long long size,
383 unsigned long long offset,
384 const std::vector<unsigned long long> &position_in_array);
385
386
387 virtual unsigned long add_chunk(
388 std::shared_ptr<http::url> d_data_url,
389 const std::string &byte_order,
390 unsigned long long size,
391 unsigned long long offset,
392 unsigned int filter_mask,
393 const std::vector<unsigned long long> &position_in_array);
394
395
396 virtual unsigned long add_chunk(
397 const std::string &byte_order,
398 unsigned long long size,
399 unsigned long long offset,
400 const std::string &position_in_array);
401
402 virtual unsigned long add_chunk(
403 const std::string &byte_order,
404 unsigned long long size,
405 unsigned long long offset,
406 bool linked_block,
407 unsigned int linked_block_index);
408
409 // Multi-linked block handling for build_dmrpp
410 virtual unsigned long add_chunk(
411 const std::string &byte_order,
412 unsigned long long size,
413 unsigned long long offset,
414 const std::vector<unsigned long long> &position_in_array,
415 bool multi_linked_blocks,
416 unsigned int linked_block_index);
417
418
419 virtual unsigned long add_chunk(
420 const std::string &byte_order,
421 unsigned long long size,
422 unsigned long long offset,
423 const std::vector<unsigned long long> &position_in_array);
424
425 // Multi-linked block handling for retrieving the data
426
427 virtual unsigned long add_chunk(
428 std::shared_ptr<http::url> d_data_url,
429 const std::string &byte_order,
430 const std::string &position_in_array,
431 const std::vector<std::pair<unsigned long long, unsigned long long>> &lb_offset_length);
432
433 virtual unsigned long add_chunk(
434 const std::string &byte_order,
435 const std::string &position_in_array,
436 const std::vector<std::pair<unsigned long long, unsigned long long>> &lb_offset_length);
437
438 virtual unsigned long add_chunk(
439 const std::string &byte_order,
440 unsigned long long size,
441 unsigned long long offset,
442 unsigned int filter_mask,
443 const std::vector<unsigned long long> &position_in_array);
444
445 virtual unsigned long add_chunk(
446 const std::string &byte_order,
447 const std::string &fill_value,
448 libdap::Type fv_type,
449 unsigned long long chunk_size,
450 const std::vector<unsigned long long> &position_in_array);
451
452 // For structure, if we don't have structure offset information, we will calculate the structure size based on our best knowledge.
453 virtual unsigned long add_chunk(
454 const std::string &byte_order,
455 const std::string &fill_value,
456 libdap::Type fv_type,
457 unsigned long long chunk_size,
458 const std::vector<unsigned long long> &position_in_array,
459 const std::vector<std::pair<libdap::Type,int>> &structure_type_element);
460
461 // If we know the structure offset information, we will pass the structure offset to the chunk and this will be used for fill value handling.
462 virtual unsigned long add_chunk(
463 const std::string &byte_order,
464 const std::string &fill_value,
465 libdap::Type fv_type,
466 unsigned long long chunk_size,
467 const std::vector<unsigned long long> &position_in_array,
468 unsigned int struct_size);
469
470
471 void accumlate_storage_size(unsigned long long chunk_storage_size) {var_chunks_storage_size += chunk_storage_size; }
472 unsigned long long get_var_chunks_storage_size() const {return var_chunks_storage_size; }
473 virtual void dump(std::ostream & strm) const;
474};
475
476} // namespace dmrpp
477
478#endif // _dmrpp_common_h
479
Interface to hide the DMR++ information storage format.
Definition DMZ.h:74
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition DmrppArray.h:77
Size and offset information of data included in DMR++ files.
Definition DmrppCommon.h:97
static std::string d_ns_prefix
The XML namespace prefix to use.
virtual bool twiddle_bytes() const
Returns true if this object utilizes shuffle compression.
virtual void set_fill_value_string(const std::string &fv)
Set the fill value (using a string)
virtual libdap::Type get_fill_value_type() const
virtual bool is_multi_linked_blocks_chunk() const
Returns true if this object contains a chunk that has multiple linked blocks.
static bool d_print_chunks
if true, print_dap4() prints chunk elements
virtual bool is_compact_layout() const
Returns true if this object utilizes COMPACT layout.
virtual std::string get_fill_value() const
void set_multi_linked_blocks_chunk(bool value)
Set the value of the boolean variable that indicates this variable contains multiple linked blocks in...
virtual void ingest_compression_type(const std::string &compression_type_string)
Ingests the compression type given as a string.
void set_disable_dio(bool value)
Set the value of the disable_dio property.
virtual void load_attributes(libdap::BaseType *btp)
Load the attribute information for this variable.
virtual bool is_missing_data() const
Returns true if this object describes the missing data.
virtual bool get_chunks_loaded() const
Have the chunks been loaded?
virtual bool get_one_chunk_fill_value() const
virtual size_t get_chunks_size() const
Use this when the number of chunks is needed.
static std::string d_dmrpp_ns
The DMR++ XML namespace.
void print_chunks_element(libdap::XMLWriter &xml, const std::string &name_space="")
Print the Chunk information.
virtual void set_uses_fill_value(bool ufv)
Set the uses_fill_value property.
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
virtual bool is_disable_dio() const
Returns the value of the disable_dio property.
virtual const std::vector< std::shared_ptr< Chunk > > & get_immutable_chunks() const
A const reference to the vector of chunks.
void set_chunk_dimension_sizes(const std::vector< unsigned long long > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Adds a chunk to the vector of chunk refs (byteStreams) and returns the size of the chunks internal ve...
void set_filter(const std::string &value)
Set the value of the filters property.
virtual void ingest_byte_order(const std::string &byte_order_string)
Parses the text content of the XML element chunks:byteOrder.
virtual bool get_uses_fill_value() const
virtual const std::vector< unsigned long long > & get_chunk_dimension_sizes() const
The chunk dimension sizes held in a const vector.
virtual void set_fill_value_type(libdap::Type t)
Set the libdap data type to use with the fill value.
void set_compact(bool value)
Set the value of the compact property.
void print_dmrpp(libdap::XMLWriter &writer, bool constrained=false)
Print the DMR++ response for the Scalar types.
virtual void load_chunks(libdap::BaseType *btp)
Load chunk information for this variable.
virtual unsigned long long get_chunk_size_in_elements() const
Get the number of elements in this chunk.
virtual std::string get_filters() const
Return the names of all the filters in the order they were applied.
void set_missing_data(bool value)
Set the value of the missing data.
void print_compact_element(libdap::XMLWriter &xml, const std::string &name_space="", const std::string &encoded="") const
Print the Compact base64-encoded information.
virtual bool get_attributes_loaded() const
Have the attributes been loaded?
virtual void set_one_chunk_fill_value(bool ufv)
Set the one_chunk_fill_value property.
virtual char * read_atomic(const std::string &name)
read method for the atomic types
A SuperChunk is a collection of contiguous Chunk objects along with optimized methods for data retrie...
Definition SuperChunk.h:44
Parse a URL into the protocol, host, path and query parts.
Definition url_impl.h:44
utility class for the HTTP catalog module
Definition TheBESKeys.h:51
hold the value used to fill empty chunks
Definition DmrppCommon.h:70