bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
Chunk.h
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of the BES
4
5// Copyright (c) 2016 OPeNDAP, Inc.
6// Author: Nathan Potter <ndp@opendap.org>
7//
8// This library is free software; you can redistribute it and/or
9// modify it under the terms of the GNU Lesser General Public
10// License as published by the Free Software Foundation; either
11// version 2.1 of the License, or (at your option) any later version.
12//
13// This library is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16// Lesser General Public License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23
24#ifndef _Chunk_h
25#define _Chunk_h 1
26
27#include <string>
28#include <utility>
29#include <vector>
30#include <memory>
31
32// BES
33#include "url_impl.h"
34
35// libdap4
36#include <libdap/util.h>
37
38
39// This is used to track 'cloudydap' accesses in the S3 logs
40// by adding a query string that will show up in those logs. This is
41// activated by using a special BES context with the name 'cloudydap.'
42//
43// We disabled this (via ENABLE_TRACKING_QUERY_PARAMETER) because it
44// used regexes and was a performance killer. jhrg 4/22/22
45#define S3_TRACKING_CONTEXT "cloudydap"
46#define ENABLE_TRACKING_QUERY_PARAMETER 0
47
48namespace dmrpp {
49
51 int8_t int8;
52 int16_t int16;
53 int32_t int32;
54 int64_t int64;
55
56 uint8_t uint8;
57 uint16_t uint16;
58 uint32_t uint32;
59 uint64_t uint64;
60
61 float f;
62 double d;
63};
64
65// Callback functions used by chunk readers
66size_t chunk_header_callback(char *buffer, size_t size, size_t nitems, void *data);
67size_t chunk_write_data(void *buffer, size_t size, size_t nmemb, void *data);
68
69void process_s3_error_response(const std::shared_ptr<http::url> &data_url, const std::string &xml_message);
70
77class Chunk {
78private:
79 std::shared_ptr<http::url> d_data_url;
80 std::string d_query_marker;
81 std::string d_byte_order;
82 std::string d_fill_value;
83 unsigned long long d_size{0};
84 unsigned long long d_offset{0};
85 unsigned long long direct_io_offset{0};
86 unsigned int d_filter_mask{0};
87
88 // The following two members mean this chunk is actually a linked block.
89 // We do not need to remember the chunk position, only the linked block index.
90 bool linked_block=false;
91 unsigned int linked_block_index = 0;
92
93 // The following two members mean this chunk contains multiple linked blocks.
94 // This is still a chunk but inside this chunk it contains multiple linked blocks.
95 // We will record this chunk multiple times with different multi_linked_block_index_in_dmrpp_file.
96 // This is for generating the dmrpp file.
97 bool multi_linked_blocks =false;
98 unsigned int multi_linked_block_index_in_dmrpp_file=0;
99
100 std::vector<std::pair<unsigned long long,unsigned long long>> mlb_offset_lengths;
101
102 bool d_uses_fill_value{false};
103 libdap::Type d_fill_value_type{libdap::dods_null_c};
104 std::vector<std::pair<libdap::Type,int>> compound_udf_type_elms;
105 unsigned int struct_size = 0;
106
107 std::vector<unsigned long long> d_chunk_position_in_array;
108
109 // These are used only during the libcurl callback; they are not duplicated by the
110 // copy ctor or assignment operator.
111
128 bool d_read_buffer_is_mine {true};
129 unsigned long long d_bytes_read {0};
130 char *d_read_buffer {nullptr};
131 unsigned long long d_read_buffer_size {0};
132 bool d_is_read {false};
133 bool d_is_inflated {false};
134 std::string d_response_content_type;
135
136 friend class ChunkTest;
137 friend class DmrppCommonTest;
138 friend class MockChunk;
139
140 unsigned int obtain_compound_udf_type_size() const;
141 unsigned int get_value_size(libdap::Type);
142 const char* get_value_ptr(fill_value &,libdap::Type, const std::string &,bool);
143 void obtain_fv_strs(vector<string>& fv_str, const string &v) const;
144 void get_compound_fvalue(const string &v, vector<char> &compound_fvalue) const;
145
146protected:
147
148 void _duplicate(const Chunk &bs)
149 {
150 d_size = bs.d_size;
151 d_offset = bs.d_offset;
152 direct_io_offset = bs.direct_io_offset;
153 d_filter_mask = bs.d_filter_mask;
154 linked_block = bs.linked_block;
155 linked_block_index = bs.linked_block_index;
156 multi_linked_block_index_in_dmrpp_file = bs.multi_linked_block_index_in_dmrpp_file;
157 multi_linked_blocks = bs.multi_linked_blocks;
158 d_data_url = bs.d_data_url;
159 d_byte_order = bs.d_byte_order;
160 d_fill_value = bs.d_fill_value;
161 d_uses_fill_value = bs.d_uses_fill_value;
162 d_query_marker = bs.d_query_marker;
163 d_chunk_position_in_array = bs.d_chunk_position_in_array;
164 }
165
166public:
167
177 Chunk() = default;
178
189 Chunk(std::shared_ptr<http::url> data_url, std::string order, unsigned long long size,
190 unsigned long long offset, const std::string &pia_str = "") :
191 d_data_url(std::move(data_url)), d_byte_order(std::move(order)),
192 d_size(size), d_offset(offset)
193 {
194#if ENABLE_TRACKING_QUERY_PARAMETER
196#endif
197 set_position_in_array(pia_str);
198 }
199
210 Chunk(std::string order, unsigned long long size, unsigned long long offset, const std::string &pia_str = "") :
211 d_byte_order(std::move(order)), d_size(size), d_offset(offset) {
212#if ENABLE_TRACKING_QUERY_PARAMETER
214#endif
215 set_position_in_array(pia_str);
216 }
217
218 Chunk(std::shared_ptr<http::url> data_url, std::string order, unsigned long long size,
219 unsigned long long offset, bool lb, unsigned int lblock_index, const std::string &pia_str = "") :
220 d_data_url(std::move(data_url)), d_byte_order(std::move(order)),
221 d_size(size), d_offset(offset), linked_block(lb),linked_block_index(lblock_index)
222 {
223#if ENABLE_TRACKING_QUERY_PARAMETER
225#endif
226 set_position_in_array(pia_str);
227 }
228
229 Chunk(std::string order, unsigned long long size, unsigned long long offset,
230 bool lb, unsigned int lblock_index, const std::string &pia_str = "") :
231 d_byte_order(std::move(order)), d_size(size), d_offset(offset),
232 linked_block(lb), linked_block_index(lblock_index) {
233#if ENABLE_TRACKING_QUERY_PARAMETER
235#endif
236 set_position_in_array(pia_str);
237 }
238
239 // For build_dmrpp that has multiple linked blocks in a chunk
240 Chunk(std::shared_ptr<http::url> data_url, std::string order, unsigned long long size, unsigned long long offset,
241 const std::vector<unsigned long long> &pia_vec,bool is_multi_lb, unsigned int lb_index) :
242 d_data_url(std::move(data_url)), d_byte_order(std::move(order)),
243 d_size(size), d_offset(offset), multi_linked_blocks(is_multi_lb), multi_linked_block_index_in_dmrpp_file(lb_index) {
244#if ENABLE_TRACKING_QUERY_PARAMETER
246#endif
247 set_position_in_array(pia_vec);
248 }
249
250 Chunk(std::string order, unsigned long long size, unsigned long long offset,
251 const std::vector<unsigned long long> &pia_vec,bool is_multi_lb, unsigned int lb_index) :
252 d_byte_order(std::move(order)),
253 d_size(size), d_offset(offset), multi_linked_blocks(is_multi_lb), multi_linked_block_index_in_dmrpp_file(lb_index) {
254#if ENABLE_TRACKING_QUERY_PARAMETER
256#endif
257 set_position_in_array(pia_vec);
258 }
259
260 // For retrieving dmrpp that has multiple linked blocks in a chunk
261 Chunk(std::shared_ptr<http::url> data_url, std::string order,
262 const std::string &pia_vec,const std::vector<std::pair<unsigned long long, unsigned long long>> &lb_ol) :
263 d_data_url(std::move(data_url)), d_byte_order(std::move(order))
264 {
265#if ENABLE_TRACKING_QUERY_PARAMETER
267#endif
268 set_position_in_array(pia_vec);
269 set_multi_linked_offset_length(lb_ol);
270 if (lb_ol.empty()==false)
271 multi_linked_blocks = true;
272 }
273
274 Chunk(std::string order,
275 const std::string &pia_vec,const std::vector<std::pair<unsigned long long, unsigned long long>> &lb_ol) :
276 d_byte_order(std::move(order))
277 {
278#if ENABLE_TRACKING_QUERY_PARAMETER
280#endif
281 set_position_in_array(pia_vec);
282 set_multi_linked_offset_length(lb_ol);
283 }
284
295 Chunk(std::shared_ptr<http::url> data_url, std::string order, unsigned long long size, unsigned long long offset,
296 const std::vector<unsigned long long> &pia_vec) :
297 d_data_url(std::move(data_url)), d_byte_order(std::move(order)),
298 d_size(size), d_offset(offset) {
299#if ENABLE_TRACKING_QUERY_PARAMETER
301#endif
302 set_position_in_array(pia_vec);
303 }
304
305 Chunk(std::shared_ptr<http::url> data_url, std::string order, unsigned long long size, unsigned long long offset,
306 unsigned int filter_mask, const std::vector<unsigned long long> &pia_vec) :
307 d_data_url(std::move(data_url)), d_byte_order(std::move(order)),
308 d_size(size), d_offset(offset), d_filter_mask(filter_mask) {
309#if ENABLE_TRACKING_QUERY_PARAMETER
311#endif
312 set_position_in_array(pia_vec);
313 }
324 Chunk(std::string order, unsigned long long size, unsigned long long offset,
325 unsigned int filter_mask, const std::vector<unsigned long long> &pia_vec) :
326 d_byte_order(std::move(order)), d_size(size), d_offset(offset), d_filter_mask(filter_mask) {
327#if ENABLE_TRACKING_QUERY_PARAMETER
329#endif
330 set_position_in_array(pia_vec);
331 }
332
333 Chunk(std::string order, unsigned long long size, unsigned long long offset,
334 const std::vector<unsigned long long> &pia_vec) :
335 d_byte_order(std::move(order)), d_size(size), d_offset(offset) {
336#if ENABLE_TRACKING_QUERY_PARAMETER
338#endif
339 set_position_in_array(pia_vec);
340 }
341 Chunk(std::string order, std::string fill_value, libdap::Type fv_type, unsigned long long chunk_size, std::vector<unsigned long long> pia) :
342 d_byte_order(std::move(order)), d_fill_value(std::move(fill_value)), d_size(chunk_size),
343 d_uses_fill_value(true), d_fill_value_type(fv_type), d_chunk_position_in_array(std::move(pia)) {
344 }
345
346 // Add a constructor when the chunk_offset is provided and chunk_position_in_array is not necessary. KY 2023-02-17
347 Chunk(std::string order, std::string fill_value, libdap::Type fv_type, unsigned long long size,
348 unsigned long long offset, const std::string &pia_str = "") :
349 d_byte_order(std::move(order)),d_fill_value(std::move(fill_value)),
350 d_size(size), d_offset(offset), d_uses_fill_value(true), d_fill_value_type(fv_type)
351 {
352#if ENABLE_TRACKING_QUERY_PARAMETER
354#endif
355 set_position_in_array(pia_str);
356 }
357
358 Chunk(std::string order, std::string fill_value, libdap::Type fv_type, unsigned long long chunk_size, std::vector<unsigned long long> pia, const std::vector<std::pair<libdap::Type,int>> & compound_udf_type_elms) :
359 d_byte_order(std::move(order)), d_fill_value(std::move(fill_value)), d_size(chunk_size),
360 d_uses_fill_value(true), d_fill_value_type(fv_type), d_chunk_position_in_array(std::move(pia)) {
361 set_compound_udf_info(compound_udf_type_elms);
362
363 }
364
365 Chunk(std::string order, std::string fill_value, libdap::Type fv_type, unsigned long long chunk_size, std::vector<unsigned long long> pia, unsigned int s_size) :
366 d_byte_order(std::move(order)), d_fill_value(std::move(fill_value)), d_size(chunk_size),
367 d_uses_fill_value(true), d_fill_value_type(fv_type), struct_size(s_size),d_chunk_position_in_array(std::move(pia)) {
368
369 }
370
371 Chunk(const Chunk &h4bs)
372 {
373 _duplicate(h4bs);
374 }
375
376 virtual ~Chunk()
377 {
378 if(d_read_buffer_is_mine)
379 delete[] d_read_buffer;
380 d_read_buffer = nullptr;
381 }
382
387 Chunk &operator=(const Chunk &rhs)
388 {
389 if (this == &rhs) return *this;
390
391 _duplicate(rhs);
392
393 return *this;
394 }
395
397 virtual std::string get_response_content_type() { return d_response_content_type; }
398
400 void set_response_content_type(const std::string &ct) { d_response_content_type = ct; }
401
403 virtual std::string get_byte_order() { return d_byte_order; }
404
406 virtual unsigned long long get_size() const
407 {
408 return d_size;
409 }
410
412 virtual unsigned long long get_offset() const
413 {
414 return d_offset;
415 }
416
417 virtual void set_size(unsigned long long storage_size)
418 {
419 d_size = storage_size;
420 }
421
422 virtual unsigned long long get_direct_io_offset() const
423 {
424 return direct_io_offset;
425 }
426
427 virtual void set_direct_io_offset(unsigned long long dio_offset) {
428 direct_io_offset = dio_offset;
429 }
430
431 virtual unsigned int get_filter_mask() const
432 {
433 return d_filter_mask;
434 }
435
436 virtual bool get_linked_block() const
437 {
438 return linked_block;
439 }
440
441 virtual unsigned int get_linked_block_index() const
442 {
443 return linked_block_index;
444 }
445
446 virtual bool get_multi_linked_blocks() const
447 {
448 return multi_linked_blocks;
449 }
450
451 virtual unsigned int get_multi_linked_block_index_in_dmrpp_file() const
452 {
453 return multi_linked_block_index_in_dmrpp_file;
454 }
455
456
458 virtual bool get_uses_fill_value() const { return d_uses_fill_value; }
459 virtual libdap::Type get_fill_value_type() const { return d_fill_value_type; }
460
462 virtual std::string get_fill_value() const { return d_fill_value; }
463
465 virtual std::shared_ptr<http::url> get_data_url() const;
466
468 virtual void set_data_url(std::shared_ptr<http::url> data_url)
469 {
470 d_data_url = std::move(data_url);
471 }
472
473 virtual bool get_read_buffer_is_mine() { return d_read_buffer_is_mine; }
475 virtual unsigned long long get_bytes_read() const
476 {
477 return d_bytes_read;
478 }
479
484 virtual void set_bytes_read(unsigned long long bytes_read)
485 {
486 d_bytes_read = bytes_read;
487 }
488
503 virtual void set_rbuf_to_size()
504 {
505 if(d_read_buffer_is_mine)
506 delete[] d_read_buffer;
507 d_read_buffer = new char[d_size];
508 d_read_buffer_size = d_size;
509 d_read_buffer_is_mine = true;
511 }
512
515 virtual char *get_rbuf()
516 {
517 return d_read_buffer;
518 }
519
531 void set_read_buffer(char *buf, unsigned long long buf_size, unsigned long long bytes_read = 0,
532 bool assume_ownership = true ) {
533 if(d_read_buffer_is_mine)
534 delete[] d_read_buffer;
535 d_read_buffer_is_mine = assume_ownership;
536 d_read_buffer = buf;
537 d_read_buffer_size = buf_size;
538
539 set_bytes_read(bytes_read);
540 }
541
543 virtual unsigned long long get_rbuf_size() const
544 {
545 return d_read_buffer_size;
546 }
547
549 virtual const std::vector<unsigned long long> &get_position_in_array() const
550 {
551 return d_chunk_position_in_array;
552 }
553
555
556 void set_position_in_array(const std::string &pia);
557 void set_position_in_array(const std::vector<unsigned long long> &pia);
558
559 void set_multi_linked_offset_length(const std::vector<std::pair<unsigned long long,unsigned long long>> &lb_offset_lengths){
560
561 for (const auto &lb_ol:lb_offset_lengths) {
562
563 std::pair<unsigned long long,unsigned long long> temp_pair;
564 temp_pair.first = lb_ol.first;
565 temp_pair.second = lb_ol.second;
566 mlb_offset_lengths.push_back(temp_pair);
567 }
568
569 }
570 void obtain_multi_linked_offset_length(vector<std::pair<unsigned long long, unsigned long long>> & cur_chunk_lb_offset_length) const{
571
572 for (const auto &lb_ol:mlb_offset_lengths) {
573
574 std::pair<unsigned long long,unsigned long long> temp_pair;
575 temp_pair.first = lb_ol.first;
576 temp_pair.second = lb_ol.second;
577 cur_chunk_lb_offset_length.push_back(temp_pair);
578 }
579
580 }
581
582 void set_compound_udf_info(const std::vector<std::pair<libdap::Type,int>> &structure_type_element){
583
584 for (const auto &ste:structure_type_element) {
585
586 std::pair<libdap::Type,int> temp_pair;
587 temp_pair.first = ste.first;
588 temp_pair.second = ste.second;
589 compound_udf_type_elms.push_back(temp_pair);
590 }
591
592 }
593
594 virtual void read_chunk();
595 virtual void read_chunk_dio();
596 virtual void load_fill_values();
597
598 virtual void filter_chunk(const std::string &filters, unsigned long long chunk_size, unsigned long long elem_width);
599
600 virtual bool get_is_read() { return d_is_read; }
601 virtual void set_is_read(bool state) { d_is_read = state; }
602
603 virtual std::string get_curl_range_arg_string();
604
605 static void parse_chunk_position_in_array_string(const std::string &pia, std::vector<unsigned long long> &pia_vect);
606
607 virtual void dump(std::ostream & strm) const;
608
609 virtual std::string to_string() const;
610};
611
612} // namespace dmrpp
613
614#endif // _Chunk_h
virtual void set_bytes_read(unsigned long long bytes_read)
Set the number of bytes read into this Chunk's data block.
Definition Chunk.h:484
Chunk(std::string order, unsigned long long size, unsigned long long offset, unsigned int filter_mask, const std::vector< unsigned long long > &pia_vec)
Get a chunk initialized with values, the data URL will not be set.
Definition Chunk.h:324
virtual void dump(std::ostream &strm) const
Definition Chunk.cc:1323
Chunk(std::string order, unsigned long long size, unsigned long long offset, const std::string &pia_str="")
Get a chunk initialized with values, the data URL will not be set.
Definition Chunk.h:210
virtual char * get_rbuf()
Definition Chunk.h:515
virtual void read_chunk()
Definition Chunk.cc:1227
void add_tracking_query_param()
Modify this chunk's data URL so that it includes tracking info.
Definition Chunk.cc:517
virtual std::string get_response_content_type()
Get the response type of the last response.
Definition Chunk.h:397
virtual std::string get_byte_order()
Definition Chunk.h:403
virtual std::string get_curl_range_arg_string()
Returns a curl range argument. libcurl requires a string argument for range-get activities,...
Definition Chunk.cc:496
virtual std::shared_ptr< http::url > get_data_url() const
Get the data URL for this chunk.
Definition Chunk.cc:1357
Chunk(std::shared_ptr< http::url > data_url, std::string order, unsigned long long size, unsigned long long offset, const std::string &pia_str="")
Get a chunk initialized with values.
Definition Chunk.h:189
virtual void set_data_url(std::shared_ptr< http::url > data_url)
Set the data url for this Chunk's data block.
Definition Chunk.h:468
virtual std::string get_fill_value() const
Definition Chunk.h:462
Chunk & operator=(const Chunk &rhs)
Definition Chunk.h:387
virtual void set_rbuf_to_size()
Allocates the internal read buffer to be d_size bytes.
Definition Chunk.h:503
virtual unsigned long long get_offset() const
Definition Chunk.h:412
virtual unsigned long long get_bytes_read() const
Definition Chunk.h:475
void set_position_in_array(const std::string &pia)
parse the chunk position string
Definition Chunk.cc:469
virtual unsigned long long get_rbuf_size() const
Definition Chunk.h:543
virtual unsigned long long get_size() const
Definition Chunk.h:406
Chunk()=default
Get an empty chunk.
Chunk(std::shared_ptr< http::url > data_url, std::string order, unsigned long long size, unsigned long long offset, const std::vector< unsigned long long > &pia_vec)
Get a chunk initialized with values.
Definition Chunk.h:295
virtual const std::vector< unsigned long long > & get_position_in_array() const
Definition Chunk.h:549
virtual bool get_uses_fill_value() const
Definition Chunk.h:458
void set_read_buffer(char *buf, unsigned long long buf_size, unsigned long long bytes_read=0, bool assume_ownership=true)
Set the target read buffer for this chunk.
Definition Chunk.h:531
virtual void filter_chunk(const std::string &filters, unsigned long long chunk_size, unsigned long long elem_width)
filter data in the chunk
Definition Chunk.cc:650
virtual void load_fill_values()
Load the chunk with fill values - temporary implementation.
Definition Chunk.cc:1163
void set_response_content_type(const std::string &ct)
Set the response type of the last response.
Definition Chunk.h:400
STL class.