bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
HDF5BaseArray.cc
Go to the documentation of this file.
1// This file is part of hdf5_handler an HDF5 file handler for the OPeNDAP
2// data server.
3
4// Author: Kent Yang <myang6@hdfgroup.org>
5
6// Copyright (c) 2011-2023 The HDF Group, Inc. and OPeNDAP, Inc.
7//
8// This is free software; you can redistribute it and/or modify it under the
9// terms of the GNU Lesser General Public License as published by the Free
10// Software Foundation; either version 2.1 of the License, or (at your
11// option) any later version.
12//
13// This software is distributed in the hope that it will be useful, but
14// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16// License for more details.
17//
18// You should have received a copy of the GNU Lesser General Public
19// License along with this library; if not, write to the Free Software
20// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21//
22// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23// You can contact The HDF Group, Inc. at 410 E University Ave,
24// Suite 200, Champaign, IL 61820
41
42#include <iostream>
43#include <sstream>
44#include <cassert>
45#include <algorithm>
46#include <memory>
47#include <BESDebug.h>
48#include <libdap/InternalErr.h>
49
50#include "HDF5BaseArray.h"
51#include "HDF5RequestHandler.h"
52#include "ObjMemCache.h"
53
54using namespace std;
55using namespace libdap;
56
57// parse constraint expr. and make hdf5 coordinate point location.
58// return number of elements to read.
59int64_t
60HDF5BaseArray::format_constraint (int64_t *offset, int64_t *step, int64_t *count)
61{
62 int64_t nels = 1;
63 int id = 0;
64
65 Dim_iter p = dim_begin ();
66
67 while (p != dim_end ()) {
68
69 int64_t start = dimension_start_ll (p, true);
70 int64_t stride = dimension_stride_ll (p, true);
71 int64_t stop = dimension_stop_ll (p, true);
72
73 // Check for illegal constraint
74 if (start > stop) {
75 ostringstream oss;
76 oss << "Array/Grid hyperslab start point "<< start <<
77 " is greater than stop point " << stop <<".";
78 throw Error(malformed_expr, oss.str());
79 }
80
81 offset[id] = start;
82 step[id] = stride;
83 count[id] = ((stop - start) / stride) + 1; // count of elements
84 nels *= count[id]; // total number of values for variable
85
86 BESDEBUG ("h5",
87 "=format_constraint():"
88 << "id=" << id << " offset=" << offset[id]
89 << " step=" << step[id]
90 << " count=" << count[id]
91 << endl);
92
93 id++;
94 p++;
95 }// "while (p != dim_end ())"
96
97 return nels;
98}
99
100void HDF5BaseArray::write_nature_number_buffer(int rank, int64_t tnumelm) {
101
102 if (rank != 1)
103 throw InternalErr(__FILE__, __LINE__, "Currently the rank of the missing field should be 1");
104
105 if (tnumelm >DODS_INT_MAX)
106 throw InternalErr(__FILE__, __LINE__, "Currently the maximum number for this dimension is less than DODS_INT_MAX");
107
108 vector<int64_t>offset;
109 vector<int64_t>count;
110 vector<int64_t>step;
111 offset.resize(rank);
112 count.resize(rank);
113 step.resize(rank);
114
115
116 int64_t nelms = format_constraint(offset.data(), step.data(), count.data());
117
118 // Since we always assign the missing Z dimension as 32-bit
119 // integer, so no need to check the type. The missing Z-dim is always
120 // 1-D with natural number 1,2,3,....
121 vector<int>val;
122 val.resize(nelms);
123
124 if (nelms == tnumelm) {
125 for (int64_t i = 0; i < nelms; i++)
126 val[i] = (int)i;
127 set_value_ll(val.data(), nelms);
128 }
129 else {
130 for (int64_t i = 0; i < count[0]; i++)
131 val[i] = (int)(offset[0] + step[0] * i);
132 set_value_ll(val.data(), nelms);
133 }
134}
135
136//#if 0
137void HDF5BaseArray::read_data_from_mem_cache(H5DataType h5type, const vector<size_t> &h5_dimsizes,void* buf,const bool is_dap4){
138
139 BESDEBUG("h5", "Coming to read_data_from_mem_cache"<<endl);
140 vector<int64_t>offset;
141 vector<int64_t>count;
142 vector<int64_t>step;
143
144 auto ndims = (int)(h5_dimsizes.size());
145 if(ndims == 0)
146 throw InternalErr(__FILE__, __LINE__, "Currently we only support array numeric data in the cache, the number of dimension for this file is 0");
147
148
149 offset.resize(ndims);
150 count.resize(ndims);
151 step.resize(ndims);
152 int64_t nelms = format_constraint (offset.data(), step.data(), count.data());
153
154 // set the original position to the starting point
155 vector<size_t>pos(ndims,0);
156 for (int64_t i = 0; i< ndims; i++)
157 pos[i] = offset[i];
158
159
160 switch (h5type) {
161
162 case H5UCHAR:
163
164 {
165 vector<unsigned char> val;
167 buf,
168 ndims,
169 h5_dimsizes,
170 offset.data(),
171 step.data(),
172 count.data(),
173 &val,
174 pos,
175 0
176 );
177
178 set_value_ll ((dods_byte *) val.data(), nelms);
179 } // case H5UCHAR
180 break;
181
182 case H5CHAR:
183 {
184
185 vector<char>val;
187 buf,
188 ndims,
189 h5_dimsizes,
190 offset.data(),
191 step.data(),
192 count.data(),
193 &val,
194 pos,
195 0
196 );
197
198 if(false == is_dap4) {
199
200 vector<short>newval;
201 newval.resize(nelms);
202
203 for (int64_t counter = 0; counter < nelms; counter++)
204 newval[counter] = (short) (val[counter]);
205 set_value_ll ((dods_int16 *) val.data(), nelms);
206 }
207 else
208 set_value_ll ((dods_int8 *) val.data(), nelms);
209
210
211 } // case H5CHAR
212 break;
213
214 case H5INT16:
215 {
216 vector<short> val;
218 buf,
219 ndims,
220 h5_dimsizes,
221 offset.data(),
222 step.data(),
223 count.data(),
224 &val,
225 pos,
226 0
227 );
228
229
230 set_value_ll (val.data(), nelms);
231 }// H5INT16
232 break;
233
234
235 case H5UINT16:
236 {
237 vector<unsigned short> val;
239 buf,
240 ndims,
241 h5_dimsizes,
242 offset.data(),
243 step.data(),
244 count.data(),
245 &val,
246 pos,
247 0
248 );
249
250
251 set_value_ll (val.data(), nelms);
252 } // H5UINT16
253 break;
254
255 case H5INT32:
256 {
257 vector<int>val;
259 buf,
260 ndims,
261 h5_dimsizes,
262 offset.data(),
263 step.data(),
264 count.data(),
265 &val,
266 pos,
267 0
268 );
269
270 set_value_ll (val.data(), nelms);
271 } // case H5INT32
272 break;
273
274 case H5UINT32:
275 {
276 vector<unsigned int>val;
278 buf,
279 ndims,
280 h5_dimsizes,
281 offset.data(),
282 step.data(),
283 count.data(),
284 &val,
285 pos,
286 0
287 );
288
289 set_value_ll (val.data(), nelms);
290 }
291 break;
292 // Add the code for the CF option DAP4 support
293 // For the CF option DAP2 support, the code will
294 // not come here since 64-integer will be ignored
295 // in DAP2.
296 case H5INT64:
297 {
298 vector<long long>val;
300 buf,
301 ndims,
302 h5_dimsizes,
303 offset.data(),
304 step.data(),
305 count.data(),
306 &val,
307 pos,
308 0
309 );
310
311 set_value_ll ((dods_int64 *) val.data(), nelms);
312 } // case H5INT64
313 break;
314
315 case H5UINT64:
316 {
317 vector<unsigned long long>val;
319 buf,
320 ndims,
321 h5_dimsizes,
322 offset.data(),
323 step.data(),
324 count.data(),
325 &val,
326 pos,
327 0
328 );
329
330 set_value_ll ((dods_uint64 *) val.data(), nelms);
331 }
332 break;
333
334
335 case H5FLOAT32:
336 {
337 vector<float>val;
339 buf,
340 ndims,
341 h5_dimsizes,
342 offset.data(),
343 step.data(),
344 count.data(),
345 &val,
346 pos,
347 0
348 );
349 set_value_ll (val.data(), nelms);
350 }
351 break;
352
353
354 case H5FLOAT64:
355 {
356
357 vector<double>val;
359 buf,
360 ndims,
361 h5_dimsizes,
362 offset.data(),
363 step.data(),
364 count.data(),
365 &val,
366 pos,
367 0
368 );
369 set_value_ll (val.data(), nelms);
370 } // case H5FLOAT64
371 break;
372
373 default:
374 throw InternalErr(__FILE__,__LINE__,"Non-supported datatype");
375
376 }
377}
378
380//
381// \param input Input variable
382// \param dim dimension info of the input
383// \param start start indexes of each dim
384// \param stride stride of each dim
385// \param edge count of each dim
386// \param poutput output variable
387// \parrm index dimension index
388// \return 0 if successful. -1 otherwise.
389//
390template<typename T>
392 void* input,
393 int rank,
394 const vector<size_t> & dim,
395 int64_t start[],
396 int64_t stride[],
397 int64_t edge[],
398 vector<T> *poutput,
399 vector<size_t>& pos,
400 int index)
401{
402 for(int k=0; k<edge[index]; k++)
403 {
404 pos[index] = start[index] + k*stride[index];
405 if(index+1<rank)
406 subset(input, rank, dim, start, stride, edge, poutput,pos,index+1);
407 if(index==rank-1)
408 {
409 size_t cur_pos = INDEX_nD_TO_1D( dim, pos);
410 void* tempbuf = (void*)((char*)input+cur_pos*sizeof(T));
411 poutput->push_back(*(static_cast<T*>(tempbuf)));
412 //"poutput->push_back(input[HDF5CFUtil::INDEX_nD_TO_1D( dim, pos)]);"
413 }
414 } // end of for
415 return 0;
416} // end of template<typename T> static int subset
417
418size_t HDF5BaseArray::INDEX_nD_TO_1D (const std::vector < size_t > &dims,
419 const std::vector < size_t > &pos) const {
420 //
421 // "int a[10][20][30] // & a[1][2][3] == a + (20*30+1 + 30*2 + 1 *3)"
422 // "int b[10][2] // &b[1][1] == b + (2*1 + 1)"
423 //
424 if(dims.size () != pos.size ())
425 throw InternalErr(__FILE__,__LINE__,"dimension error in INDEX_nD_TO_1D routine.");
426 size_t sum = 0;
427 size_t start = 1;
428
429 for (size_t p = 0; p < pos.size (); p++) {
430 size_t m = 1;
431
432 for (size_t j = start; j < dims.size (); j++)
433 m *= dims[j];
434 sum += m * pos[p];
435 start++;
436 }
437 return sum;
438}
439
440// This routine will check if any section(separated by sep) of string cur_str is inside the vector str_list.
441// The first found string will be returned or empty string will return if not found in the whole cur_str.
442string HDF5BaseArray::
443check_str_sect_in_list(const vector<string>&str_list, const string &cur_str,const char sep) const {
444
445 string ret_str;
446 string::size_type start = 0;
447 string::size_type end = 0;
448 // Obtain the ret_str value
449 // The cur_str will be chopped into tokens separated by sep.
450 while ((end = cur_str.find(sep, start)) != string::npos) {
451 if(std::find(str_list.begin(),str_list.end(),cur_str.substr(start,end-start))!=
452 str_list.end()) {
453 ret_str = cur_str.substr(start,end-start);
454 break;
455 }
456 start = end + 1;
457 }
458
459 // We will not include the last sect (rightmost sect) of cur_str.
460#if 0
461 //if(ret_str != "") {
462 // if(ret_str == cur_str.substr(cur_str.find_last_of(sep)+1))
463 // ret_str ="";
464 //}
465 //
466#endif
467
468 return ret_str;
469
470}
471
472// This routine will check if there is any sub-string of the fullpath(fname+varname) that is exactly the subset of the fullpath with the same ending
473// of the fullpath is contained in the slist.
474// Examples: slist contains { /foo1/foovar foovar2 } fname is /temp/myfile/foo1/ varname is foovar. The rotuine will return true.
475// fname is /myfile/foo2/ varname is foovar. The routine will return false.
476bool HDF5BaseArray::
477check_var_cache_files(const vector<string>&slist, const string &fname,const string &varname) const {
478
479 bool ret_value = false;
480 if(fname=="" || varname=="")
481 return ret_value;
482
483 string fullpath;
484
485 if(fname[fname.size()-1] == '/') {
486 if(varname[0]!='/')
487 fullpath = fname+varname;
488 else
489 fullpath = fname.substr(0,fname.size()-1)+varname;
490 }
491 else {
492 if(varname[0]!='/')
493 fullpath = fname+'/'+varname;
494 else
495 fullpath = fname+varname;
496 }
497
498
499 for(unsigned int i = 0; i<slist.size();i++) {
500#if 0
501//cerr<<"fullpath is "<<fullpath <<endl;
502//cerr<<"slist[i] is "<<slist[i] <<endl;
503//cerr<<"fullpath - slist size"<<fullpath.size() -slist[i].size()<<endl;
504//cerr<<"fullpath.rfind(slist[i] is "<<fullpath.rfind(slist[i]) <<endl;
505#endif
506 if(fullpath.rfind(slist[i])==(fullpath.size()-slist[i].size())){
507 ret_value = true;
508 break;
509 }
510 }
511 return ret_value;
512}
513
514// Handle data when memory cache is turned on.
515void HDF5BaseArray::
516handle_data_with_mem_cache(H5DataType h5_dtype, size_t total_elems,const short cache_flag, const string & cache_key, const bool is_dap4) {
517
518 //
519 ObjMemCache * mem_data_cache= nullptr;
520 if(1 == cache_flag)
521 mem_data_cache = HDF5RequestHandler::get_srdata_mem_cache();
522 else if(cache_flag > 1) {
523 mem_data_cache = HDF5RequestHandler::get_lrdata_mem_cache();
524
525#if 0
526//cerr<<"coming to the large metadata cache "<<endl;
527//cerr<<"The cache key is "<<cache_key <<endl;
528
529// dump the values in the cache,keep this line to check if memory cache works.
530//mem_data_cache->dump(cerr);
531#endif
532
533 }
534
535
536 if(mem_data_cache == nullptr)
537 throw InternalErr(__FILE__,__LINE__,"The memory data cache should NOT be nullptr.");
538
539 auto mem_cache_ptr = static_cast<HDF5DataMemCache*>(mem_data_cache->get(cache_key));
540 if(mem_cache_ptr) {
541
542 BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache general"
543 <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
544
545 BESDEBUG("h5","Data Memory Cache hit, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
546
547#if 0
548 //const string var_name = mem_cache_ptr->get_varname();
549#endif
550
551 // Obtain the buffer and do subsetting
552 const size_t var_size = mem_cache_ptr->get_var_buf_size();
553 if(!var_size)
554 throw InternalErr(__FILE__,__LINE__,"The cached data buffer size is 0.");
555 else {
556
557 void *buf = mem_cache_ptr->get_var_buf();
558
559 // Obtain dimension size info.
560 vector<size_t> dim_sizes;
561 Dim_iter i_dim = dim_begin();
562 Dim_iter i_enddim = dim_end();
563 while (i_dim != i_enddim) {
564 dim_sizes.push_back(dimension_size_ll(i_dim));
565 ++i_dim;
566 }
567 // read data from the memory cache
568 read_data_from_mem_cache(h5_dtype,dim_sizes,buf,is_dap4);
569 }
570 }
571 else{
572
573 BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache genenral"
574 <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
575
576 BESDEBUG("h5","Data Memory added to the cache, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
577
578 vector <char> buf;
579 if (total_elems == 0)
580 throw InternalErr(__FILE__,__LINE__,"The total number of elements is 0.");
581
582 buf.resize(total_elems*HDF5CFUtil::H5_numeric_atomic_type_size(h5_dtype));
583
584 // This routine will read the data, send it to the DAP and save the buf to the cache.
585 read_data_NOT_from_mem_cache(true,buf.data());
586
587 // Create a new cache element.
588
589 auto new_mem_cache_ele_unique = make_unique<HDF5DataMemCache>();
590 auto new_mem_cache_ele = new_mem_cache_ele_unique.release();
591 new_mem_cache_ele->set_databuf(buf);
592
593 // Add this entry to the cache list
594 mem_data_cache->add(new_mem_cache_ele, cache_key);
595 }
596
597}
598
599BaseType* HDF5BaseArray::h5cfdims_transform_to_dap4(D4Group *grp) {
600
601 if(grp == nullptr)
602 return nullptr;
603 Array *dest = static_cast<HDF5BaseArray*>(ptr_duplicate());
604
605 // If there is just a size, don't make
606 // a D4Dimension (In DAP4 you cannot share a dimension unless it has
607 // a name). jhrg 3/18/14
608
609 D4Dimensions *grp_dims = grp->dims();
610 for (Array::Dim_iter dap2_dim = dest->dim_begin(), e = dest->dim_end(); dap2_dim != e; ++dap2_dim) {
611 if (!(*dap2_dim).name.empty()) {
612
613 // If a D4Dimension with the name already exists, use it.
614 D4Dimension *d4_dim = grp_dims->find_dim((*dap2_dim).name);
615 if (!d4_dim) {
616 auto d4_dim_unique = make_unique<D4Dimension>((*dap2_dim).name, (*dap2_dim).size);
617 d4_dim = d4_dim_unique.release();
618 grp_dims->add_dim_nocopy(d4_dim);
619 }
620 // At this point d4_dim's name and size == those of (*d) so just set
621 // the D4Dimension pointer, so it matches the one in the D4Group.
622 (*dap2_dim).dim = d4_dim;
623 }
624 }
625
626 return dest;
627
628}
629
630
631
A helper class that aims to reduce code redundancy for different special CF derived array classes For e...
include the entry functions to execute the handlers
int subset(void *input, int rank, const std::vector< size_t > &dim, int64_t start[], int64_t stride[], int64_t edge[], std::vector< T > *poutput, std::vector< size_t > &pos, int index)
Getting a subset of a variable.
virtual void add(libdap::DapObj *obj, const std::string &key)
Add an object to the cache and associate it with a key.
virtual libdap::DapObj * get(const std::string &key)
Get the cached pointer.