bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
vlsa_util.cc
1// This file is part of bes, A C++ back-end server implementation framework
2// for the OPeNDAP Data Access Protocol.
3
4// Copyright (c) 2023 OPeNDAP
5// Author: Nathan Potter <ndp@opendap.org>
6//
7// This library is free software; you can redistribute it and/or
8// modify it under the terms of the GNU Lesser General Public
9// License as published by the Free Software Foundation; either
10// version 2.1 of the License, or (at your option) any later version.
11//
12// This library is distributed in the hope that it will be useful,
13// but WITHOUT ANY WARRANTY; without even the implied warranty of
14// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15// Lesser General Public License for more details.
16//
17// You should have received a copy of the GNU Lesser General Public
18// License along with this library; if not, write to the Free Software
19// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
21//
22// Created by ndp on 11/11/23.
23//
24
25#ifndef BES_VLSA_UTIL_H
26#define BES_VLSA_UTIL_H
27
28#include <string>
29#include <sstream>
30#include <zlib.h>
31#include <iostream> // std::cout, std::endl
32#include <iomanip> // std::setw
33
34#define PUGIXML_NO_XPATH
35#define PUGIXML_HEADER_ONLY
36#include <pugixml.hpp>
37
38#include <libdap/XMLWriter.h>
39
40#include "vlsa_util.h"
41#include "DmrppArray.h"
42#include "DmrppNames.h"
43#include "Base64.h"
44#include "BESInternalError.h"
45#include "BESDebug.h"
46
47#define prolog std::string("vlsa_util::").append(__func__).append("() - ")
48
49using namespace std;
50
51namespace vlsa {
52
// Debug context names passed as the first argument to BESDEBUG in this file.
constexpr const char *VLSA = "vlsa";
constexpr const char *VLSA_VERBOSE = "vlsa:verbose";

// write_value() compresses and base64 encodes any value whose size in bytes
// is greater than this; shorter values are written as plain text.
constexpr int VLSA_VALUE_COMPRESSION_THRESHOLD = 512;
56
62string zlib_msg(int retval)
63{
64 string msg;
65 switch (retval) {
66 case Z_OK:
67 msg = "Z_OK";
68 break;
69 case Z_STREAM_END:
70 msg = "Z_STREAM_END";
71 break;
72 case Z_NEED_DICT:
73 msg = "Z_NEED_DICT";
74 break;
75 case Z_ERRNO:
76 msg = "Z_ERRNO";
77 break;
78 case Z_STREAM_ERROR:
79 msg = "Z_STREAM_ERROR";
80 break;
81 case Z_DATA_ERROR:
82 msg = "Z_DATA_ERROR";
83 break;
84 case Z_MEM_ERROR:
85 msg = "Z_MEM_ERROR";
86 break;
87 case Z_BUF_ERROR:
88 msg = "Z_BUF_ERROR";
89 break;
90 case Z_VERSION_ERROR:
91 msg = "Z_VERSION_ERROR";
92 break;
93 default:
94 msg = "UNKNOWN ZLIB RETURN CODE";
95 break;
96 }
97 return msg;
98}
99
// Field widths given to std::setw() in this file's debug messages:
// W for sizes and return codes, R for the source:compressed ratio.
constexpr unsigned int W{10};
constexpr unsigned int R{8};
102
111std::string encode(const std::string &source_string) {
112 BESDEBUG(VLSA, prolog << "BEGIN\n");
113
114 string encoded;
115 // Copy the stuff into a vector...
116 BESDEBUG(VLSA, prolog << " source_string.size(): " << source_string.size() << " bytes. \n");
117 BESDEBUG(VLSA_VERBOSE, "source_string: " << source_string << "\n");
118
119 uLong ssize = source_string.size();
120 uLongf csize = source_string.size();
121 vector<Bytef> compressed_src;
122 compressed_src.resize(source_string.size());
123
124 int retval = compress(compressed_src.data(), &csize, (Bytef *)source_string.data(), ssize);
125 BESDEBUG(VLSA, prolog << " compress() retval: " << setw(W) << retval
126 << " (" << zlib_msg(retval) << ")\n");
127
128 if (retval != 0) {
129 stringstream msg;
130 msg << "Failed to compress source string. \n";
131 msg << " compress() retval: " << retval << " (" << zlib_msg(retval) << ")\n";
132 msg << " ssize: " << ssize << "\n";
133 msg << " csize: " << csize << "\n";
134 throw BESInternalError(msg.str(), __FILE__, __LINE__);
135 }
136
137 BESDEBUG(VLSA, prolog << " source len: " << setw(W) << source_string.size() << "\n");
138 BESDEBUG(VLSA, prolog << " compressed source binary: " << setw(W) << compressed_src.size() <<
139 " src:csb=" << setw(R) << ((double) source_string.size()) / ((double) compressed_src.size()) << "\n");
140
141 BESDEBUG(VLSA, prolog << "END\n");
142 return { base64::Base64::encode(compressed_src.data(), (int)csize) };
143}
144
151string decode(const string &encoded, uint64_t expected_size) {
152 BESDEBUG(VLSA, prolog << "BEGIN\n");
153 BESDEBUG(VLSA, prolog << " expected_size: " << setw(W) << expected_size << "\n");
154 BESDEBUG(VLSA, prolog << " encoded.size(): " << setw(W) << encoded.size() << "\n");
155
156 string decoded_string;
157 std::vector<u_int8_t> decoded = base64::Base64::decode(encoded);
158 BESDEBUG(VLSA, prolog << " (base64) decoded.size(): " << setw(W) << decoded.size() << "\n");
159
160 vector<Bytef> result_bytes;
161 uLongf result_size = expected_size;
162
163 BESDEBUG(VLSA, prolog << " result_size: " << setw(W) << result_size << "\n");
164
165 result_bytes.resize(result_size);
166 BESDEBUG(VLSA, prolog << " result_bytes.size(): " << setw(W) << result_bytes.size() << "\n");
167
168 int retval = uncompress(result_bytes.data(), &result_size, decoded.data(), decoded.size());
169 if(retval !=0){
170 stringstream msg;
171 msg << prolog << "Failed to decompress payload. \n";
172 msg << " retval: " << retval << " (" << zlib_msg(retval) << ")\n";
173 msg << " result_size: " << result_size << "\n";
174 msg << " expected_size: " << expected_size << "\n";
175 msg << " result_bytes.size(): " << result_bytes.size() << "\n";
176
177 throw BESInternalError(msg.str(), __FILE__, __LINE__);
178 }
179 BESDEBUG(VLSA, prolog << " uncompress() result_size: " << setw(W) << result_size << "\n");
180 BESDEBUG(VLSA, prolog << " expected_size: " << setw(W) << expected_size << "\n");
181
182 if(result_bytes.size() != expected_size){
183 stringstream msg;
184 msg << prolog << "Result size " << result_bytes.size() << " does not match expected size " << expected_size;
185 throw BESInternalError(msg.str(), __FILE__, __LINE__);
186 }
187 BESDEBUG(VLSA, prolog << "END\n");
188 return {result_bytes.begin(), result_bytes.end()};
189}
190
202void write_value(libdap::XMLWriter &xml, const std::string &value, uint64_t dup_count)
203{
204 if (xmlTextWriterStartElement(xml.get_writer(), (const xmlChar *) DMRPP_VLSA_VALUE_ELEMENT) < 0) {
205 stringstream msg;
206 msg << prolog << "Could not begin '" << DMRPP_VLSA_VALUE_ELEMENT << "' element.";
207 throw BESInternalError( msg.str(), __FILE__, __LINE__);
208 }
209 if (dup_count > 1) {
210 if (xmlTextWriterWriteAttribute(xml.get_writer(), (const xmlChar *) DMRPP_VLSA_VALUE_COUNT_ATTR,
211 (const xmlChar *) to_string(dup_count).c_str()) < 0) {
212 stringstream msg;
213 msg << prolog << "Could not write '" << "c" << "' (size) attribute.";
214 throw BESInternalError( msg.str(), __FILE__, __LINE__);
215 }
216 }
217
218 if(value.size() > VLSA_VALUE_COMPRESSION_THRESHOLD) {
219
220 if (xmlTextWriterWriteAttribute(xml.get_writer(), (const xmlChar *) DMRPP_VLSA_VALUE_SIZE_ATTR,
221 (const xmlChar *) to_string(value.size()).c_str()) < 0) {
222 stringstream msg;
223 msg << prolog << "Could not write '" << DMRPP_VLSA_VALUE_SIZE_ATTR << "' (size) attribute.";
224 throw BESInternalError( msg.str(), __FILE__, __LINE__);
225 }
226 string encoded = encode(value);
227
228 if (xmlTextWriterWriteString(xml.get_writer(), (const xmlChar *) encoded.c_str()) < 0) {
229 stringstream msg;
230 msg << prolog << "Could not write text into element '" << DMRPP_VLSA_VALUE_ELEMENT << "'";
231 throw BESInternalError( msg.str(), __FILE__, __LINE__);
232 }
233 }
234 else {
235 if (xmlTextWriterWriteString(xml.get_writer(), (const xmlChar *) value.c_str()) < 0) {
236 stringstream msg;
237 msg << prolog << "Could not write text into element '"<< DMRPP_VLSA_VALUE_ELEMENT << "'";
238 throw BESInternalError( msg.str(), __FILE__, __LINE__);
239 }
240 }
241
242 if (xmlTextWriterEndElement(xml.get_writer()) < 0) {
243 stringstream msg;
244 msg << prolog << "Could not end '" << DMRPP_VLSA_VALUE_ELEMENT << "' element";
245 throw BESInternalError( msg.str(), __FILE__, __LINE__);
246 }
247}
248
249void write(libdap::XMLWriter &xml, const vector<string> &values)
250{
251 if (xmlTextWriterStartElement(xml.get_writer(), (const xmlChar *)DMRPP_VLSA_ELEMENT) < 0) {
252 stringstream msg;
253 msg << prolog << "Could not write " << DMRPP_VLSA_VALUE_ELEMENT << " element";
254 throw BESInternalError( msg.str(), __FILE__, __LINE__);
255 }
256
257 string last_value;
258 bool not_first = false;
259 uint64_t dup_count = 1;
260 for(const auto &value : values) {
261 if(not_first) {
262 if(value == last_value) {
263 dup_count++;
264 }
265 else {
266 BESDEBUG(VLSA, prolog << "value: '" << value << "' dup_count: " << dup_count << endl);
267 vlsa::write_value(xml, last_value, dup_count);
268 dup_count = 1;
269 }
270 }
271 last_value = value;
272 not_first = true;
273 }
274 BESDEBUG(VLSA, prolog << "last_value: '" << last_value << "' dup_count: " << dup_count << endl);
275 write_value(xml,last_value, dup_count);
276
277
278 if (xmlTextWriterEndElement(xml.get_writer()) < 0) {
279 stringstream msg;
280 msg << prolog << "Could not end " << DMRPP_VLSA_VALUE_ELEMENT << " element";
281 throw BESInternalError( msg.str(), __FILE__, __LINE__);
282 }
283
284}
285
286void write(libdap::XMLWriter &xml, dmrpp::DmrppArray &a)
287{
288 write(xml, a.get_str());
289}
290
296std::string read_value(const pugi::xml_node &v)
297{
298 static string vlsa_value_element_name(DMRPP_VLSA_VALUE_ELEMENT);
299
300 string value;
301 if (v.name() == vlsa_value_element_name ) {
302 // We check for the presence of the size attribut, vlsa_value_size_attr_name
303 // If present it means the content was compressed and the base64 encoded
304 // and we have to decode it.
305 auto size_attr = v.attribute(DMRPP_VLSA_VALUE_SIZE_ATTR);
306 if (size_attr) {
307 uint64_t value_size = stoull(size_attr.value());
308 value = decode(v.child_value(), value_size);
309 } else {
310 value = v.child_value();
311 }
312 }
313 return value;
314}
315
316void read(const pugi::xml_node &vlsa_element, vector<string> &entries)
317{
318 static string vlsa_element_name(DMRPP_VLSA_ELEMENT);
319
320 if (vlsa_element.name() != vlsa_element_name ) { return; }
321
322 // Chunks for this node will be held in the var_node siblings.
323 for (auto v = vlsa_element.child(DMRPP_VLSA_VALUE_ELEMENT); v; v = v.next_sibling()) {
324 string value = read_value(v);
325
326 uint64_t count = 1;
327 pugi::xml_attribute c_attr = v.attribute(DMRPP_VLSA_VALUE_COUNT_ATTR);
328 if(c_attr){
329 count = stoull(c_attr.value());
330 }
331 BESDEBUG(VLSA, prolog << "value: '" << value << "' count: " << count << "\n");
332 for(uint64_t i=0; i<count ;i++){
333 // Using emplace_back(value) here causes an error (why?) with ASAN on. jhrg 2/11/25
334 entries.push_back(value);
335 }
336 }
337}
338
339} // namespace vlsa
340#endif //BES_VLSA_UTIL_H