libdap Updated for version 3.21.1
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
Go to the documentation of this file.
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24//
25// Portions of this code were taken verbatim from Josuttis,
26// "The C++ Standard Library," p.672
27
28#include "config.h"
29
30#include <arpa/inet.h>
31#include <stdint.h>
32
33#include <cstring>
34#include <vector>
35
36#include "chunked_istream.h"
37#include "chunked_stream.h"
38
39#include "Error.h"
40
41// #define DODS_DEBUG
42// #define DODS_DEBUG2
43#ifdef DODS_DEBUG
44#include <iostream>
45#endif
46
47#include "debug.h"
48#include "util.h"
49
50namespace libdap {
51
52/*
53 This code does not use a 'put back' buffer, but here's a picture of the
54 d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
55 the I/O Stream library's streambuf class works. For the case with no
56 putback, just imagine it as zero and eliminate the leftmost extension. This
57 might also come in useful if the code was extended to support put back. I
58 removed that feature because I don't see it being used with our chunked
59 transmission protocol and it requires an extra call to memcpy() when data
60 are added to the internal buffer.
61
62 d_buffer  d_buffer + putBack
63 |         |
64 v         v
65 |---------|--------------------------------------------|....
66 |         |                                            |   .
67 |---------|--------------------------------------------|....
68           ^                         ^                  ^
69           |                         |                  |
70           eback()                   gptr()             egptr()
71
72 */
73
83std::streambuf::int_type chunked_inbuf::underflow() {
84 DBG(cerr << "underflow..." << endl);
85 DBG2(cerr << "eback(): " << (void *)eback() << ", gptr(): " << (void *)(gptr() - eback())
86 << ", egptr(): " << (void *)(egptr() - eback()) << endl);
87
88 // return the next character; uflow() increments the puffer pointer.
89 if (gptr() < egptr())
90 return traits_type::to_int_type(*gptr());
91
92 // gptr() == egptr() so read more data from the underlying input source.
93
94 // To read data from the chunked stream, first read the header
95 uint32_t header;
96 d_is.read((char *)&header, 4);
97
98 // When the endian nature of the server is encoded in the chunk header, the header is
99 // sent using network byte order
100 header = ntohl(header);
101
102 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
103 // it holds data. In the latter case, bytes those will be read and moved into the
104 // buffer. Once those data are consumed, we'll be back here again and this read()
105 // will return EOF. See below for the other case...
106 if (d_is.eof())
107 return traits_type::eof();
108
109 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
110 if (!d_set_twiddle) {
111 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
112 d_set_twiddle = true;
113 }
114
115 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
116
117 DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
118 DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
119 DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
120
121 // Handle the case where the buffer is not big enough to hold the incoming chunk
122 if (chunk_size > d_buf_size) {
123 d_buf_size = chunk_size;
124 m_buffer_alloc();
125 }
126
127 // If the END chunk has zero bytes, return EOF. See above for more information
128 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END)
129 return traits_type::eof();
130
131 // Read the chunk's data
132 d_is.read(d_buffer, chunk_size);
133 DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad()
134 << endl);
135 if (d_is.bad())
136 return traits_type::eof();
137
138 DBG2(cerr << "eback(): " << (void *)eback() << ", gptr(): " << (void *)(gptr() - eback())
139 << ", egptr(): " << (void *)(egptr() - eback()) << endl);
140 setg(d_buffer, // beginning of put back area
141 d_buffer, // read position (gptr() == eback())
142 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143
144 DBG2(cerr << "eback(): " << (void *)eback() << ", gptr(): " << (void *)(gptr() - eback())
145 << ", egptr(): " << (void *)(egptr() - eback()) << endl);
146
147 switch (header & CHUNK_TYPE_MASK) {
148 case CHUNK_END:
149 DBG2(cerr << "Found end chunk" << endl);
150 return traits_type::to_int_type(*gptr());
151 case CHUNK_DATA:
152 return traits_type::to_int_type(*gptr());
153
154 case CHUNK_ERR:
155 // this is pretty much the end of the show... Assume the buffer/chunk holds
156 // the error message text.
157 d_error = true;
158 d_error_message = string(d_buffer, chunk_size);
159 return traits_type::eof();
160 default:
161 d_error = true;
162 d_error_message = "Failed to read known chunk header type.";
163 return traits_type::eof();
164 }
165}
166
183std::streamsize chunked_inbuf::xsgetn(char *s, std::streamsize num) {
184 DBG(cerr << "xsgetn... num: " << num << endl);
185
186 // if num is <= the chars currently in the buffer
187 if (num <= (egptr() - gptr())) {
188 memcpy(s, gptr(), num);
189 gbump(num);
190
191 return traits_type::not_eof(num);
192 }
193
194 // else they asked for more
195 uint32_t bytes_left_to_read = num;
196
197 // are there any bytes in the buffer? if so grab them first
198 if (gptr() < egptr()) {
199 int bytes_to_transfer = egptr() - gptr();
200 memcpy(s, gptr(), bytes_to_transfer);
201 gbump(bytes_to_transfer);
202 s += bytes_to_transfer;
203 bytes_left_to_read -= bytes_to_transfer;
204 }
205
206 // We need to get more bytes from the underlying stream; at this
207 // point the internal buffer is empty.
208
209 // read the remaining bytes to transfer, a chunk at a time,
210 // and put any leftover stuff in the buffer.
211
212 // note that when the code is here, gptr() == egptr(), so the
213 // next call to read() will fall through the previous tests and
214 // read at least one chunk here.
215 bool done = false;
216 while (!done) {
217 // Get a chunk header
218 uint32_t header;
219 d_is.read((char *)&header, 4);
220
221 header = ntohl(header);
222
223 // There are two EOF cases: One where the END chunk is zero bytes and one where
224 // it holds data. In the latter case, those will be read and moved into the
225 // buffer. Once those data are consumed, we'll be back here again and this read()
226 // will return EOF. See below for the other case...
227 if (d_is.eof())
228 return traits_type::eof();
229
230 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
231 if (!d_set_twiddle) {
232 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
233 d_set_twiddle = true;
234 }
235
236 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
237 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
238 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
239 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
240
241 // handle error chunks here
242 if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
243 d_error = true;
244 // Note that d_buffer is not used to avoid calling resize if it is too
245 // small to hold the error message. At this point, there's not much reason
246 // to optimize transport efficiency, however.
247 std::vector<char> message(chunk_size);
248 d_is.read(message.data(), chunk_size);
249 d_error_message = string(message.data(), chunk_size);
250 // leave the buffer and gptr(), ..., in a consistent state (empty)
251 setg(d_buffer, d_buffer, d_buffer);
252 }
253 // And zero-length END chunks here.
254 else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
255 return traits_type::not_eof(num - bytes_left_to_read);
256 }
257 // The next case is complicated because we read some data from the current
258 // chunk into 's' an some into the internal buffer.
259 else if (chunk_size > bytes_left_to_read) {
260 d_is.read(s, bytes_left_to_read);
261 if (d_is.bad())
262 return traits_type::eof();
263
264 // Now slurp up the remain part of the chunk and store it in the buffer
265 uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
266 // expand the internal buffer if needed
267 if (bytes_leftover > d_buf_size) {
268 d_buf_size = chunk_size;
269 m_buffer_alloc();
270 }
271 // read the remain stuff in to d_buffer
272 d_is.read(d_buffer, bytes_leftover);
273 if (d_is.bad())
274 return traits_type::eof();
275
276 setg(d_buffer, // beginning of put back area
277 d_buffer, // read position (gptr() == eback())
278 d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
279
280 bytes_left_to_read = 0 /* -= d_is.gcount()*/;
281 } else {
282 // expand the internal buffer if needed
283 if (chunk_size > d_buf_size) {
284 d_buf_size = chunk_size;
285 m_buffer_alloc();
286 }
287 // If we get a chunk that's zero bytes, Don't call read()
288 // to save the kernel context switch overhead.
289 if (chunk_size > 0) {
290 d_is.read(s, chunk_size);
291 if (d_is.bad())
292 return traits_type::eof();
293 bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
294 s += chunk_size;
295 }
296 }
297
298 switch (header & CHUNK_TYPE_MASK) {
299 case CHUNK_END:
300 DBG(cerr << "Found end chunk" << endl);
301 // in this case bytes_left_to_read can be > 0 because we ran out of data
302 // before reading all the requested bytes. The next read() call will return
303 // eof; this call returns the number of bytes read and transferred to 's'.
304 done = true;
305 break;
306
307 case CHUNK_DATA:
308 done = bytes_left_to_read == 0;
309 break;
310
311 case CHUNK_ERR:
312 // this is pretty much the end of the show... The error message has
313 // already been read above
314 return traits_type::eof();
315
316 default:
317 d_error = true;
318 d_error_message = "Failed to read known chunk header type.";
319 return traits_type::eof();
320 }
321 }
322
323 return traits_type::not_eof(num - bytes_left_to_read);
324}
325
338std::streambuf::int_type chunked_inbuf::read_next_chunk() {
339 // To read data from the chunked stream, first read the header
340 uint32_t header;
341 d_is.read((char *)&header, 4);
342
343 header = ntohl(header);
344
345 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
346 // it holds data. In the latter case, bytes those will be read and moved into the
347 // buffer. Once those data are consumed, we'll be back here again and this read()
348 // will return EOF. See below for the other case...
349 if (d_is.eof())
350 return traits_type::eof();
351
352 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
353 if (!d_set_twiddle) {
354 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
355 d_set_twiddle = true;
356 }
357
358 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
359
360 DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
361 DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
362 DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
363
364 // Handle the case where the buffer is not big enough to hold the incoming chunk
365 if (chunk_size > d_buf_size) {
366 d_buf_size = chunk_size;
367 m_buffer_alloc();
368 }
369
370 // If the END chunk has zero bytes, return EOF. See above for more information
371 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END)
372 return traits_type::eof();
373
374 // Read the chunk's data
375 d_is.read(d_buffer, chunk_size);
376 DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad()
377 << endl);
378 if (d_is.bad())
379 return traits_type::eof();
380
381 DBG2(cerr << "eback(): " << (void *)eback() << ", gptr(): " << (void *)(gptr() - eback())
382 << ", egptr(): " << (void *)(egptr() - eback()) << endl);
383 setg(d_buffer, // beginning of put back area
384 d_buffer, // read position (gptr() == eback())
385 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
386
387 DBG2(cerr << "eback(): " << (void *)eback() << ", gptr(): " << (void *)(gptr() - eback())
388 << ", egptr(): " << (void *)(egptr() - eback()) << endl);
389
390 switch (header & CHUNK_TYPE_MASK) {
391 case CHUNK_END:
392 DBG(cerr << "Found end chunk" << endl);
393 return traits_type::not_eof(chunk_size);
394
395 case CHUNK_DATA:
396 return traits_type::not_eof(chunk_size);
397
398 case CHUNK_ERR:
399 // this is pretty much the end of the show... Assume the buffer/chunk holds
400 // the error message text.
401 d_error = true;
402 d_error_message = string(d_buffer, chunk_size);
403 return traits_type::eof();
404
405 default:
406 d_error = true;
407 d_error_message = "Failed to read known chunk header type.";
408 return traits_type::eof();
409 }
410}
411
412} // namespace libdap
#define CHUNK_ERR
#define CHUNK_DATA
#define CHUNK_LITTLE_ENDIAN
#define CHUNK_SIZE_MASK
#define CHUNK_TYPE_MASK
#define CHUNK_END
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
#define DBG(x)
Definition debug.h:58
#define DBG2(x)
Definition debug.h:74
top level DAP object to house generic methods
Definition AISConnect.cc:30
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition util.cc:94