libdap  Updated for version 3.20.6
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 //
25 // Portions of this code were taken verbatim from Josuttis,
26 // "The C++ Standard Library," p.672
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <arpa/inet.h>
32 
33 #include <cstring>
34 #include <vector>
35 
36 #include "chunked_stream.h"
37 #include "chunked_istream.h"
38 
39 #include "Error.h"
40 
41 //#define DODS_DEBUG
42 //#define DODS_DEBUG2
43 #ifdef DODS_DEBUG
44 #include <iostream>
45 #endif
46 
47 #include "util.h"
48 #include "debug.h"
49 
50 namespace libdap {
51 
52 /*
53  This code does not use a 'put back' buffer, but here's a picture of the
54  d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
55  the I/O Stream library's streambuf class works. For the case with no
56  putback, just imagine it as zero and eliminate the leftmost extension. This
57  might also come in useful if the code was extended to support put back. I
58  removed that feature because I don't see it being used with our chunked
59  transmission protocol and it requires an extra call to memcpy() when data
60  are added to the internal buffer.
61 
62  d_buffer  d_buffer + putBack
63  |         |
64  v         v
65  |---------|--------------------------------------------|....
66  |         |                                            |   .
67  |---------|--------------------------------------------|....
68            ^                         ^                  ^
69            |                         |                  |
70            eback()                   gptr()             egptr()
71 
72  */
73 
83 std::streambuf::int_type
85 {
86  DBG(cerr << "underflow..." << endl);
87  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
88 
89  // return the next character; uflow() increments the puffer pointer.
90  if (gptr() < egptr())
91  return traits_type::to_int_type(*gptr());
92 
93  // gptr() == egptr() so read more data from the underlying input source.
94 
95  // To read data from the chunked stream, first read the header
96  uint32_t header;
97  d_is.read((char *) &header, 4);
98 
99  // When the endian nature of the server is encoded in the chunk header, the header is
100  // sent using network byte order
101  header = ntohl(header);
102 
103  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
104  // it holds data. In the latter case, bytes those will be read and moved into the
105  // buffer. Once those data are consumed, we'll be back here again and this read()
106  // will return EOF. See below for the other case...
107  if (d_is.eof()) return traits_type::eof();
108 
109  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
110  if (!d_set_twiddle) {
111  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
112  d_set_twiddle = true;
113  }
114 
115  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
116 
117  DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
118  DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
119  DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
120 
121  // Handle the case where the buffer is not big enough to hold the incoming chunk
122  if (chunk_size > d_buf_size) {
123  d_buf_size = chunk_size;
124  m_buffer_alloc();
125  }
126 
127  // If the END chunk has zero bytes, return EOF. See above for more information
128  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
129 
130  // Read the chunk's data
131  d_is.read(d_buffer, chunk_size);
132  DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
133  if (d_is.bad()) return traits_type::eof();
134 
135  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
136  setg(d_buffer, // beginning of put back area
137  d_buffer, // read position (gptr() == eback())
138  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
139 
140  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
141 
142  switch (header & CHUNK_TYPE_MASK) {
143  case CHUNK_END:
144  DBG2(cerr << "Found end chunk" << endl);
145  return traits_type::to_int_type(*gptr());
146  case CHUNK_DATA:
147  return traits_type::to_int_type(*gptr());
148 
149  case CHUNK_ERR:
150  // this is pretty much the end of the show... Assume the buffer/chunk holds
151  // the error message text.
152  d_error = true;
153  d_error_message = string(d_buffer, chunk_size);
154  return traits_type::eof();
155  default:
156  d_error = true;
157  d_error_message = "Failed to read known chunk header type.";
158  return traits_type::eof();
159  }
160 }
161 
178 std::streamsize
179 chunked_inbuf::xsgetn(char* s, std::streamsize num)
180 {
181  DBG(cerr << "xsgetn... num: " << num << endl);
182 
183  // if num is <= the chars currently in the buffer
184  if (num <= (egptr() - gptr())) {
185  memcpy(s, gptr(), num);
186  gbump(num);
187 
188  return traits_type::not_eof(num);
189  }
190 
191  // else they asked for more
192  uint32_t bytes_left_to_read = num;
193 
194  // are there any bytes in the buffer? if so grab them first
195  if (gptr() < egptr()) {
196  int bytes_to_transfer = egptr() - gptr();
197  memcpy(s, gptr(), bytes_to_transfer);
198  gbump(bytes_to_transfer);
199  s += bytes_to_transfer;
200  bytes_left_to_read -= bytes_to_transfer;
201  }
202 
203  // We need to get more bytes from the underlying stream; at this
204  // point the internal buffer is empty.
205 
206  // read the remaining bytes to transfer, a chunk at a time,
207  // and put any leftover stuff in the buffer.
208 
209  // note that when the code is here, gptr() == egptr(), so the
210  // next call to read() will fall through the previous tests and
211  // read at least one chunk here.
212  bool done = false;
213  while (!done) {
214  // Get a chunk header
215  uint32_t header;
216  d_is.read((char *) &header, 4);
217 
218  header = ntohl(header);
219 
220  // There are two EOF cases: One where the END chunk is zero bytes and one where
221  // it holds data. In the latter case, those will be read and moved into the
222  // buffer. Once those data are consumed, we'll be back here again and this read()
223  // will return EOF. See below for the other case...
224  if (d_is.eof()) return traits_type::eof();
225 
226  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
227  if (!d_set_twiddle) {
228  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
229  d_set_twiddle = true;
230  }
231 
232  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
233  DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
234  DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
235  DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
236 
237  // handle error chunks here
238  if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
239  d_error = true;
240  // Note that d_buffer is not used to avoid calling resize if it is too
241  // small to hold the error message. At this point, there's not much reason
242  // to optimize transport efficiency, however.
243  std::vector<char> message(chunk_size);
244  d_is.read(&message[0], chunk_size);
245  d_error_message = string(&message[0], chunk_size);
246  // leave the buffer and gptr(), ..., in a consistent state (empty)
247  setg(d_buffer, d_buffer, d_buffer);
248  }
249  // And zero-length END chunks here.
250  else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
251  return traits_type::not_eof(num-bytes_left_to_read);
252  }
253  // The next case is complicated because we read some data from the current
254  // chunk into 's' an some into the internal buffer.
255  else if (chunk_size > bytes_left_to_read) {
256  d_is.read(s, bytes_left_to_read);
257  if (d_is.bad()) return traits_type::eof();
258 
259  // Now slurp up the remain part of the chunk and store it in the buffer
260  uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
261  // expand the internal buffer if needed
262  if (bytes_leftover > d_buf_size) {
263  d_buf_size = chunk_size;
264  m_buffer_alloc();
265  }
266  // read the remain stuff in to d_buffer
267  d_is.read(d_buffer, bytes_leftover);
268  if (d_is.bad()) return traits_type::eof();
269 
270  setg(d_buffer, // beginning of put back area
271  d_buffer, // read position (gptr() == eback())
272  d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
273 
274  bytes_left_to_read = 0 /* -= d_is.gcount()*/;
275  }
276  else {
277  // expand the internal buffer if needed
278  if (chunk_size > d_buf_size) {
279  d_buf_size = chunk_size;
280  m_buffer_alloc();
281  }
282  // If we get a chunk that's zero bytes, Don't call read()
283  // to save the kernel context switch overhead.
284  if (chunk_size > 0) {
285  d_is.read(s, chunk_size);
286  if (d_is.bad()) return traits_type::eof();
287  bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
288  s += chunk_size;
289  }
290  }
291 
292  switch (header & CHUNK_TYPE_MASK) {
293  case CHUNK_END:
294  DBG(cerr << "Found end chunk" << endl);
295  // in this case bytes_left_to_read can be > 0 because we ran out of data
296  // before reading all the requested bytes. The next read() call will return
297  // eof; this call returns the number of bytes read and transferred to 's'.
298  done = true;
299  break;
300 
301  case CHUNK_DATA:
302  done = bytes_left_to_read == 0;
303  break;
304 
305  case CHUNK_ERR:
306  // this is pretty much the end of the show... The error message has
307  // already been read above
308  return traits_type::eof();
309 
310  default:
311  d_error = true;
312  d_error_message = "Failed to read known chunk header type.";
313  return traits_type::eof();
314  }
315  }
316 
317  return traits_type::not_eof(num-bytes_left_to_read);
318 }
319 
332 std::streambuf::int_type
334 {
335  // To read data from the chunked stream, first read the header
336  uint32_t header;
337  d_is.read((char *) &header, 4);
338 
339  header = ntohl(header);
340 
341  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
342  // it holds data. In the latter case, bytes those will be read and moved into the
343  // buffer. Once those data are consumed, we'll be back here again and this read()
344  // will return EOF. See below for the other case...
345  if (d_is.eof()) return traits_type::eof();
346 
347  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
348  if (!d_set_twiddle) {
349  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
350  d_set_twiddle = true;
351  }
352 
353  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
354 
355  DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
356  DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
357  DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
358 
359  // Handle the case where the buffer is not big enough to hold the incoming chunk
360  if (chunk_size > d_buf_size) {
361  d_buf_size = chunk_size;
362  m_buffer_alloc();
363  }
364 
365  // If the END chunk has zero bytes, return EOF. See above for more information
366  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
367 
368  // Read the chunk's data
369  d_is.read(d_buffer, chunk_size);
370  DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
371  if (d_is.bad()) return traits_type::eof();
372 
373  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
374  setg(d_buffer, // beginning of put back area
375  d_buffer, // read position (gptr() == eback())
376  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
377 
378  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
379 
380  switch (header & CHUNK_TYPE_MASK) {
381  case CHUNK_END:
382  DBG(cerr << "Found end chunk" << endl);
383  return traits_type::not_eof(chunk_size);
384 
385  case CHUNK_DATA:
386  return traits_type::not_eof(chunk_size);
387 
388  case CHUNK_ERR:
389  // this is pretty much the end of the show... Assume the buffer/chunk holds
390  // the error message text.
391  d_error = true;
392  d_error_message = string(d_buffer, chunk_size);
393  return traits_type::eof();
394 
395  default:
396  d_error = true;
397  d_error_message = "Failed to read known chunk header type.";
398  return traits_type::eof();
399  }
400 }
401 
402 }
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
top level DAP object to house generic methods
Definition: AISConnect.cc:30
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:94