bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
reader.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15#ifndef RAPIDJSON_READER_H_
16#define RAPIDJSON_READER_H_
17
19
20#include "allocators.h"
21#include "stream.h"
22#include "encodedstream.h"
23#include "internal/clzll.h"
24#include "internal/meta.h"
25#include "internal/stack.h"
26#include "internal/strtod.h"
27#include <limits>
28
29#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
30#include <intrin.h>
31#pragma intrinsic(_BitScanForward)
32#endif
33#ifdef RAPIDJSON_SSE42
34#include <nmmintrin.h>
35#elif defined(RAPIDJSON_SSE2)
36#include <emmintrin.h>
37#elif defined(RAPIDJSON_NEON)
38#include <arm_neon.h>
39#endif
40
41#ifdef __clang__
42RAPIDJSON_DIAG_PUSH
43RAPIDJSON_DIAG_OFF(old-style-cast)
44RAPIDJSON_DIAG_OFF(padded)
45RAPIDJSON_DIAG_OFF(switch-enum)
46#elif defined(_MSC_VER)
47RAPIDJSON_DIAG_PUSH
48RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
49RAPIDJSON_DIAG_OFF(4702) // unreachable code
50#endif
51
52#ifdef __GNUC__
53RAPIDJSON_DIAG_PUSH
54RAPIDJSON_DIAG_OFF(effc++)
55#endif
56
58#define RAPIDJSON_NOTHING /* deliberately empty */
59#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
60#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
61 RAPIDJSON_MULTILINEMACRO_BEGIN \
62 if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
63 RAPIDJSON_MULTILINEMACRO_END
64#endif
65#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
66 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
68
99#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
100#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
101 RAPIDJSON_MULTILINEMACRO_BEGIN \
102 RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
103 SetParseError(parseErrorCode, offset); \
104 RAPIDJSON_MULTILINEMACRO_END
105#endif
106
118#ifndef RAPIDJSON_PARSE_ERROR
119#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
120 RAPIDJSON_MULTILINEMACRO_BEGIN \
121 RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
122 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
123 RAPIDJSON_MULTILINEMACRO_END
124#endif
125
126#include "error/error.h" // ParseErrorCode, ParseResult
127
129
131// ParseFlag
132
139#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
140#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
141#endif
142
144
160
162// Handler
163
180 bool RawNumber(const Ch* str, SizeType length, bool copy);
181 bool String(const Ch* str, SizeType length, bool copy);
182 bool StartObject();
183 bool Key(const Ch* str, SizeType length, bool copy);
184 bool EndObject(SizeType memberCount);
185 bool StartArray();
186 bool EndArray(SizeType elementCount);
187};
188\endcode
189*/
191// BaseReaderHandler
192
194
197template<typename Encoding = UTF8<>, typename Derived = void>
199 typedef typename Encoding::Ch Ch;
200
201 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
202
203 bool Default() { return true; }
204 bool Null() { return static_cast<Override&>(*this).Default(); }
205 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
206 bool Int(int) { return static_cast<Override&>(*this).Default(); }
207 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
208 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
209 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
210 bool Double(double) { return static_cast<Override&>(*this).Default(); }
212 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
213 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
214 bool StartObject() { return static_cast<Override&>(*this).Default(); }
215 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
216 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
217 bool StartArray() { return static_cast<Override&>(*this).Default(); }
218 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
219};
220
222// StreamLocalCopy
223
224namespace internal {
225
226template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
228
230template<typename Stream>
231class StreamLocalCopy<Stream, 1> {
232public:
233 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
234 ~StreamLocalCopy() { original_ = s; }
235
236 Stream s;
237
238private:
239 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
240
241 Stream& original_;
242};
243
245template<typename Stream>
246class StreamLocalCopy<Stream, 0> {
247public:
248 StreamLocalCopy(Stream& original) : s(original) {}
249
250 Stream& s;
251
252private:
253 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
254};
255
256} // namespace internal
257
259// SkipWhitespace
260
262
265template<typename InputStream>
266void SkipWhitespace(InputStream& is) {
268 InputStream& s(copy.s);
269
270 typename InputStream::Ch c;
271 while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
272 s.Take();
273}
274
//! Skip JSON white spaces in the half-open character range [p, end).
/*! \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
            or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        switch (*p) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            continue;   // whitespace: keep scanning
        default:
            return p;   // first non-whitespace character
        }
    }
    return p;
}
280
281#ifdef RAPIDJSON_SSE42
//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
/*! The input has no explicit end: scanning stops at the first character that is
    not ' ', '\n', '\r' or '\t' (a terminating '\0' qualifies).

    \param p Start of the text.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string using SIMD
    static const char whitespace[16] = " \n\r\t";
    // A raw array has no .data() member; take the address of its first element.
    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
        if (r != 16)    // some of characters is non-whitespace
            return p + r;
    }
}
309
310inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
311 // Fast return for single non-whitespace
312 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
313 ++p;
314 else
315 return p;
316
317 // The middle of string using SIMD
318 static const char whitespace[16] = " \n\r\t";
319 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(whitespace.data()));
320
321 for (; p <= end - 16; p += 16) {
322 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
323 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
324 if (r != 16) // some of characters is non-whitespace
325 return p + r;
326 }
327
328 return SkipWhitespace(p, end);
329}
330
331#elif defined(RAPIDJSON_SSE2)
332
//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
/*! The input has no explicit end: scanning stops at the first character that is
    not ' ', '\n', '\r' or '\t' (a terminating '\0' qualifies).

    \param p Start of the text.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return for single non-whitespace
    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
        ++p;
    else
        return p;

    // 16-byte align to the next boundary
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned)
        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
            ++p;
        else
            return p;

    // The rest of string
    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
    #undef C16

    // Rows are raw arrays (no .data() member); load from the address of each row's first element.
    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        __m128i x = _mm_cmpeq_epi8(s, w0);
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
        // Bit i of r is set when byte i is NOT one of the four whitespace characters.
        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
        if (r != 0) {   // some of characters may be non-whitespace
#ifdef _MSC_VER         // Find the index of first non-whitespace
            unsigned long offset;
            _BitScanForward(&offset, r);
            return p + offset;
#else
            return p + __builtin_ffs(r) - 1;
#endif
        }
    }
}
377
378inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
379 // Fast return for single non-whitespace
380 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
381 ++p;
382 else
383 return p;
384
385 // The rest of string
386 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
387 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
388 #undef C16
389
390 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(whitespaces[0].data()));
391 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(whitespaces[1].data()));
392 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(whitespaces[2].data()));
393 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(whitespaces[3].data()));
394
395 for (; p <= end - 16; p += 16) {
396 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
397 __m128i x = _mm_cmpeq_epi8(s, w0);
398 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
399 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
400 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
401 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
402 if (r != 0) { // some of characters may be non-whitespace
403#ifdef _MSC_VER // Find the index of first non-whitespace
404 unsigned long offset;
405 _BitScanForward(&offset, r);
406 return p + offset;
407#else
408 return p + __builtin_ffs(r) - 1;
409#endif
410 }
411 }
412
413 return SkipWhitespace(p, end);
414}
415
416#elif defined(RAPIDJSON_NEON)
417
//! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once.
/*! The input has no explicit end: scanning stops at the first character that is
    not ' ', '\n', '\r' or '\t'.

    \param p Start of the text.
    \return Pointer to the first non-whitespace character.
*/
inline const char *SkipWhitespace_SIMD(const char* p) {
    // Fast return when the very first character is not whitespace.
    if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        return p;
    ++p;

    // Scalar scan up to the next 16-byte boundary.
    const char* const boundary = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != boundary; ++p) {
        if (!(*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
            return p;
    }

    const uint8x16_t spaceV = vmovq_n_u8(' ');
    const uint8x16_t lfV = vmovq_n_u8('\n');
    const uint8x16_t crV = vmovq_n_u8('\r');
    const uint8x16_t tabV = vmovq_n_u8('\t');

    for (;; p += 16) {
        const uint8x16_t chunk = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        uint8x16_t mask = vceqq_u8(chunk, spaceV);
        mask = vorrq_u8(mask, vceqq_u8(chunk, lfV));
        mask = vorrq_u8(mask, vceqq_u8(chunk, crV));
        mask = vorrq_u8(mask, vceqq_u8(chunk, tabV));

        mask = vmvnq_u8(mask);      // non-whitespace bytes become 0xFF
        mask = vrev64q_u8(mask);    // reverse bytes inside each 64-bit half
        const uint64_t lowHalf = vgetq_lane_u64(vreinterpretq_u64_u8(mask), 0);
        const uint64_t highHalf = vgetq_lane_u64(vreinterpretq_u64_u8(mask), 1);

        // After the byte reversal, leading-zero count / 8 is the byte index within the half.
        if (lowHalf != 0) {
            const uint32_t leadingZeros = internal::clzll(lowHalf);
            return p + (leadingZeros >> 3);
        }
        if (highHalf != 0) {
            const uint32_t leadingZeros = internal::clzll(highHalf);
            return p + 8 + (leadingZeros >> 3);
        }
    }
}
462
//! Skip whitespace in the bounded range [p, end) using ARM Neon.
/*! \param p   Start of the text.
    \param end One past the last readable character; must not precede \c p.
    \return Pointer to the first non-whitespace character, or \c end.
*/
inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
    // Fast return for single non-whitespace
    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
        ++p;
    else
        return p;

    const uint8x16_t w0 = vmovq_n_u8(' ');
    const uint8x16_t w1 = vmovq_n_u8('\n');
    const uint8x16_t w2 = vmovq_n_u8('\r');
    const uint8x16_t w3 = vmovq_n_u8('\t');

    // Compare the remaining length instead of forming `end - 16`, which would
    // point before the buffer when fewer than 16 bytes are available.
    for (; end - p >= 16; p += 16) {
        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
        uint8x16_t x = vceqq_u8(s, w0);
        x = vorrq_u8(x, vceqq_u8(s, w1));
        x = vorrq_u8(x, vceqq_u8(s, w2));
        x = vorrq_u8(x, vceqq_u8(s, w3));

        x = vmvnq_u8(x);                       // Negate
        x = vrev64q_u8(x);                     // Rev in 64
        uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // extract
        uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // extract

        // After the byte reversal, leading-zero count / 8 is the byte index within the half.
        if (low == 0) {
            if (high != 0) {
                uint32_t lz = internal::clzll(high);
                return p + 8 + (lz >> 3);
            }
        } else {
            uint32_t lz = internal::clzll(low);
            return p + (lz >> 3);
        }
    }

    // Fewer than 16 bytes left: finish with the scalar routine.
    return SkipWhitespace(p, end);
}
500
501#endif // RAPIDJSON_NEON
502
503#ifdef RAPIDJSON_SIMD
505template<> inline void SkipWhitespace(InsituStringStream& is) {
506 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
507}
508
510template<> inline void SkipWhitespace(StringStream& is) {
511 is.src_ = SkipWhitespace_SIMD(is.src_);
512}
513
514template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
515 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
516}
517#endif // RAPIDJSON_SIMD
518
520// GenericReader
521
523
538template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
540public:
541 typedef typename SourceEncoding::Ch Ch;
542
544
547 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
548 stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
549
551
558 template <unsigned parseFlags, typename InputStream, typename Handler>
559 ParseResult Parse(InputStream& is, Handler& handler) {
560 if (parseFlags & kParseIterativeFlag)
561 return IterativeParse<parseFlags>(is, handler);
562
563 parseResult_.Clear();
564
565 ClearStackOnExit scope(*this);
566
567 SkipWhitespaceAndComments<parseFlags>(is);
568 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
569
570 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
572 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
573 }
574 else {
575 ParseValue<parseFlags>(is, handler);
576 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
577
578 if (!(parseFlags & kParseStopWhenDoneFlag)) {
579 SkipWhitespaceAndComments<parseFlags>(is);
580 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
581
582 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
584 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
585 }
586 }
587 }
588
589 return parseResult_;
590 }
591
593
599 template <typename InputStream, typename Handler>
600 ParseResult Parse(InputStream& is, Handler& handler) {
601 return Parse<kParseDefaultFlags>(is, handler);
602 }
603
605
608 parseResult_.Clear();
609 state_ = IterativeParsingStartState;
610 }
611
613
619 template <unsigned parseFlags, typename InputStream, typename Handler>
620 bool IterativeParseNext(InputStream& is, Handler& handler) {
621 while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
622 SkipWhitespaceAndComments<parseFlags>(is);
623
624 Token t = Tokenize(is.Peek());
625 IterativeParsingState n = Predict(state_, t);
626 IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
627
628 // If we've finished or hit an error...
629 if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
630 // Report errors.
631 if (d == IterativeParsingErrorState) {
632 HandleError(state_, is);
633 return false;
634 }
635
636 // Transition to the finish state.
637 RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
638 state_ = d;
639
640 // If StopWhenDone is not set...
641 if (!(parseFlags & kParseStopWhenDoneFlag)) {
642 // ... and extra non-whitespace data is found...
643 SkipWhitespaceAndComments<parseFlags>(is);
644 if (is.Peek() != '\0') {
645 // ... this is considered an error.
646 HandleError(state_, is);
647 return false;
648 }
649 }
650
651 // Success! We are done!
652 return true;
653 }
654
655 // Transition to the new state.
656 state_ = d;
657
658 // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
659 if (!IsIterativeParsingDelimiterState(n))
660 return true;
661 }
662
663 // We reached the end of file.
664 stack_.Clear();
665
666 if (state_ != IterativeParsingFinishState) {
667 HandleError(state_, is);
668 return false;
669 }
670
671 return true;
672 }
673
675
677 RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
678 return IsIterativeParsingCompleteState(state_);
679 }
680
682 bool HasParseError() const { return parseResult_.IsError(); }
683
685 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
686
688 size_t GetErrorOffset() const { return parseResult_.Offset(); }
689
690protected:
691 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
692
693private:
694 // Prohibit copy constructor & assignment operator.
696 GenericReader& operator=(const GenericReader&);
697
698 void ClearStack() { stack_.Clear(); }
699
700 // clear stack on any exit from ParseStream, e.g. due to exception
701 struct ClearStackOnExit {
702 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
703 ~ClearStackOnExit() { r_.ClearStack(); }
704 private:
705 GenericReader& r_;
706 ClearStackOnExit(const ClearStackOnExit&);
707 ClearStackOnExit& operator=(const ClearStackOnExit&);
708 };
709
710 template<unsigned parseFlags, typename InputStream>
711 void SkipWhitespaceAndComments(InputStream& is) {
712 SkipWhitespace(is);
713
714 if (parseFlags & kParseCommentsFlag) {
715 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
716 if (Consume(is, '*')) {
717 while (true) {
718 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
720 else if (Consume(is, '*')) {
721 if (Consume(is, '/'))
722 break;
723 }
724 else
725 is.Take();
726 }
727 }
728 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
729 while (is.Peek() != '\0' && is.Take() != '\n') {}
730 else
732
733 SkipWhitespace(is);
734 }
735 }
736 }
737
738 // Parse object: { string : value, ... }
739 template<unsigned parseFlags, typename InputStream, typename Handler>
740 void ParseObject(InputStream& is, Handler& handler) {
741 RAPIDJSON_ASSERT(is.Peek() == '{');
742 is.Take(); // Skip '{'
743
744 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
746
747 SkipWhitespaceAndComments<parseFlags>(is);
748 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
749
750 if (Consume(is, '}')) {
751 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
753 return;
754 }
755
756 for (SizeType memberCount = 0;;) {
757 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
759
760 ParseString<parseFlags>(is, handler, true);
761 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
762
763 SkipWhitespaceAndComments<parseFlags>(is);
764 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
765
766 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
768
769 SkipWhitespaceAndComments<parseFlags>(is);
770 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
771
772 ParseValue<parseFlags>(is, handler);
773 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
774
775 SkipWhitespaceAndComments<parseFlags>(is);
776 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
777
778 ++memberCount;
779
780 switch (is.Peek()) {
781 case ',':
782 is.Take();
783 SkipWhitespaceAndComments<parseFlags>(is);
784 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
785 break;
786 case '}':
787 is.Take();
788 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
790 return;
791 default:
792 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
793 }
794
795 if (parseFlags & kParseTrailingCommasFlag) {
796 if (is.Peek() == '}') {
797 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
799 is.Take();
800 return;
801 }
802 }
803 }
804 }
805
806 // Parse array: [ value, ... ]
807 template<unsigned parseFlags, typename InputStream, typename Handler>
808 void ParseArray(InputStream& is, Handler& handler) {
809 RAPIDJSON_ASSERT(is.Peek() == '[');
810 is.Take(); // Skip '['
811
812 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
814
815 SkipWhitespaceAndComments<parseFlags>(is);
816 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
817
818 if (Consume(is, ']')) {
819 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
821 return;
822 }
823
824 for (SizeType elementCount = 0;;) {
825 ParseValue<parseFlags>(is, handler);
826 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
827
828 ++elementCount;
829 SkipWhitespaceAndComments<parseFlags>(is);
830 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
831
832 if (Consume(is, ',')) {
833 SkipWhitespaceAndComments<parseFlags>(is);
834 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
835 }
836 else if (Consume(is, ']')) {
837 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
839 return;
840 }
841 else
843
844 if (parseFlags & kParseTrailingCommasFlag) {
845 if (is.Peek() == ']') {
846 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
848 is.Take();
849 return;
850 }
851 }
852 }
853 }
854
855 template<unsigned parseFlags, typename InputStream, typename Handler>
856 void ParseNull(InputStream& is, Handler& handler) {
857 RAPIDJSON_ASSERT(is.Peek() == 'n');
858 is.Take();
859
860 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
861 if (RAPIDJSON_UNLIKELY(!handler.Null()))
863 }
864 else
866 }
867
868 template<unsigned parseFlags, typename InputStream, typename Handler>
869 void ParseTrue(InputStream& is, Handler& handler) {
870 RAPIDJSON_ASSERT(is.Peek() == 't');
871 is.Take();
872
873 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
874 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
876 }
877 else
879 }
880
881 template<unsigned parseFlags, typename InputStream, typename Handler>
882 void ParseFalse(InputStream& is, Handler& handler) {
883 RAPIDJSON_ASSERT(is.Peek() == 'f');
884 is.Take();
885
886 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
887 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
889 }
890 else
892 }
893
894 template<typename InputStream>
895 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
896 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
897 is.Take();
898 return true;
899 }
900 else
901 return false;
902 }
903
904 // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
905 template<typename InputStream>
906 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
907 unsigned codepoint = 0;
908 for (int i = 0; i < 4; i++) {
909 Ch c = is.Peek();
910 codepoint <<= 4;
911 codepoint += static_cast<unsigned>(c);
912 if (c >= '0' && c <= '9')
913 codepoint -= '0';
914 else if (c >= 'A' && c <= 'F')
915 codepoint -= 'A' - 10;
916 else if (c >= 'a' && c <= 'f')
917 codepoint -= 'a' - 10;
918 else {
920 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
921 }
922 is.Take();
923 }
924 return codepoint;
925 }
926
927 template <typename CharType>
928 class StackStream {
929 public:
930 typedef CharType Ch;
931
932 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
933 RAPIDJSON_FORCEINLINE void Put(Ch c) {
934 *stack_.template Push<Ch>() = c;
935 ++length_;
936 }
937
938 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
939 length_ += count;
940 return stack_.template Push<Ch>(count);
941 }
942
943 size_t Length() const { return length_; }
944
945 Ch* Pop() {
946 return stack_.template Pop<Ch>(length_);
947 }
948
949 private:
950 StackStream(const StackStream&);
951 StackStream& operator=(const StackStream&);
952
953 internal::Stack<StackAllocator>& stack_;
954 SizeType length_;
955 };
956
957 // Parse string and generate String event. Different code paths for kParseInsituFlag.
958 template<unsigned parseFlags, typename InputStream, typename Handler>
959 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
960 internal::StreamLocalCopy<InputStream> copy(is);
961 InputStream& s(copy.s);
962
963 RAPIDJSON_ASSERT(s.Peek() == '\"');
964 s.Take(); // Skip '\"'
965
966 bool success = false;
967 if (parseFlags & kParseInsituFlag) {
968 typename InputStream::Ch *head = s.PutBegin();
969 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
970 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
971 size_t length = s.PutEnd(head) - 1;
972 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
973 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
974 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
975 }
976 else {
977 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
978 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
979 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
980 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
981 const typename TargetEncoding::Ch* const str = stackStream.Pop();
982 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
983 }
984 if (RAPIDJSON_UNLIKELY(!success))
986 }
987
988 // Parse string to an output is
989 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
990 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
991 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
993#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
994 static const char escape[256] = {
995 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
996 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
997 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
998 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
999 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
1000 };
1001#undef Z16
1003
1004 for (;;) {
1005 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
1006 if (!(parseFlags & kParseValidateEncodingFlag))
1007 ScanCopyUnescapedString(is, os);
1008
1009 Ch c = is.Peek();
1010 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
1011 size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset
1012 is.Take();
1013 Ch e = is.Peek();
1014 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
1015 is.Take();
1016 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
1017 }
1018 else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
1019 is.Take();
1020 os.Put('\'');
1021 }
1022 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
1023 is.Take();
1024 unsigned codepoint = ParseHex4(is, escapeOffset);
1025 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1026 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
1027 // Handle UTF-16 surrogate pair
1028 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1030 unsigned codepoint2 = ParseHex4(is, escapeOffset);
1031 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1032 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1034 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1035 }
1036 TEncoding::Encode(os, codepoint);
1037 }
1038 else
1040 }
1041 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
1042 is.Take();
1043 os.Put('\0'); // null-terminate the string
1044 return;
1045 }
1046 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
1047 if (c == '\0')
1049 else
1051 }
1052 else {
1053 size_t offset = is.Tell();
1058 }
1059 }
1060 }
1061
1062 template<typename InputStream, typename OutputStream>
1063 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
1064 // Do nothing for generic version
1065 }
1066
1067#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
1068 // StringStream -> StackStream<char>
1069 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1070 const char* p = is.src_;
1071
1072 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1073 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1074 while (p != nextAligned)
1075 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1076 is.src_ = p;
1077 return;
1078 }
1079 else
1080 os.Put(*p++);
1081
1082 // The rest of string using SIMD
1083 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1084 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1085 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1086 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(dquote.data()));
1087 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bslash.data()));
1088 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(space.data()));
1089
1090 for (;; p += 16) {
1091 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1092 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1093 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1094 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1095 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1096 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1097 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1098 SizeType length;
1099 #ifdef _MSC_VER // Find the index of first escaped
1100 unsigned long offset;
1101 _BitScanForward(&offset, r);
1102 length = offset;
1103 #else
1104 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
1105 #endif
1106 if (length != 0) {
1107 char* q = reinterpret_cast<char*>(os.Push(length));
1108 for (size_t i = 0; i < length; i++)
1109 q[i] = p[i];
1110
1111 p += length;
1112 }
1113 break;
1114 }
1115 _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
1116 }
1117
1118 is.src_ = p;
1119 }
1120
1121 // InsituStringStream -> InsituStringStream
1122 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1123 RAPIDJSON_ASSERT(&is == &os);
1124 (void)os;
1125
1126 if (is.src_ == is.dst_) {
1127 SkipUnescapedString(is);
1128 return;
1129 }
1130
1131 char* p = is.src_;
1132 char *q = is.dst_;
1133
1134 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1135 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1136 while (p != nextAligned)
1137 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1138 is.src_ = p;
1139 is.dst_ = q;
1140 return;
1141 }
1142 else
1143 *q++ = *p++;
1144
1145 // The rest of string using SIMD
1146 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1147 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1148 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1149 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(dquote.data()));
1150 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bslash.data()));
1151 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(space.data()));
1152
1153 for (;; p += 16, q += 16) {
1154 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1155 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1156 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1157 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1158 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1159 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1160 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1161 size_t length;
1162#ifdef _MSC_VER // Find the index of first escaped
1163 unsigned long offset;
1164 _BitScanForward(&offset, r);
1165 length = offset;
1166#else
1167 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1168#endif
1169 for (const char* pend = p + length; p != pend; )
1170 *q++ = *p++;
1171 break;
1172 }
1173 _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1174 }
1175
1176 is.src_ = p;
1177 is.dst_ = q;
1178 }
1179
1180 // When read/write pointers are the same for insitu stream, just skip unescaped characters
1181 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1182 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1183 char* p = is.src_;
1184
1185 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1186 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1187 for (; p != nextAligned; p++)
1188 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1189 is.src_ = is.dst_ = p;
1190 return;
1191 }
1192
1193 // The rest of string using SIMD
1194 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1195 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1196 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1197 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(dquote.data()));
1198 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(bslash.data()));
1199 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(space.data()));
1200
1201 for (;; p += 16) {
1202 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1203 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1204 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1205 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1206 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1207 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1208 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1209 size_t length;
1210#ifdef _MSC_VER // Find the index of first escaped
1211 unsigned long offset;
1212 _BitScanForward(&offset, r);
1213 length = offset;
1214#else
1215 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1216#endif
1217 p += length;
1218 break;
1219 }
1220 }
1221
1222 is.src_ = is.dst_ = p;
1223 }
1224#elif defined(RAPIDJSON_NEON)
1225 // StringStream -> StackStream<char>
1226 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1227 const char* p = is.src_;
1228
1229 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1230 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1231 while (p != nextAligned)
1232 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1233 is.src_ = p;
1234 return;
1235 }
1236 else
1237 os.Put(*p++);
1238
1239 // The rest of string using SIMD
1240 const uint8x16_t s0 = vmovq_n_u8('"');
1241 const uint8x16_t s1 = vmovq_n_u8('\\');
1242 const uint8x16_t s2 = vmovq_n_u8('\b');
1243 const uint8x16_t s3 = vmovq_n_u8(32);
1244
1245 for (;; p += 16) {
1246 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
1247 uint8x16_t x = vceqq_u8(s, s0);
1248 x = vorrq_u8(x, vceqq_u8(s, s1));
1249 x = vorrq_u8(x, vceqq_u8(s, s2));
1250 x = vorrq_u8(x, vcltq_u8(s, s3));
1251
1252 x = vrev64q_u8(x); // Rev in 64
1253 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1254 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1255
1256 SizeType length = 0;
1257 bool escaped = false;
1258 if (low == 0) {
1259 if (high != 0) {
1260 uint32_t lz = internal::clzll(high);
1261 length = 8 + (lz >> 3);
1262 escaped = true;
1263 }
1264 } else {
1265 uint32_t lz = internal::clzll(low);
1266 length = lz >> 3;
1267 escaped = true;
1268 }
1269 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1270 if (length != 0) {
1271 char* q = reinterpret_cast<char*>(os.Push(length));
1272 for (size_t i = 0; i < length; i++)
1273 q[i] = p[i];
1274
1275 p += length;
1276 }
1277 break;
1278 }
1279 vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
1280 }
1281
1282 is.src_ = p;
1283 }
1284
1285 // InsituStringStream -> InsituStringStream
1286 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1287 RAPIDJSON_ASSERT(&is == &os);
1288 (void)os;
1289
1290 if (is.src_ == is.dst_) {
1291 SkipUnescapedString(is);
1292 return;
1293 }
1294
1295 char* p = is.src_;
1296 char *q = is.dst_;
1297
1298 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1299 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1300 while (p != nextAligned)
1301 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1302 is.src_ = p;
1303 is.dst_ = q;
1304 return;
1305 }
1306 else
1307 *q++ = *p++;
1308
1309 // The rest of string using SIMD
1310 const uint8x16_t s0 = vmovq_n_u8('"');
1311 const uint8x16_t s1 = vmovq_n_u8('\\');
1312 const uint8x16_t s2 = vmovq_n_u8('\b');
1313 const uint8x16_t s3 = vmovq_n_u8(32);
1314
1315 for (;; p += 16, q += 16) {
1316 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1317 uint8x16_t x = vceqq_u8(s, s0);
1318 x = vorrq_u8(x, vceqq_u8(s, s1));
1319 x = vorrq_u8(x, vceqq_u8(s, s2));
1320 x = vorrq_u8(x, vcltq_u8(s, s3));
1321
1322 x = vrev64q_u8(x); // Rev in 64
1323 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1324 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1325
1326 SizeType length = 0;
1327 bool escaped = false;
1328 if (low == 0) {
1329 if (high != 0) {
1330 uint32_t lz = internal::clzll(high);
1331 length = 8 + (lz >> 3);
1332 escaped = true;
1333 }
1334 } else {
1335 uint32_t lz = internal::clzll(low);
1336 length = lz >> 3;
1337 escaped = true;
1338 }
1339 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1340 for (const char* pend = p + length; p != pend; ) {
1341 *q++ = *p++;
1342 }
1343 break;
1344 }
1345 vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
1346 }
1347
1348 is.src_ = p;
1349 is.dst_ = q;
1350 }
1351
1352 // When read/write pointers are the same for insitu stream, just skip unescaped characters
1353 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1354 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1355 char* p = is.src_;
1356
1357 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1358 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1359 for (; p != nextAligned; p++)
1360 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1361 is.src_ = is.dst_ = p;
1362 return;
1363 }
1364
1365 // The rest of string using SIMD
1366 const uint8x16_t s0 = vmovq_n_u8('"');
1367 const uint8x16_t s1 = vmovq_n_u8('\\');
1368 const uint8x16_t s2 = vmovq_n_u8('\b');
1369 const uint8x16_t s3 = vmovq_n_u8(32);
1370
1371 for (;; p += 16) {
1372 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1373 uint8x16_t x = vceqq_u8(s, s0);
1374 x = vorrq_u8(x, vceqq_u8(s, s1));
1375 x = vorrq_u8(x, vceqq_u8(s, s2));
1376 x = vorrq_u8(x, vcltq_u8(s, s3));
1377
1378 x = vrev64q_u8(x); // Rev in 64
1379 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1380 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1381
1382 if (low == 0) {
1383 if (high != 0) {
1384 uint32_t lz = internal::clzll(high);
1385 p += 8 + (lz >> 3);
1386 break;
1387 }
1388 } else {
1389 uint32_t lz = internal::clzll(low);
1390 p += lz >> 3;
1391 break;
1392 }
1393 }
1394
1395 is.src_ = is.dst_ = p;
1396 }
1397#endif // RAPIDJSON_NEON
1398
1399 template<typename InputStream, bool backup, bool pushOnTake>
1400 class NumberStream;
1401
1402 template<typename InputStream>
1403 class NumberStream<InputStream, false, false> {
1404 public:
1405 typedef typename InputStream::Ch Ch;
1406
1407 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
1408
1409 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
1410 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
1411 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
1412 RAPIDJSON_FORCEINLINE void Push(char) {}
1413
1414 size_t Tell() { return is.Tell(); }
1415 size_t Length() { return 0; }
1416 const char* Pop() { return 0; }
1417
1418 protected:
1419 NumberStream& operator=(const NumberStream&);
1420
1421 InputStream& is;
1422 };
1423
1424 template<typename InputStream>
1425 class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
1426 typedef NumberStream<InputStream, false, false> Base;
1427 public:
1428 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
1429
1430 RAPIDJSON_FORCEINLINE Ch TakePush() {
1431 stackStream.Put(static_cast<char>(Base::is.Peek()));
1432 return Base::is.Take();
1433 }
1434
1435 RAPIDJSON_FORCEINLINE void Push(char c) {
1436 stackStream.Put(c);
1437 }
1438
1439 size_t Length() { return stackStream.Length(); }
1440
1441 const char* Pop() {
1442 stackStream.Put('\0');
1443 return stackStream.Pop();
1444 }
1445
1446 private:
1447 StackStream<char> stackStream;
1448 };
1449
1450 template<typename InputStream>
1451 class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
1452 typedef NumberStream<InputStream, true, false> Base;
1453 public:
1454 NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
1455
1456 RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1457 };
1458
1459 template<unsigned parseFlags, typename InputStream, typename Handler>
1460 void ParseNumber(InputStream& is, Handler& handler) {
1461 internal::StreamLocalCopy<InputStream> copy(is);
1462 NumberStream<InputStream,
1463 ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1464 ((parseFlags & kParseInsituFlag) == 0) :
1465 ((parseFlags & kParseFullPrecisionFlag) != 0),
1466 (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1467 (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1468
1469 size_t startOffset = s.Tell();
1470 double d = 0.0;
1471 bool useNanOrInf = false;
1472
1473 // Parse minus
1474 bool minus = Consume(s, '-');
1475
1476 // Parse int: zero / ( digit1-9 *DIGIT )
1477 unsigned i = 0;
1478 uint64_t i64 = 0;
1479 bool use64bit = false;
1480 int significandDigit = 0;
1481 if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1482 i = 0;
1483 s.TakePush();
1484 }
1485 else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1486 i = static_cast<unsigned>(s.TakePush() - '0');
1487
1488 if (minus)
1489 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1490 if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1491 if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1492 i64 = i;
1493 use64bit = true;
1494 break;
1495 }
1496 }
1497 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1498 significandDigit++;
1499 }
1500 else
1501 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1502 if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1503 if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1504 i64 = i;
1505 use64bit = true;
1506 break;
1507 }
1508 }
1509 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1510 significandDigit++;
1511 }
1512 }
1513 // Parse NaN or Infinity here
1514 else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1515 if (Consume(s, 'N')) {
1516 if (Consume(s, 'a') && Consume(s, 'N')) {
1517 d = std::numeric_limits<double>::quiet_NaN();
1518 useNanOrInf = true;
1519 }
1520 }
1521 else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1522 if (Consume(s, 'n') && Consume(s, 'f')) {
1523 d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1524 useNanOrInf = true;
1525
1526 if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1527 && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1529 }
1530 }
1531 }
1532
1533 if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1535 }
1536 }
1537 else
1539
1540 // Parse 64bit int
1541 bool useDouble = false;
1542 if (use64bit) {
1543 if (minus)
1544 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1545 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1546 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1547 d = static_cast<double>(i64);
1548 useDouble = true;
1549 break;
1550 }
1551 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1552 significandDigit++;
1553 }
1554 else
1555 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1556 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1557 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1558 d = static_cast<double>(i64);
1559 useDouble = true;
1560 break;
1561 }
1562 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1563 significandDigit++;
1564 }
1565 }
1566
1567 // Force double for big integer
1568 if (useDouble) {
1569 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1570 d = d * 10 + (s.TakePush() - '0');
1571 }
1572 }
1573
1574 // Parse frac = decimal-point 1*DIGIT
1575 int expFrac = 0;
1576 size_t decimalPosition;
1577 if (Consume(s, '.')) {
1578 decimalPosition = s.Length();
1579
1580 if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1582
1583 if (!useDouble) {
1584#if RAPIDJSON_64BIT
1585 // Use i64 to store significand in 64-bit architecture
1586 if (!use64bit)
1587 i64 = i;
1588
1589 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1590 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1591 break;
1592 else {
1593 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1594 --expFrac;
1595 if (i64 != 0)
1596 significandDigit++;
1597 }
1598 }
1599
1600 d = static_cast<double>(i64);
1601#else
1602 // Use double to store significand in 32-bit architecture
1603 d = static_cast<double>(use64bit ? i64 : i);
1604#endif
1605 useDouble = true;
1606 }
1607
1608 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1609 if (significandDigit < 17) {
1610 d = d * 10.0 + (s.TakePush() - '0');
1611 --expFrac;
1612 if (RAPIDJSON_LIKELY(d > 0.0))
1613 significandDigit++;
1614 }
1615 else
1616 s.TakePush();
1617 }
1618 }
1619 else
1620 decimalPosition = s.Length(); // decimal position at the end of integer.
1621
1622 // Parse exp = e [ minus / plus ] 1*DIGIT
1623 int exp = 0;
1624 if (Consume(s, 'e') || Consume(s, 'E')) {
1625 if (!useDouble) {
1626 d = static_cast<double>(use64bit ? i64 : i);
1627 useDouble = true;
1628 }
1629
1630 bool expMinus = false;
1631 if (Consume(s, '+'))
1632 ;
1633 else if (Consume(s, '-'))
1634 expMinus = true;
1635
1636 if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1637 exp = static_cast<int>(s.Take() - '0');
1638 if (expMinus) {
1639 // (exp + expFrac) must not underflow int => we're detecting when -exp gets
1640 // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
1641 // underflow territory):
1642 //
1643 // -(exp * 10 + 9) + expFrac >= INT_MIN
1644 // <=> exp <= (expFrac - INT_MIN - 9) / 10
1645 RAPIDJSON_ASSERT(expFrac <= 0);
1646 int maxExp = (expFrac + 2147483639) / 10;
1647
1648 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1649 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1650 if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
1651 while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1652 s.Take();
1653 }
1654 }
1655 }
1656 else { // positive exp
1657 int maxExp = 308 - expFrac;
1658 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1659 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1660 if (RAPIDJSON_UNLIKELY(exp > maxExp))
1662 }
1663 }
1664 }
1665 else
1667
1668 if (expMinus)
1669 exp = -exp;
1670 }
1671
1672 // Finish parsing, call event according to the type of number.
1673 bool cont = true;
1674
1675 if (parseFlags & kParseNumbersAsStringsFlag) {
1676 if (parseFlags & kParseInsituFlag) {
1677 s.Pop(); // Pop stack no matter if it will be used or not.
1678 typename InputStream::Ch* head = is.PutBegin();
1679 const size_t length = s.Tell() - startOffset;
1680 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1681 // unable to insert the \0 character here, it will erase the comma after this number
1682 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1683 cont = handler.RawNumber(str, SizeType(length), false);
1684 }
1685 else {
1686 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1687 StringStream srcStream(s.Pop());
1688 StackStream<typename TargetEncoding::Ch> dstStream(stack_);
1689 while (numCharsToCopy--) {
1690 Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
1691 }
1692 dstStream.Put('\0');
1693 const typename TargetEncoding::Ch* str = dstStream.Pop();
1694 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1695 cont = handler.RawNumber(str, SizeType(length), true);
1696 }
1697 }
1698 else {
1699 size_t length = s.Length();
1700 const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1701
1702 if (useDouble) {
1703 int p = exp + expFrac;
1704 if (parseFlags & kParseFullPrecisionFlag)
1705 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1706 else
1707 d = internal::StrtodNormalPrecision(d, p);
1708
1709 // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
1710 if (d > (std::numeric_limits<double>::max)()) {
1711 // Overflow
1712 // TODO: internal::StrtodX should report overflow (or underflow)
1714 }
1715
1716 cont = handler.Double(minus ? -d : d);
1717 }
1718 else if (useNanOrInf) {
1719 cont = handler.Double(d);
1720 }
1721 else {
1722 if (use64bit) {
1723 if (minus)
1724 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1725 else
1726 cont = handler.Uint64(i64);
1727 }
1728 else {
1729 if (minus)
1730 cont = handler.Int(static_cast<int32_t>(~i + 1));
1731 else
1732 cont = handler.Uint(i);
1733 }
1734 }
1735 }
1736 if (RAPIDJSON_UNLIKELY(!cont))
1738 }
1739
1740 // Parse any JSON value
1741 template<unsigned parseFlags, typename InputStream, typename Handler>
1742 void ParseValue(InputStream& is, Handler& handler) {
1743 switch (is.Peek()) {
1744 case 'n': ParseNull <parseFlags>(is, handler); break;
1745 case 't': ParseTrue <parseFlags>(is, handler); break;
1746 case 'f': ParseFalse <parseFlags>(is, handler); break;
1747 case '"': ParseString<parseFlags>(is, handler); break;
1748 case '{': ParseObject<parseFlags>(is, handler); break;
1749 case '[': ParseArray <parseFlags>(is, handler); break;
1750 default :
1751 ParseNumber<parseFlags>(is, handler);
1752 break;
1753
1754 }
1755 }
1756
1757 // Iterative Parsing
1758
1759 // States
// States of the iterative parser. The numeric value of a state is its row index in the
// transition table used by Predict(), so the order below must not change.
enum IterativeParsingState {
    // Sink states
    IterativeParsingFinishState            = 0,
    IterativeParsingErrorState             = 1,
    IterativeParsingStartState             = 2,

    // Inside an object
    IterativeParsingObjectInitialState     = 3,
    IterativeParsingMemberKeyState         = 4,
    IterativeParsingMemberValueState       = 5,
    IterativeParsingObjectFinishState      = 6,

    // Inside an array
    IterativeParsingArrayInitialState      = 7,
    IterativeParsingElementState           = 8,
    IterativeParsingArrayFinishState       = 9,

    // A single (non-compound) top-level value
    IterativeParsingValueState             = 10,

    // Delimiters
    IterativeParsingElementDelimiterState  = 11,
    IterativeParsingMemberDelimiterState   = 12,
    IterativeParsingKeyValueDelimiterState = 13,

    // Number of states; follows the last state automatically.
    cIterativeParsingStateCount
};
1786
1787 // Tokens
// Lookahead token classes. The numeric value of a token is its column index in the
// transition table used by Predict(), so the order below must not change.
enum Token {
    LeftBracketToken       = 0,     // '['
    RightBracketToken      = 1,     // ']'

    LeftCurlyBracketToken  = 2,     // '{'
    RightCurlyBracketToken = 3,     // '}'

    CommaToken             = 4,     // ','
    ColonToken             = 5,     // ':'

    StringToken            = 6,     // '"'
    FalseToken             = 7,     // 'f'
    TrueToken              = 8,     // 't'
    NullToken              = 9,     // 'n'
    NumberToken            = 10,    // everything else

    // Number of tokens; follows the last token automatically.
    kTokenCount
};
1806
1807 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1808
1810#define N NumberToken
1811#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1812 // Maps from ASCII to Token
1813 static const unsigned char tokenMap[256] = {
1814 N16, // 00~0F
1815 N16, // 10~1F
1816 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1817 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1818 N16, // 40~4F
1819 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1820 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1821 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1822 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1823 };
1824#undef N
1825#undef N16
1827
1828 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1829 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1830 else
1831 return NumberToken;
1832 }
1833
1834 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
1835 // current state x one lookahead token -> new state
1836 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1837 // Finish(sink state)
1838 {
1839 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1840 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1841 IterativeParsingErrorState
1842 },
1843 // Error(sink state)
1844 {
1845 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1846 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1847 IterativeParsingErrorState
1848 },
1849 // Start
1850 {
1851 IterativeParsingArrayInitialState, // Left bracket
1852 IterativeParsingErrorState, // Right bracket
1853 IterativeParsingObjectInitialState, // Left curly bracket
1854 IterativeParsingErrorState, // Right curly bracket
1855 IterativeParsingErrorState, // Comma
1856 IterativeParsingErrorState, // Colon
1857 IterativeParsingValueState, // String
1858 IterativeParsingValueState, // False
1859 IterativeParsingValueState, // True
1860 IterativeParsingValueState, // Null
1861 IterativeParsingValueState // Number
1862 },
1863 // ObjectInitial
1864 {
1865 IterativeParsingErrorState, // Left bracket
1866 IterativeParsingErrorState, // Right bracket
1867 IterativeParsingErrorState, // Left curly bracket
1868 IterativeParsingObjectFinishState, // Right curly bracket
1869 IterativeParsingErrorState, // Comma
1870 IterativeParsingErrorState, // Colon
1871 IterativeParsingMemberKeyState, // String
1872 IterativeParsingErrorState, // False
1873 IterativeParsingErrorState, // True
1874 IterativeParsingErrorState, // Null
1875 IterativeParsingErrorState // Number
1876 },
1877 // MemberKey
1878 {
1879 IterativeParsingErrorState, // Left bracket
1880 IterativeParsingErrorState, // Right bracket
1881 IterativeParsingErrorState, // Left curly bracket
1882 IterativeParsingErrorState, // Right curly bracket
1883 IterativeParsingErrorState, // Comma
1884 IterativeParsingKeyValueDelimiterState, // Colon
1885 IterativeParsingErrorState, // String
1886 IterativeParsingErrorState, // False
1887 IterativeParsingErrorState, // True
1888 IterativeParsingErrorState, // Null
1889 IterativeParsingErrorState // Number
1890 },
1891 // MemberValue
1892 {
1893 IterativeParsingErrorState, // Left bracket
1894 IterativeParsingErrorState, // Right bracket
1895 IterativeParsingErrorState, // Left curly bracket
1896 IterativeParsingObjectFinishState, // Right curly bracket
1897 IterativeParsingMemberDelimiterState, // Comma
1898 IterativeParsingErrorState, // Colon
1899 IterativeParsingErrorState, // String
1900 IterativeParsingErrorState, // False
1901 IterativeParsingErrorState, // True
1902 IterativeParsingErrorState, // Null
1903 IterativeParsingErrorState // Number
1904 },
1905 // ObjectFinish(sink state)
1906 {
1907 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1908 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1909 IterativeParsingErrorState
1910 },
1911 // ArrayInitial
1912 {
1913 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1914 IterativeParsingArrayFinishState, // Right bracket
1915 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1916 IterativeParsingErrorState, // Right curly bracket
1917 IterativeParsingErrorState, // Comma
1918 IterativeParsingErrorState, // Colon
1919 IterativeParsingElementState, // String
1920 IterativeParsingElementState, // False
1921 IterativeParsingElementState, // True
1922 IterativeParsingElementState, // Null
1923 IterativeParsingElementState // Number
1924 },
1925 // Element
1926 {
1927 IterativeParsingErrorState, // Left bracket
1928 IterativeParsingArrayFinishState, // Right bracket
1929 IterativeParsingErrorState, // Left curly bracket
1930 IterativeParsingErrorState, // Right curly bracket
1931 IterativeParsingElementDelimiterState, // Comma
1932 IterativeParsingErrorState, // Colon
1933 IterativeParsingErrorState, // String
1934 IterativeParsingErrorState, // False
1935 IterativeParsingErrorState, // True
1936 IterativeParsingErrorState, // Null
1937 IterativeParsingErrorState // Number
1938 },
1939 // ArrayFinish(sink state)
1940 {
1941 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1942 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1943 IterativeParsingErrorState
1944 },
1945 // Single Value (sink state)
1946 {
1947 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1948 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1949 IterativeParsingErrorState
1950 },
1951 // ElementDelimiter
1952 {
1953 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1954 IterativeParsingArrayFinishState, // Right bracket
1955 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1956 IterativeParsingErrorState, // Right curly bracket
1957 IterativeParsingErrorState, // Comma
1958 IterativeParsingErrorState, // Colon
1959 IterativeParsingElementState, // String
1960 IterativeParsingElementState, // False
1961 IterativeParsingElementState, // True
1962 IterativeParsingElementState, // Null
1963 IterativeParsingElementState // Number
1964 },
1965 // MemberDelimiter
1966 {
1967 IterativeParsingErrorState, // Left bracket
1968 IterativeParsingErrorState, // Right bracket
1969 IterativeParsingErrorState, // Left curly bracket
1970 IterativeParsingObjectFinishState, // Right curly bracket
1971 IterativeParsingErrorState, // Comma
1972 IterativeParsingErrorState, // Colon
1973 IterativeParsingMemberKeyState, // String
1974 IterativeParsingErrorState, // False
1975 IterativeParsingErrorState, // True
1976 IterativeParsingErrorState, // Null
1977 IterativeParsingErrorState // Number
1978 },
1979 // KeyValueDelimiter
1980 {
1981 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1982 IterativeParsingErrorState, // Right bracket
1983 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1984 IterativeParsingErrorState, // Right curly bracket
1985 IterativeParsingErrorState, // Comma
1986 IterativeParsingErrorState, // Colon
1987 IterativeParsingMemberValueState, // String
1988 IterativeParsingMemberValueState, // False
1989 IterativeParsingMemberValueState, // True
1990 IterativeParsingMemberValueState, // Null
1991 IterativeParsingMemberValueState // Number
1992 },
1993 }; // End of G
1994
1995 return static_cast<IterativeParsingState>(G[state][token]);
1996 }
1997
1998 // Advance the token stream and the state machine towards the candidate destination state `dst`, which was computed by Predict().
1999 // May return a different state than `dst` when a state is popped from the stack.
2000 template <unsigned parseFlags, typename InputStream, typename Handler>
2001 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
2002 (void)token;
2003
2004 switch (dst) {
2005 case IterativeParsingErrorState:
2006 return dst;
2007
2008 case IterativeParsingObjectInitialState:
2009 case IterativeParsingArrayInitialState:
2010 {
2011 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
2012 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2013 IterativeParsingState n = src;
2014 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2015 n = IterativeParsingElementState;
2016 else if (src == IterativeParsingKeyValueDelimiterState)
2017 n = IterativeParsingMemberValueState;
2018 // Push current state.
2019 *stack_.template Push<SizeType>(1) = n;
2020 // Initialize and push the member/element count.
2021 *stack_.template Push<SizeType>(1) = 0;
2022 // Call handler
2023 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2024 // On handler short circuits the parsing.
2025 if (!hr) {
2027 return IterativeParsingErrorState;
2028 }
2029 else {
2030 is.Take();
2031 return dst;
2032 }
2033 }
2034
2035 case IterativeParsingMemberKeyState:
2036 ParseString<parseFlags>(is, handler, true);
2037 if (HasParseError())
2038 return IterativeParsingErrorState;
2039 else
2040 return dst;
2041
2042 case IterativeParsingKeyValueDelimiterState:
2043 RAPIDJSON_ASSERT(token == ColonToken);
2044 is.Take();
2045 return dst;
2046
2047 case IterativeParsingMemberValueState:
2048 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2049 ParseValue<parseFlags>(is, handler);
2050 if (HasParseError()) {
2051 return IterativeParsingErrorState;
2052 }
2053 return dst;
2054
2055 case IterativeParsingElementState:
2056 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2057 ParseValue<parseFlags>(is, handler);
2058 if (HasParseError()) {
2059 return IterativeParsingErrorState;
2060 }
2061 return dst;
2062
2063 case IterativeParsingMemberDelimiterState:
2064 case IterativeParsingElementDelimiterState:
2065 is.Take();
2066 // Update member/element count.
2067 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2068 return dst;
2069
2070 case IterativeParsingObjectFinishState:
2071 {
2072 // Transit from delimiter is only allowed when trailing commas are enabled
2073 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2075 return IterativeParsingErrorState;
2076 }
2077 // Get member count.
2078 SizeType c = *stack_.template Pop<SizeType>(1);
2079 // If the object is not empty, count the last member.
2080 if (src == IterativeParsingMemberValueState)
2081 ++c;
2082 // Restore the state.
2083 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2084 // Transit to Finish state if this is the topmost scope.
2085 if (n == IterativeParsingStartState)
2086 n = IterativeParsingFinishState;
2087 // Call handler
2088 bool hr = handler.EndObject(c);
2089 // On handler short circuits the parsing.
2090 if (!hr) {
2092 return IterativeParsingErrorState;
2093 }
2094 else {
2095 is.Take();
2096 return n;
2097 }
2098 }
2099
2100 case IterativeParsingArrayFinishState:
2101 {
2102 // Transit from delimiter is only allowed when trailing commas are enabled
2103 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2105 return IterativeParsingErrorState;
2106 }
2107 // Get element count.
2108 SizeType c = *stack_.template Pop<SizeType>(1);
2109 // If the array is not empty, count the last element.
2110 if (src == IterativeParsingElementState)
2111 ++c;
2112 // Restore the state.
2113 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2114 // Transit to Finish state if this is the topmost scope.
2115 if (n == IterativeParsingStartState)
2116 n = IterativeParsingFinishState;
2117 // Call handler
2118 bool hr = handler.EndArray(c);
2119 // On handler short circuits the parsing.
2120 if (!hr) {
2122 return IterativeParsingErrorState;
2123 }
2124 else {
2125 is.Take();
2126 return n;
2127 }
2128 }
2129
2130 default:
2131 // This branch is for IterativeParsingValueState actually.
2132 // Use `default:` rather than
2133 // `case IterativeParsingValueState:` is for code coverage.
2134
2135 // The IterativeParsingStartState is not enumerated in this switch-case.
2136 // It is impossible for that case. And it can be caught by following assertion.
2137
2138 // The IterativeParsingFinishState is not enumerated in this switch-case either.
2139 // It is a "derivative" state which cannot triggered from Predict() directly.
2140 // Therefore it cannot happen here. And it can be caught by following assertion.
2141 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2142
2143 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2144 ParseValue<parseFlags>(is, handler);
2145 if (HasParseError()) {
2146 return IterativeParsingErrorState;
2147 }
2148 return IterativeParsingFinishState;
2149 }
2150 }
2151
2152 template <typename InputStream>
2153 void HandleError(IterativeParsingState src, InputStream& is) {
2154 if (HasParseError()) {
2155 // Error flag has been set.
2156 return;
2157 }
2158
2159 switch (src) {
2160 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2161 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2162 case IterativeParsingObjectInitialState:
2163 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2164 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2165 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2166 case IterativeParsingKeyValueDelimiterState:
2167 case IterativeParsingArrayInitialState:
2168 case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2169 default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
2170 }
2171 }
2172
2173 RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2174 return s >= IterativeParsingElementDelimiterState;
2175 }
2176
2177 RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2178 return s <= IterativeParsingErrorState;
2179 }
2180
2181 template <unsigned parseFlags, typename InputStream, typename Handler>
2182 ParseResult IterativeParse(InputStream& is, Handler& handler) {
2183 parseResult_.Clear();
2184 ClearStackOnExit scope(*this);
2185 IterativeParsingState state = IterativeParsingStartState;
2186
2187 SkipWhitespaceAndComments<parseFlags>(is);
2188 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2189 while (is.Peek() != '\0') {
2190 Token t = Tokenize(is.Peek());
2191 IterativeParsingState n = Predict(state, t);
2192 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2193
2194 if (d == IterativeParsingErrorState) {
2195 HandleError(state, is);
2196 break;
2197 }
2198
2199 state = d;
2200
2201 // Do not further consume streams if a root JSON has been parsed.
2202 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2203 break;
2204
2205 SkipWhitespaceAndComments<parseFlags>(is);
2206 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2207 }
2208
2209 // Handle the end of file.
2210 if (state != IterativeParsingFinishState)
2211 HandleError(state, is);
2212
2213 return parseResult_;
2214 }
2215
// Default for the constructor's stackCapacity argument.
2216 static const size_t kDefaultStackCapacity = 256;
// Working stack; Transit() pushes and pops SizeType entries on it (the saved
// state and the member/element count of each open object or array).
2217 internal::Stack<StackAllocator> stack_;
// Result of the last parse; IterativeParse() clears it on entry and returns it.
2218 ParseResult parseResult_;
// NOTE(review): not referenced in this part of the file - presumably the current
// state for token-by-token parsing (IterativeParseInit/IterativeParseNext); confirm.
2219 IterativeParsingState state_;
2220}; // class GenericReader
2221
2224
2226
2227#if defined(__clang__) || defined(_MSC_VER)
2228RAPIDJSON_DIAG_POP
2229#endif
2230
2231
2232#ifdef __GNUC__
2233RAPIDJSON_DIAG_POP
2234#endif
2235
2236#endif // RAPIDJSON_READER_H_
Input byte stream wrapper with a statically bound encoding.
SAX-style JSON parser. Use Reader for UTF8 encoding and default allocator.
Definition reader.h:539
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text.
Definition reader.h:559
bool IterativeParseNext(InputStream &is, Handler &handler)
Parse one token from JSON text.
Definition reader.h:620
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text (with kParseDefaultFlags)
Definition reader.h:600
void IterativeParseInit()
Initialize JSON text token-by-token parsing.
Definition reader.h:607
ParseErrorCode GetParseErrorCode() const
Get the ParseErrorCode of last parsing.
Definition reader.h:685
RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const
Check if token-by-token parsing JSON text is complete.
Definition reader.h:677
GenericReader(StackAllocator *stackAllocator=0, size_t stackCapacity=kDefaultStackCapacity)
Constructor.
Definition reader.h:547
bool HasParseError() const
Whether a parse error has occurred in the last parsing.
Definition reader.h:682
size_t GetErrorOffset() const
Get the position of last parsing error in input, 0 otherwise.
Definition reader.h:688
Concept for receiving events from GenericReader upon parsing. The functions return true if no error o...
Concept for reading and writing characters.
#define RAPIDJSON_LIKELY(x)
Compiler branching hint for expression with high probability to be true.
Definition rapidjson.h:463
#define RAPIDJSON_UNLIKELY(x)
Compiler branching hint for expression with low probability to be true.
Definition rapidjson.h:476
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:406
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset)
Macro to indicate a parse error.
Definition reader.h:100
ParseErrorCode
Error code of parsing.
Definition error.h:64
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset)
(Internal) macro to indicate and handle a parse error.
Definition reader.h:119
@ kParseErrorDocumentEmpty
The document is empty.
Definition error.h:67
@ kParseErrorNumberMissFraction
Missing fraction part in number.
Definition error.h:85
@ kParseErrorStringInvalidEncoding
Invalid encoding in string.
Definition error.h:82
@ kParseErrorValueInvalid
Invalid value.
Definition error.h:70
@ kParseErrorDocumentRootNotSingular
The document root must not be followed by other values.
Definition error.h:68
@ kParseErrorUnspecificSyntaxError
Unspecific syntax error.
Definition error.h:89
@ kParseErrorObjectMissCommaOrCurlyBracket
Missing a comma or '}' after an object member.
Definition error.h:74
@ kParseErrorObjectMissColon
Missing a colon after a name of object member.
Definition error.h:73
@ kParseErrorStringMissQuotationMark
Missing a closing quotation mark in string.
Definition error.h:81
@ kParseErrorTermination
Parsing was terminated.
Definition error.h:88
@ kParseErrorNumberMissExponent
Missing exponent in number.
Definition error.h:86
@ kParseErrorStringEscapeInvalid
Invalid escape character in string.
Definition error.h:80
@ kParseErrorArrayMissCommaOrSquareBracket
Missing a comma or ']' after an array element.
Definition error.h:76
@ kParseErrorStringUnicodeSurrogateInvalid
The surrogate pair in string is invalid.
Definition error.h:79
@ kParseErrorObjectMissName
Missing a name for object member.
Definition error.h:72
@ kParseErrorNumberTooBig
Number too big to be stored in double.
Definition error.h:84
@ kParseErrorStringUnicodeEscapeInvalidHex
Incorrect hex digit after \u escape in string.
Definition error.h:78
Type
Type of JSON value.
Definition rapidjson.h:664
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition rapidjson.h:384
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition rapidjson.h:289
void SkipWhitespace(InputStream &is)
Skip the JSON white spaces in a stream.
Definition reader.h:266
ParseFlag
Combination of parseFlags.
Definition reader.h:146
@ kParseFullPrecisionFlag
Parse number in full precision (but slower).
Definition reader.h:152
@ kParseInsituFlag
In-situ (destructive) parsing.
Definition reader.h:148
@ kParseNoFlags
No flags are set.
Definition reader.h:147
@ kParseCommentsFlag
Allow one-line (//) and multi-line (/**/) comments.
Definition reader.h:153
@ kParseEscapedApostropheFlag
Allow escaped apostrophe in strings.
Definition reader.h:157
@ kParseDefaultFlags
Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS.
Definition reader.h:158
@ kParseTrailingCommasFlag
Allow trailing commas at the end of objects and arrays.
Definition reader.h:155
@ kParseNanAndInfFlag
Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
Definition reader.h:156
@ kParseValidateEncodingFlag
Validate encoding of JSON strings.
Definition reader.h:149
@ kParseNumbersAsStringsFlag
Parse all numbers (ints/doubles) as strings.
Definition reader.h:154
@ kParseIterativeFlag
Iterative (constant complexity in terms of function call stack size) parsing.
Definition reader.h:150
@ kParseStopWhenDoneFlag
After parsing a complete JSON root from stream, stop further processing the rest of stream....
Definition reader.h:151
Default implementation of Handler.
Definition reader.h:198
bool RawNumber(const Ch *str, SizeType len, bool copy)
enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
Definition reader.h:212
const Ch * src_
Current read position.
Definition stream.h:168
Represents an in-memory input byte stream.
Result of parsing (wraps ParseErrorCode)
Definition error.h:106
void Set(ParseErrorCode code, size_t offset=0)
Update error code and offset.
Definition error.h:136
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the outp...
Definition encodings.h:661
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition encodings.h:680
UTF-8 encoding.
Definition encodings.h:96