bes Updated for version 3.21.1
The Backend Server (BES) is the lower two tiers of the Hyrax data server
pugixml.cpp
1
13
14#ifndef SOURCE_PUGIXML_CPP
15#define SOURCE_PUGIXML_CPP
16
17#include "pugixml.hpp"
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24
25#ifdef PUGIXML_WCHAR_MODE
26# include <wchar.h>
27#endif
28
29#ifndef PUGIXML_NO_XPATH
30# include <math.h>
31# include <float.h>
32#endif
33
34#ifndef PUGIXML_NO_STL
35# include <istream>
36# include <ostream>
37# include <string>
38#endif
39
40// For placement new
41#include <new>
42
43#ifdef _MSC_VER
44# pragma warning(push)
45# pragma warning(disable: 4127) // conditional expression is constant
46# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47# pragma warning(disable: 4702) // unreachable code
48# pragma warning(disable: 4996) // this function or variable may be unsafe
49#endif
50
51#if defined(_MSC_VER) && defined(__c2__)
52# pragma clang diagnostic push
53# pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54#endif
55
56#ifdef __INTEL_COMPILER
57# pragma warning(disable: 177) // function was declared but never referenced
58# pragma warning(disable: 279) // controlling expression is constant
59# pragma warning(disable: 1478 1786) // function was declared "deprecated"
60# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61#endif
62
63#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65#endif
66
67#ifdef __BORLANDC__
68# pragma option push
69# pragma warn -8008 // condition is always false
70# pragma warn -8066 // unreachable code
71#endif
72
73#ifdef __SNC__
74// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75# pragma diag_suppress=178 // function was declared but never referenced
76# pragma diag_suppress=237 // controlling expression is constant
77#endif
78
79#ifdef __TI_COMPILER_VERSION__
80# pragma diag_suppress 179 // function was declared but never referenced
81#endif
82
// Inlining controls: PUGI__NO_INLINE expands to the compiler-specific "never inline" marker,
// or to nothing when the compiler is not recognized
#if defined(_MSC_VER) && _MSC_VER >= 1300
#   define PUGI__NO_INLINE __declspec(noinline)
#elif defined(__GNUC__)
#   define PUGI__NO_INLINE __attribute__((noinline))
#else
#   define PUGI__NO_INLINE
#endif
91
// Branch weight controls: PUGI__UNLIKELY(cond) evaluates to cond and, on GCC-compatible
// compilers (excluding __c2__), tells the optimizer that cond is expected to be false
#if !defined(__GNUC__) || defined(__c2__)
#   define PUGI__UNLIKELY(cond) (cond)
#else
#   define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
#endif
98
// Simple static assertion usable at block scope: a false condition declares an array of
// negative size, which fails to compile
#define PUGI__STATIC_ASSERT(cond) \
    { \
        static const char condition_failed[(cond) ? 1 : -1] = {0}; \
        (void)condition_failed[0]; \
    }
101
// Digital Mars C++ bug workaround for passing char loaded from memory via stack:
// the qualifier is 'volatile' on that compiler and empty everywhere else
#ifndef __DMC__
#   define PUGI__DMC_VOLATILE
#else
#   define PUGI__DMC_VOLATILE volatile
#endif
108
// Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not
// unsigned-integer-overflow and produces "attribute directive ignored" warnings
#if defined(__clang__) && defined(__has_attribute)
#   if __has_attribute(no_sanitize)
#       define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
#   endif
#endif

// every other configuration gets an empty marker
#ifndef PUGI__UNSIGNED_OVERFLOW
#   define PUGI__UNSIGNED_OVERFLOW
#endif
119
// Borland C++ bug workaround: depending on header include order ::memcpy and friends may be
// missing, so import them from std (std::memcpy can't be used unconditionally because some
// compilers don't have it at all)
#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
using std::memcpy;
using std::memmove;
using std::memset;
#endif
126
// Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX
// definitions from limits.h in some configurations; derive all three from the GCC builtin
#if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
#   define LLONG_MAX __LONG_LONG_MAX__
#   define LLONG_MIN (-LLONG_MAX - 1LL)
#   define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
#endif
133
// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features;
// the CRT version macro is therefore left undefined when __S3E__ is defined
#if !defined(__S3E__) && defined(_MSC_VER)
#   define PUGI__MSVC_CRT_VERSION _MSC_VER
#endif
138
// Not all platforms have snprintf; PUGI__SNPRINTF wraps the best available formatter.
// The bounded variants take the limit from sizeof/_countof of 'buf', so this only works with
// buffers with a known size (arrays, not pointers). The last branch is plain unbounded sprintf.
#if __cplusplus >= 201103
#   define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
#elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
#   define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
#else
#   define PUGI__SNPRINTF sprintf
#endif
147
// We put implementation details into an anonymous namespace in source mode, but have to keep it
// in non-anonymous namespace in header-only mode to prevent binary bloat.
// MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces, so it also
// gets the named-only variant.
#if defined(PUGIXML_HEADER_ONLY) || (defined(_MSC_VER) && _MSC_VER < 1300)
#   define PUGI__NS_BEGIN namespace pugi { namespace impl {
#   define PUGI__NS_END } }
#else
#   define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
#   define PUGI__NS_END } } }
#endif

// Function linkage markers: everything is 'inline' in header-only mode; in source mode plain
// functions carry no marker and PUGI__FN_NO_INLINE forwards to PUGI__NO_INLINE
#ifdef PUGIXML_HEADER_ONLY
#   define PUGI__FN inline
#   define PUGI__FN_NO_INLINE inline
#else
#   define PUGI__FN
#   define PUGI__FN_NO_INLINE PUGI__NO_INLINE
#endif
165
// uintptr_t and fixed-width integer types: taken from <stdint.h> where available, otherwise
// declared by hand inside namespace pugi for old MSVC (< 1600) and old Borland (< 0x561)
#if !((defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561))
#   include <stdint.h>
#else
namespace pugi
{
#   ifndef _UINTPTR_T_DEFINED
    typedef size_t uintptr_t;
#   endif

    typedef unsigned __int8 uint8_t;
    typedef unsigned __int16 uint16_t;
    typedef unsigned __int32 uint32_t;
}
#endif
181
182// Memory allocation
183PUGI__NS_BEGIN
184 PUGI__FN void* default_allocate(size_t size)
185 {
186 return malloc(size);
187 }
188
189 PUGI__FN void default_deallocate(void* ptr)
190 {
191 free(ptr);
192 }
193
194 template <typename T>
195 struct xml_memory_management_function_storage
196 {
197 static allocation_function allocate;
198 static deallocation_function deallocate;
199 };
200
201 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
202 // Without a template<> we'll get multiple definitions of the same static
203 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
204 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
205
206 typedef xml_memory_management_function_storage<int> xml_memory;
207PUGI__NS_END
208
209// String utilities
210PUGI__NS_BEGIN
211 // Get string length
212 PUGI__FN size_t strlength(const char_t* s)
213 {
214 assert(s);
215
216 #ifdef PUGIXML_WCHAR_MODE
217 return wcslen(s);
218 #else
219 return strlen(s);
220 #endif
221 }
222
223 // Compare two strings
224 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
225 {
226 assert(src && dst);
227
228 #ifdef PUGIXML_WCHAR_MODE
229 return wcscmp(src, dst) == 0;
230 #else
231 return strcmp(src, dst) == 0;
232 #endif
233 }
234
235 // Compare lhs with [rhs_begin, rhs_end)
236 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
237 {
238 for (size_t i = 0; i < count; ++i)
239 if (lhs[i] != rhs[i])
240 return false;
241
242 return lhs[count] == 0;
243 }
244
245 // Get length of wide string, even if CRT lacks wide character support
246 PUGI__FN size_t strlength_wide(const wchar_t* s)
247 {
248 assert(s);
249
250 #ifdef PUGIXML_WCHAR_MODE
251 return wcslen(s);
252 #else
253 const wchar_t* end = s;
254 while (*end) end++;
255 return static_cast<size_t>(end - s);
256 #endif
257 }
258PUGI__NS_END
259
260// auto_ptr-like object for exception recovery
261PUGI__NS_BEGIN
// Scope guard that owns a raw pointer together with the function that disposes of it.
// The destructor calls deleter(data) unless the pointer is null or ownership was taken
// back with release(). Used for exception recovery (auto_ptr-like).
template <typename T> struct auto_deleter
{
    typedef void (*D)(T*);

    T* data;   // guarded pointer; null means nothing to dispose
    D deleter; // function invoked on data by the destructor

    // Takes ownership of data_; deleter_ must be callable whenever data_ is non-null
    auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
    {
    }

    ~auto_deleter()
    {
        if (data) deleter(data);
    }

    // Gives ownership back to the caller: returns the guarded pointer and disarms the guard
    T* release()
    {
        T* result = data;
        data = 0;
        return result;
    }

private:
    // Non-copyable: a copy would hold the same pointer and the deleter would run twice.
    // Declared but intentionally not defined (C++03-compatible way to forbid copying).
    auto_deleter(const auto_deleter&);
    auto_deleter& operator=(const auto_deleter&);
};
285PUGI__NS_END
286
287#ifdef PUGIXML_COMPACT
288PUGI__NS_BEGIN
289 class compact_hash_table
290 {
291 public:
292 compact_hash_table(): _items(0), _capacity(0), _count(0)
293 {
294 }
295
296 void clear()
297 {
298 if (_items)
299 {
300 xml_memory::deallocate(_items);
301 _items = 0;
302 _capacity = 0;
303 _count = 0;
304 }
305 }
306
307 void* find(const void* key)
308 {
309 if (_capacity == 0) return 0;
310
311 item_t* item = get_item(key);
312 assert(item);
313 assert(item->key == key || (item->key == 0 && item->value == 0));
314
315 return item->value;
316 }
317
318 void insert(const void* key, void* value)
319 {
320 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
321
322 item_t* item = get_item(key);
323 assert(item);
324
325 if (item->key == 0)
326 {
327 _count++;
328 item->key = key;
329 }
330
331 item->value = value;
332 }
333
334 bool reserve(size_t extra = 16)
335 {
336 if (_count + extra >= _capacity - _capacity / 4)
337 return rehash(_count + extra);
338
339 return true;
340 }
341
342 private:
343 struct item_t
344 {
345 const void* key;
346 void* value;
347 };
348
349 item_t* _items;
350 size_t _capacity;
351
352 size_t _count;
353
354 bool rehash(size_t count);
355
356 item_t* get_item(const void* key)
357 {
358 assert(key);
359 assert(_capacity > 0);
360
361 size_t hashmod = _capacity - 1;
362 size_t bucket = hash(key) & hashmod;
363
364 for (size_t probe = 0; probe <= hashmod; ++probe)
365 {
366 item_t& probe_item = _items[bucket];
367
368 if (probe_item.key == key || probe_item.key == 0)
369 return &probe_item;
370
371 // hash collision, quadratic probing
372 bucket = (bucket + probe + 1) & hashmod;
373 }
374
375 assert(false && "Hash table is full"); // unreachable
376 return 0;
377 }
378
379 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
380 {
381 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
382
383 // MurmurHash3 32-bit finalizer
384 h ^= h >> 16;
385 h *= 0x85ebca6bu;
386 h ^= h >> 13;
387 h *= 0xc2b2ae35u;
388 h ^= h >> 16;
389
390 return h;
391 }
392 };
393
394 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
395 {
396 size_t capacity = 32;
397 while (count >= capacity - capacity / 4)
398 capacity *= 2;
399
400 compact_hash_table rt;
401 rt._capacity = capacity;
402 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
403
404 if (!rt._items)
405 return false;
406
407 memset(rt._items, 0, sizeof(item_t) * capacity);
408
409 for (size_t i = 0; i < _capacity; ++i)
410 if (_items[i].key)
411 rt.insert(_items[i].key, _items[i].value);
412
413 if (_items)
414 xml_memory::deallocate(_items);
415
416 _capacity = capacity;
417 _items = rt._items;
418
419 assert(_count == rt._count);
420
421 return true;
422 }
423
424PUGI__NS_END
425#endif
426
427PUGI__NS_BEGIN
// Granularity used when rounding string allocations and encoding in-page offsets:
// pointer-sized in the regular build, 4 bytes in compact mode
#ifndef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#else
static const uintptr_t xml_memory_block_alignment = 4;
#endif
433
// extra metadata bits kept in the header of nodes and attributes
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask =
    xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask =
    xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
443
// Header <-> page conversion helpers.
// Regular build: the header stores the byte distance from the owning page to the object in the
// bits above the low 8, and the flag/type bits in the low 8; PUGI__GETPAGE_IMPL subtracts that
// distance from the address of the header field to get back to the page.
// Compact build: the header is an object that resolves its page itself via get_page().
// All macro parameters are parenthesized so that any expression can be passed safely.
#ifdef PUGIXML_COMPACT
    #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
    #define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
    #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
    // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
    #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&(header)) - ((header) >> 8))))
#endif

// Page that owns node/attribute n, and the node type stored in the low header bits of n
#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
455
struct xml_allocator;

// Bookkeeping header placed at the start of every memory page; the allocator uses the bytes
// that follow it as the page payload. Pages form a doubly linked list.
struct xml_memory_page
{
    // Treats 'memory' as a page header and resets every field to zero/null.
    // No constructor runs; the same address is returned, typed as a page.
    static xml_memory_page* construct(void* memory)
    {
        xml_memory_page* page = static_cast<xml_memory_page*>(memory);

        page->allocator = 0;
        page->prev = page->next = 0;
        page->busy_size = page->freed_size = 0;

    #ifdef PUGIXML_COMPACT
        page->compact_string_base = 0;
        page->compact_shared_parent = 0;
        page->compact_page_marker = 0;
    #endif

        return page;
    }

    xml_allocator* allocator; // allocator this page belongs to

    xml_memory_page* prev;
    xml_memory_page* next;

    size_t busy_size;  // payload bytes handed out so far
    size_t freed_size; // payload bytes returned so far

#ifdef PUGIXML_COMPACT
    char_t* compact_string_base; // base address for compact string offsets
    void* compact_shared_parent; // parent pointer shared by compact nodes on this page
    uint32_t* compact_page_marker; // most recent marker holding the distance back to the page
#endif
};
493
494 static const size_t xml_memory_page_size =
495 #ifdef PUGIXML_MEMORY_PAGE_SIZE
496 (PUGIXML_MEMORY_PAGE_SIZE)
497 #else
498 32768
499 #endif
500 - sizeof(xml_memory_page);
501
// Prefix stored in front of every string allocated from a page.
// Both fields are expressed in units of xml_memory_block_alignment (see allocate_string).
struct xml_memory_string_header
{
    uint16_t page_offset; // offset from page->data
    uint16_t full_size;   // 0 if string occupies whole page
};
507
508 struct xml_allocator
509 {
510 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
511 {
512 #ifdef PUGIXML_COMPACT
513 _hash = 0;
514 #endif
515 }
516
517 xml_memory_page* allocate_page(size_t data_size)
518 {
519 size_t size = sizeof(xml_memory_page) + data_size;
520
521 // allocate block with some alignment, leaving memory for worst-case padding
522 void* memory = xml_memory::allocate(size);
523 if (!memory) return 0;
524
525 // prepare page structure
526 xml_memory_page* page = xml_memory_page::construct(memory);
527 assert(page);
528
529 assert(this == _root->allocator);
530 page->allocator = this;
531
532 return page;
533 }
534
535 static void deallocate_page(xml_memory_page* page)
536 {
537 xml_memory::deallocate(page);
538 }
539
540 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
541
542 void* allocate_memory(size_t size, xml_memory_page*& out_page)
543 {
544 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
545 return allocate_memory_oob(size, out_page);
546
547 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
548
549 _busy_size += size;
550
551 out_page = _root;
552
553 return buf;
554 }
555
556 #ifdef PUGIXML_COMPACT
557 void* allocate_object(size_t size, xml_memory_page*& out_page)
558 {
559 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
560 if (!result) return 0;
561
562 // adjust for marker
563 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
564
565 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
566 {
567 // insert new marker
568 uint32_t* marker = static_cast<uint32_t*>(result);
569
570 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
571 out_page->compact_page_marker = marker;
572
573 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
574 // this will make sure deallocate_memory correctly tracks the size
575 out_page->freed_size += sizeof(uint32_t);
576
577 return marker + 1;
578 }
579 else
580 {
581 // roll back uint32_t part
582 _busy_size -= sizeof(uint32_t);
583
584 return result;
585 }
586 }
587 #else
588 void* allocate_object(size_t size, xml_memory_page*& out_page)
589 {
590 return allocate_memory(size, out_page);
591 }
592 #endif
593
594 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
595 {
596 if (page == _root) page->busy_size = _busy_size;
597
598 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
599 (void)!ptr;
600
601 page->freed_size += size;
602 assert(page->freed_size <= page->busy_size);
603
604 if (page->freed_size == page->busy_size)
605 {
606 if (page->next == 0)
607 {
608 assert(_root == page);
609
610 // top page freed, just reset sizes
611 page->busy_size = 0;
612 page->freed_size = 0;
613
614 #ifdef PUGIXML_COMPACT
615 // reset compact state to maximize efficiency
616 page->compact_string_base = 0;
617 page->compact_shared_parent = 0;
618 page->compact_page_marker = 0;
619 #endif
620
621 _busy_size = 0;
622 }
623 else
624 {
625 assert(_root != page);
626 assert(page->prev);
627
628 // remove from the list
629 page->prev->next = page->next;
630 page->next->prev = page->prev;
631
632 // deallocate
633 deallocate_page(page);
634 }
635 }
636 }
637
638 char_t* allocate_string(size_t length)
639 {
640 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
641
642 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
643
644 // allocate memory for string and header block
645 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
646
647 // round size up to block alignment boundary
648 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
649
650 xml_memory_page* page;
651 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
652
653 if (!header) return 0;
654
655 // setup header
656 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
657
658 assert(page_offset % xml_memory_block_alignment == 0);
659 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
660 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
661
662 // full_size == 0 for large strings that occupy the whole page
663 assert(full_size % xml_memory_block_alignment == 0);
664 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
665 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
666
667 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
668 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
669 return static_cast<char_t*>(static_cast<void*>(header + 1));
670 }
671
672 void deallocate_string(char_t* string)
673 {
674 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
675 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
676
677 // get header
678 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
679 assert(header);
680
681 // deallocate
682 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
683 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
684
685 // if full_size == 0 then this string occupies the whole page
686 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
687
688 deallocate_memory(header, full_size, page);
689 }
690
691 bool reserve()
692 {
693 #ifdef PUGIXML_COMPACT
694 return _hash->reserve();
695 #else
696 return true;
697 #endif
698 }
699
700 xml_memory_page* _root;
701 size_t _busy_size;
702
703 #ifdef PUGIXML_COMPACT
704 compact_hash_table* _hash;
705 #endif
706 };
707
708 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
709 {
710 const size_t large_allocation_threshold = xml_memory_page_size / 4;
711
712 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
713 out_page = page;
714
715 if (!page) return 0;
716
717 if (size <= large_allocation_threshold)
718 {
719 _root->busy_size = _busy_size;
720
721 // insert page at the end of linked list
722 page->prev = _root;
723 _root->next = page;
724 _root = page;
725
726 _busy_size = size;
727 }
728 else
729 {
730 // insert page before the end of linked list, so that it is deleted as soon as possible
731 // the last page is not deleted even if it's empty (see deallocate_memory)
732 assert(_root->prev);
733
734 page->prev = _root->prev;
735 page->next = _root;
736
737 _root->prev->next = page;
738 _root->prev = page;
739
740 page->busy_size = size;
741 }
742
743 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
744 }
745PUGI__NS_END
746
747#ifdef PUGIXML_COMPACT
748PUGI__NS_BEGIN
// Alignment unit of compact-mode objects, as a shift count and as a byte count;
// compact offsets are stored in multiples of compact_alignment
static const uintptr_t compact_alignment_log2 = 2;
static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
751
752 class compact_header
753 {
754 public:
755 compact_header(xml_memory_page* page, unsigned int flags)
756 {
757 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
758
759 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
760 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
761
762 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
763 _flags = static_cast<unsigned char>(flags);
764 }
765
766 void operator&=(uintptr_t mod)
767 {
768 _flags &= static_cast<unsigned char>(mod);
769 }
770
771 void operator|=(uintptr_t mod)
772 {
773 _flags |= static_cast<unsigned char>(mod);
774 }
775
776 uintptr_t operator&(uintptr_t mod) const
777 {
778 return _flags & mod;
779 }
780
781 xml_memory_page* get_page() const
782 {
783 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
784 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
785 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
786
787 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
788 }
789
790 private:
791 unsigned char _page;
792 unsigned char _flags;
793 };
794
795 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
796 {
797 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
798
799 return header->get_page();
800 }
801
802 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
803 {
804 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
805 }
806
807 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
808 {
809 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
810 }
811
812 template <typename T, int header_offset, int start = -126> class compact_pointer
813 {
814 public:
815 compact_pointer(): _data(0)
816 {
817 }
818
819 void operator=(const compact_pointer& rhs)
820 {
821 *this = rhs + 0;
822 }
823
824 void operator=(T* value)
825 {
826 if (value)
827 {
828 // value is guaranteed to be compact-aligned; 'this' is not
829 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
830 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
831 // compensate for arithmetic shift rounding for negative values
832 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
833 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
834
835 if (static_cast<uintptr_t>(offset) <= 253)
836 _data = static_cast<unsigned char>(offset + 1);
837 else
838 {
839 compact_set_value<header_offset>(this, value);
840
841 _data = 255;
842 }
843 }
844 else
845 _data = 0;
846 }
847
848 operator T*() const
849 {
850 if (_data)
851 {
852 if (_data < 255)
853 {
854 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
855
856 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
857 }
858 else
859 return compact_get_value<header_offset, T>(this);
860 }
861 else
862 return 0;
863 }
864
865 T* operator->() const
866 {
867 return *this;
868 }
869
870 private:
871 unsigned char _data;
872 };
873
874 template <typename T, int header_offset> class compact_pointer_parent
875 {
876 public:
877 compact_pointer_parent(): _data(0)
878 {
879 }
880
881 void operator=(const compact_pointer_parent& rhs)
882 {
883 *this = rhs + 0;
884 }
885
886 void operator=(T* value)
887 {
888 if (value)
889 {
890 // value is guaranteed to be compact-aligned; 'this' is not
891 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
892 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
893 // compensate for arithmetic shift behavior for negative values
894 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
895 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
896
897 if (static_cast<uintptr_t>(offset) <= 65533)
898 {
899 _data = static_cast<unsigned short>(offset + 1);
900 }
901 else
902 {
903 xml_memory_page* page = compact_get_page(this, header_offset);
904
905 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
906 page->compact_shared_parent = value;
907
908 if (page->compact_shared_parent == value)
909 {
910 _data = 65534;
911 }
912 else
913 {
914 compact_set_value<header_offset>(this, value);
915
916 _data = 65535;
917 }
918 }
919 }
920 else
921 {
922 _data = 0;
923 }
924 }
925
926 operator T*() const
927 {
928 if (_data)
929 {
930 if (_data < 65534)
931 {
932 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
933
934 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
935 }
936 else if (_data == 65534)
937 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
938 else
939 return compact_get_value<header_offset, T>(this);
940 }
941 else
942 return 0;
943 }
944
945 T* operator->() const
946 {
947 return *this;
948 }
949
950 private:
951 uint16_t _data;
952 };
953
954 template <int header_offset, int base_offset> class compact_string
955 {
956 public:
957 compact_string(): _data(0)
958 {
959 }
960
961 void operator=(const compact_string& rhs)
962 {
963 *this = rhs + 0;
964 }
965
966 void operator=(char_t* value)
967 {
968 if (value)
969 {
970 xml_memory_page* page = compact_get_page(this, header_offset);
971
972 if (PUGI__UNLIKELY(page->compact_string_base == 0))
973 page->compact_string_base = value;
974
975 ptrdiff_t offset = value - page->compact_string_base;
976
977 if (static_cast<uintptr_t>(offset) < (65535 << 7))
978 {
979 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
980 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
981
982 if (*base == 0)
983 {
984 *base = static_cast<uint16_t>((offset >> 7) + 1);
985 _data = static_cast<unsigned char>((offset & 127) + 1);
986 }
987 else
988 {
989 ptrdiff_t remainder = offset - ((*base - 1) << 7);
990
991 if (static_cast<uintptr_t>(remainder) <= 253)
992 {
993 _data = static_cast<unsigned char>(remainder + 1);
994 }
995 else
996 {
997 compact_set_value<header_offset>(this, value);
998
999 _data = 255;
1000 }
1001 }
1002 }
1003 else
1004 {
1005 compact_set_value<header_offset>(this, value);
1006
1007 _data = 255;
1008 }
1009 }
1010 else
1011 {
1012 _data = 0;
1013 }
1014 }
1015
1016 operator char_t*() const
1017 {
1018 if (_data)
1019 {
1020 if (_data < 255)
1021 {
1022 xml_memory_page* page = compact_get_page(this, header_offset);
1023
1024 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1025 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1026 assert(*base);
1027
1028 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1029
1030 return page->compact_string_base + offset;
1031 }
1032 else
1033 {
1034 return compact_get_value<header_offset, char_t>(this);
1035 }
1036 }
1037 else
1038 return 0;
1039 }
1040
1041 private:
1042 unsigned char _data;
1043 };
1044PUGI__NS_END
1045#endif
1046
1047#ifdef PUGIXML_COMPACT
1048namespace pugi
1049{
1050 struct xml_attribute_struct
1051 {
1052 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1053 {
1054 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1055 }
1056
1057 impl::compact_header header;
1058
1059 uint16_t namevalue_base;
1060
1061 impl::compact_string<4, 2> name;
1062 impl::compact_string<5, 3> value;
1063
1064 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1065 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1066 };
1067
1068 struct xml_node_struct
1069 {
1070 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1071 {
1072 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1073 }
1074
1075 impl::compact_header header;
1076
1077 uint16_t namevalue_base;
1078
1079 impl::compact_string<4, 2> name;
1080 impl::compact_string<5, 3> value;
1081
1082 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1083
1084 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1085
1086 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1087 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1088
1089 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1090 };
1091}
1092#else
1093namespace pugi
1094{
1095 struct xml_attribute_struct
1096 {
1097 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1098 {
1099 header = PUGI__GETHEADER_IMPL(this, page, 0);
1100 }
1101
1102 uintptr_t header;
1103
1104 char_t* name;
1105 char_t* value;
1106
1107 xml_attribute_struct* prev_attribute_c;
1108 xml_attribute_struct* next_attribute;
1109 };
1110
1111 struct xml_node_struct
1112 {
1113 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1114 {
1115 header = PUGI__GETHEADER_IMPL(this, page, type);
1116 }
1117
1118 uintptr_t header;
1119
1120 char_t* name;
1121 char_t* value;
1122
1123 xml_node_struct* parent;
1124
1125 xml_node_struct* first_child;
1126
1127 xml_node_struct* prev_sibling_c;
1128 xml_node_struct* next_sibling;
1129
1130 xml_attribute_struct* first_attribute;
1131 };
1132}
1133#endif
1134
1135PUGI__NS_BEGIN
1136 struct xml_extra_buffer
1137 {
1138 char_t* buffer;
1139 xml_extra_buffer* next;
1140 };
1141
1142 struct xml_document_struct: public xml_node_struct, public xml_allocator
1143 {
1144 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1145 {
1146 }
1147
1148 const char_t* buffer;
1149
1150 xml_extra_buffer* extra_buffers;
1151
1152 #ifdef PUGIXML_COMPACT
1153 compact_hash_table hash;
1154 #endif
1155 };
1156
1157 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1158 {
1159 assert(object);
1160
1161 return *PUGI__GETPAGE(object)->allocator;
1162 }
1163
1164 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1165 {
1166 assert(object);
1167
1168 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1169 }
1170PUGI__NS_END
1171
1172// Low-level DOM operations
1173PUGI__NS_BEGIN
1174 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1175 {
1176 xml_memory_page* page;
1177 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1178 if (!memory) return 0;
1179
1180 return new (memory) xml_attribute_struct(page);
1181 }
1182
1183 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1184 {
1185 xml_memory_page* page;
1186 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1187 if (!memory) return 0;
1188
1189 return new (memory) xml_node_struct(page, type);
1190 }
1191
1192 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1193 {
1194 if (a->header & impl::xml_memory_page_name_allocated_mask)
1195 alloc.deallocate_string(a->name);
1196
1197 if (a->header & impl::xml_memory_page_value_allocated_mask)
1198 alloc.deallocate_string(a->value);
1199
1200 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1201 }
1202
1203 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1204 {
1205 if (n->header & impl::xml_memory_page_name_allocated_mask)
1206 alloc.deallocate_string(n->name);
1207
1208 if (n->header & impl::xml_memory_page_value_allocated_mask)
1209 alloc.deallocate_string(n->value);
1210
1211 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1212 {
1213 xml_attribute_struct* next = attr->next_attribute;
1214
1215 destroy_attribute(attr, alloc);
1216
1217 attr = next;
1218 }
1219
1220 for (xml_node_struct* child = n->first_child; child; )
1221 {
1222 xml_node_struct* next = child->next_sibling;
1223
1224 destroy_node(child, alloc);
1225
1226 child = next;
1227 }
1228
1229 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1230 }
1231
1232 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1233 {
1234 child->parent = node;
1235
1236 xml_node_struct* head = node->first_child;
1237
1238 if (head)
1239 {
1240 xml_node_struct* tail = head->prev_sibling_c;
1241
1242 tail->next_sibling = child;
1243 child->prev_sibling_c = tail;
1244 head->prev_sibling_c = child;
1245 }
1246 else
1247 {
1248 node->first_child = child;
1249 child->prev_sibling_c = child;
1250 }
1251 }
1252
1253 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1254 {
1255 child->parent = node;
1256
1257 xml_node_struct* head = node->first_child;
1258
1259 if (head)
1260 {
1261 child->prev_sibling_c = head->prev_sibling_c;
1262 head->prev_sibling_c = child;
1263 }
1264 else
1265 child->prev_sibling_c = child;
1266
1267 child->next_sibling = head;
1268 node->first_child = child;
1269 }
1270
1271 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1272 {
1273 xml_node_struct* parent = node->parent;
1274
1275 child->parent = parent;
1276
1277 if (node->next_sibling)
1278 node->next_sibling->prev_sibling_c = child;
1279 else
1280 parent->first_child->prev_sibling_c = child;
1281
1282 child->next_sibling = node->next_sibling;
1283 child->prev_sibling_c = node;
1284
1285 node->next_sibling = child;
1286 }
1287
1288 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1289 {
1290 xml_node_struct* parent = node->parent;
1291
1292 child->parent = parent;
1293
1294 if (node->prev_sibling_c->next_sibling)
1295 node->prev_sibling_c->next_sibling = child;
1296 else
1297 parent->first_child = child;
1298
1299 child->prev_sibling_c = node->prev_sibling_c;
1300 child->next_sibling = node;
1301
1302 node->prev_sibling_c = child;
1303 }
1304
1305 inline void remove_node(xml_node_struct* node)
1306 {
1307 xml_node_struct* parent = node->parent;
1308
1309 if (node->next_sibling)
1310 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1311 else
1312 parent->first_child->prev_sibling_c = node->prev_sibling_c;
1313
1314 if (node->prev_sibling_c->next_sibling)
1315 node->prev_sibling_c->next_sibling = node->next_sibling;
1316 else
1317 parent->first_child = node->next_sibling;
1318
1319 node->parent = 0;
1320 node->prev_sibling_c = 0;
1321 node->next_sibling = 0;
1322 }
1323
1324 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1325 {
1326 xml_attribute_struct* head = node->first_attribute;
1327
1328 if (head)
1329 {
1330 xml_attribute_struct* tail = head->prev_attribute_c;
1331
1332 tail->next_attribute = attr;
1333 attr->prev_attribute_c = tail;
1334 head->prev_attribute_c = attr;
1335 }
1336 else
1337 {
1338 node->first_attribute = attr;
1339 attr->prev_attribute_c = attr;
1340 }
1341 }
1342
1343 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1344 {
1345 xml_attribute_struct* head = node->first_attribute;
1346
1347 if (head)
1348 {
1349 attr->prev_attribute_c = head->prev_attribute_c;
1350 head->prev_attribute_c = attr;
1351 }
1352 else
1353 attr->prev_attribute_c = attr;
1354
1355 attr->next_attribute = head;
1356 node->first_attribute = attr;
1357 }
1358
1359 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1360 {
1361 if (place->next_attribute)
1362 place->next_attribute->prev_attribute_c = attr;
1363 else
1364 node->first_attribute->prev_attribute_c = attr;
1365
1366 attr->next_attribute = place->next_attribute;
1367 attr->prev_attribute_c = place;
1368 place->next_attribute = attr;
1369 }
1370
1371 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1372 {
1373 if (place->prev_attribute_c->next_attribute)
1374 place->prev_attribute_c->next_attribute = attr;
1375 else
1376 node->first_attribute = attr;
1377
1378 attr->prev_attribute_c = place->prev_attribute_c;
1379 attr->next_attribute = place;
1380 place->prev_attribute_c = attr;
1381 }
1382
1383 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1384 {
1385 if (attr->next_attribute)
1386 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1387 else
1388 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1389
1390 if (attr->prev_attribute_c->next_attribute)
1391 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1392 else
1393 node->first_attribute = attr->next_attribute;
1394
1395 attr->prev_attribute_c = 0;
1396 attr->next_attribute = 0;
1397 }
1398
1399 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1400 {
1401 if (!alloc.reserve()) return 0;
1402
1403 xml_node_struct* child = allocate_node(alloc, type);
1404 if (!child) return 0;
1405
1406 append_node(child, node);
1407
1408 return child;
1409 }
1410
1411 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1412 {
1413 if (!alloc.reserve()) return 0;
1414
1415 xml_attribute_struct* attr = allocate_attribute(alloc);
1416 if (!attr) return 0;
1417
1418 append_attribute(attr, node);
1419
1420 return attr;
1421 }
1422PUGI__NS_END
1423
1424// Helper classes for code generation
1425PUGI__NS_BEGIN
// Compile-time "false" tag: exposes value == 0 for use as a template argument
// (e.g. utf16_decoder<opt_false> selects the non-swapping decoder).
struct opt_false
{
    enum
    {
        value = 0
    };
};
1430
// Compile-time "true" tag: exposes value == 1 for use as a template argument
// (e.g. utf16_decoder<opt_true> selects the byte-swapping decoder).
struct opt_true
{
    enum
    {
        value = 1
    };
};
1435PUGI__NS_END
1436
1437// Unicode utilities
1438PUGI__NS_BEGIN
// Exchanges the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
    const int low_byte = value & 0xff;
    const int high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1443
// Reverses the order of the four bytes of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0_up = (value & 0xff) << 24;
    const uint32_t byte1_up = (value & 0xff00) << 8;
    const uint32_t byte2_down = (value & 0xff0000) >> 8;
    const uint32_t byte3_down = value >> 24;

    return byte0_up | byte1_up | byte2_down | byte3_down;
}
1448
// Sink that counts how many UTF-8 bytes the reported code points would occupy.
struct utf8_counter
{
    typedef size_t value_type;

    // Adds the encoded size of a code point below U+10000 to the running total.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0800..U+FFFF
        if (ch >= 0x800) return result + 3;

        // U+0080..U+07FF
        if (ch >= 0x80) return result + 2;

        // U+0000..U+007F
        return result + 1;
    }

    // Adds the encoded size of a code point in U+10000..U+10FFFF (always four bytes).
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
1469
// Sink that encodes the reported code points as UTF-8 into a byte buffer.
// Every function writes at `result` and returns the position just past the written bytes.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Encodes a code point below U+10000 using one, two or three bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0000..U+007F
        if (ch < 0x80)
        {
            result[0] = static_cast<uint8_t>(ch);

            return result + 1;
        }

        // U+0080..U+07FF
        if (ch < 0x800)
        {
            result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
            result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

            return result + 2;
        }

        // U+0800..U+FFFF
        result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

        return result + 3;
    }

    // Encodes a code point in U+10000..U+10FFFF using four bytes.
    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
        result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));

        return result + 4;
    }

    // Encodes any code point, choosing between low() and high() at U+10000.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
1514
// Sink that counts how many UTF-16 code units the reported code points would occupy.
struct utf16_counter
{
    typedef size_t value_type;

    // A code point reported through low() takes a single code unit.
    static value_type low(value_type result, uint32_t)
    {
        return result + 1;
    }

    // A code point reported through high() takes two code units.
    static value_type high(value_type result, uint32_t)
    {
        return result + 2;
    }
};
1529
// Sink that encodes the reported code points as UTF-16 into a buffer of 16-bit units.
// Every function writes at `result` and returns the position just past the written units.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Stores a code point as a single 16-bit unit.
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Stores a code point as two units: 0xD800 plus the upper bits of (ch - 0x10000),
    // followed by 0xDC00 plus the lower ten bits.
    static value_type high(value_type result, uint32_t ch)
    {
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

        result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
        result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

        return result + 2;
    }

    // Stores any code point, choosing between low() and high() at U+10000.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000) return low(result, ch);

        return high(result, ch);
    }
};
1557
// Sink that counts UTF-32 code units: every reported code point takes exactly one unit.
struct utf32_counter
{
    typedef size_t value_type;

    // One unit for a code point reported through low().
    static value_type low(value_type result, uint32_t)
    {
        return result + 1;
    }

    // One unit for a code point reported through high().
    static value_type high(value_type result, uint32_t)
    {
        return result + 1;
    }
};
1572
// Sink that stores the reported code points unchanged into a buffer of 32-bit units.
// low(), high() and any() all behave the same: write one unit and advance by one.
struct utf32_writer
{
    typedef uint32_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        return any(result, ch);
    }

    static value_type high(value_type result, uint32_t ch)
    {
        return any(result, ch);
    }

    // Stores `ch` at `result` and returns the next write position.
    static value_type any(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }
};
1598
// Sink that stores the reported code points as single Latin-1 bytes;
// anything that does not fit in one byte is replaced with '?'.
struct latin1_writer
{
    typedef uint8_t* value_type;

    // Writes `ch` as one byte when it is at most 255, otherwise writes '?'.
    static value_type low(value_type result, uint32_t ch)
    {
        if (ch > 255)
            *result = static_cast<uint8_t>('?');
        else
            *result = static_cast<uint8_t>(ch);

        return result + 1;
    }

    // Code points reported through high() never fit: always writes '?'.
    static value_type high(value_type result, uint32_t ch)
    {
        (void)ch;

        result[0] = '?';

        return result + 1;
    }
};
1619
// Decodes a UTF-8 byte sequence and forwards every decoded code point to a Traits sink.
struct utf8_decoder
{
    typedef uint8_t type;

    // data, size: input bytes and their count.
    // result:     running sink state; passed through every Traits call and returned at the end.
    // Traits:     supplies value_type, low(result, ch) for code points decoded from the one-, two-
    //             and three-byte forms, and high(result, ch) for the four-byte form.
    // Bytes that do not begin a complete sequence with valid continuation bytes (stray 10xxxxxx
    // bytes, 11111xxx leads, truncated or broken multi-byte forms) are skipped one at a time
    // without being reported to the sink.
    template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
    {
        const uint8_t utf8_byte_mask = 0x3f;

        while (size)
        {
            uint8_t lead = *data;

            // 0xxxxxxx -> U+0000..U+007F
            if (lead < 0x80)
            {
                result = Traits::low(result, lead);
                data += 1;
                size -= 1;

                // process aligned single-byte (ascii) blocks
                if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
                {
                    while (size >= 4)
                    {
                        // fetch four bytes at once with memcpy; reading them by dereferencing a
                        // casted uint32_t pointer would access uint8_t storage through an
                        // lvalue of an unrelated type
                        uint32_t block;
                        memcpy(&block, data, sizeof(block));

                        // a set top bit in any of the four bytes ends the pure-ascii run
                        if ((block & 0x80808080) != 0) break;

                        result = Traits::low(result, data[0]);
                        result = Traits::low(result, data[1]);
                        result = Traits::low(result, data[2]);
                        result = Traits::low(result, data[3]);
                        data += 4;
                        size -= 4;
                    }
                }
            }
            // 110xxxxx -> U+0080..U+07FF
            else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
            {
                result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
                data += 2;
                size -= 2;
            }
            // 1110xxxx -> U+0800-U+FFFF
            else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
            {
                result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
                data += 3;
                size -= 3;
            }
            // 11110xxx -> U+10000..U+10FFFF
            else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
            {
                result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
                data += 4;
                size -= 4;
            }
            // 10xxxxxx or 11111xxx -> invalid
            else
            {
                data += 1;
                size -= 1;
            }
        }

        return result;
    }
};
1686
1687 template <typename opt_swap> struct utf16_decoder
1688 {
1689 typedef uint16_t type;
1690
1691 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1692 {
1693 while (size)
1694 {
1695 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1696
1697 // U+0000..U+D7FF
1698 if (lead < 0xD800)
1699 {
1700 result = Traits::low(result, lead);
1701 data += 1;
1702 size -= 1;
1703 }
1704 // U+E000..U+FFFF
1705 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1706 {
1707 result = Traits::low(result, lead);
1708 data += 1;
1709 size -= 1;
1710 }
1711 // surrogate pair lead
1712 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1713 {
1714 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1715
1716 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1717 {
1718 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1719 data += 2;
1720 size -= 2;
1721 }
1722 else
1723 {
1724 data += 1;
1725 size -= 1;
1726 }
1727 }
1728 else
1729 {
1730 data += 1;
1731 size -= 1;
1732 }
1733 }
1734
1735 return result;
1736 }
1737 };
1738
1739 template <typename opt_swap> struct utf32_decoder
1740 {
1741 typedef uint32_t type;
1742
1743 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1744 {
1745 while (size)
1746 {
1747 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1748
1749 // U+0000..U+FFFF
1750 if (lead < 0x10000)
1751 {
1752 result = Traits::low(result, lead);
1753 data += 1;
1754 size -= 1;
1755 }
1756 // U+10000..U+10FFFF
1757 else
1758 {
1759 result = Traits::high(result, lead);
1760 data += 1;
1761 size -= 1;
1762 }
1763 }
1764
1765 return result;
1766 }
1767 };
1768
// Walks a Latin-1 byte sequence and forwards every byte, as a code point, to Traits::low.
struct latin1_decoder
{
    typedef uint8_t type;

    // data, size: input bytes and their count.
    // result:     running sink state; passed through every Traits call and returned at the end.
    template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
    {
        for (size_t index = 0; index < size; ++index)
        {
            result = Traits::low(result, data[index]);
        }

        return result;
    }
};
1785
1786 template <size_t size> struct wchar_selector;
1787
1788 template <> struct wchar_selector<2>
1789 {
1790 typedef uint16_t type;
1791 typedef utf16_counter counter;
1792 typedef utf16_writer writer;
1793 typedef utf16_decoder<opt_false> decoder;
1794 };
1795
1796 template <> struct wchar_selector<4>
1797 {
1798 typedef uint32_t type;
1799 typedef utf32_counter counter;
1800 typedef utf32_writer writer;
1801 typedef utf32_decoder<opt_false> decoder;
1802 };
1803
1804 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1805 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1806
1807 struct wchar_decoder
1808 {
1809 typedef wchar_t type;
1810
1811 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1812 {
1813 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1814
1815 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1816 }
1817 };
1818
1819#ifdef PUGIXML_WCHAR_MODE
1820 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1821 {
1822 for (size_t i = 0; i < length; ++i)
1823 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1824 }
1825#endif
1826PUGI__NS_END
1827
1828PUGI__NS_BEGIN
// Bit flags stored in chartype_table; each flag names a character class used while parsing.
enum chartype_t
{
    ct_parse_pcdata  = 1 << 0, // \0, &, \r, <
    ct_parse_attr    = 1 << 1, // \0, &, \r, ', "
    ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
    ct_space         = 1 << 3, // \r, \n, space, tab
    ct_parse_cdata   = 1 << 4, // \0, ], >, \r
    ct_parse_comment = 1 << 5, // \0, -, >, \r
    ct_symbol        = 1 << 6, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol  = 1 << 7  // Any symbol > 127, a-z, A-Z, _, :
};
1840
// Character class lookup: entry [c] holds the OR of the chartype_t flags that apply to byte value c.
// Rows cover 16 consecutive byte values each; the trailing comment gives the row's range.
static const unsigned char chartype_table[256] =
{
    55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0x00-0x0F
    0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 0x10-0x1F
    8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 0x20-0x2F
    64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 0x30-0x3F
    0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0x40-0x4F
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 0x50-0x5F
    0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0x60-0x6F
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   0,   0,   0,   // 0x70-0x7F

    // every byte value from 128 upwards is both a symbol and a start symbol
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0x80-0x8F
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0x90-0x9F
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0xA0-0xAF
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0xB0-0xBF
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0xC0-0xCF
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0xD0-0xDF
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 0xE0-0xEF
    192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192  // 0xF0-0xFF
};
1861
// Bit flags stored in chartypex_table; a second set of character classes kept apart from chartype_t.
enum chartypex_t
{
    ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr   = 1 << 1, // Any symbol >= 0 and < 32, &, <, ", '
    ctx_start_symbol   = 1 << 2, // Any symbol > 127, a-z, A-Z, _
    ctx_digit          = 1 << 3, // 0-9
    ctx_symbol         = 1 << 4  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1870
// Character class lookup: entry [c] holds the OR of the chartypex_t flags that apply to byte value c.
// Rows cover 16 consecutive byte values each; the trailing comment gives the row's range.
static const unsigned char chartypex_table[256] =
{
    3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,  // 0x00-0x0F
    3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,  // 0x10-0x1F
    0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0,  16, 16, 0,  // 0x20-0x2F
    24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,  // 0x30-0x3F

    0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0x40-0x4F
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20, // 0x50-0x5F
    0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0x60-0x6F
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,  // 0x70-0x7F

    // every byte value from 128 upwards is both a symbol and a start symbol
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0x80-0x8F
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0x90-0x9F
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0xA0-0xAF
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0xB0-0xBF
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0xC0-0xCF
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0xD0-0xDF
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 0xE0-0xEF
    20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20  // 0xF0-0xFF
};
1892
// Character class tests: each macro yields a non-zero value when the table entry selected by
// character `c` has at least one of the bits in `ct` set.
#ifdef PUGIXML_WCHAR_MODE
// wide mode: values below 128 index the table directly, every other value shares entry 128
#   define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
// narrow mode: the character is converted to unsigned char and used as the index
#   define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

// lookup against the chartype_t flags
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
// lookup against the chartypex_t flags
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1901
1902 PUGI__FN bool is_little_endian()
1903 {
1904 unsigned int ui = 1;
1905
1906 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1907 }
1908
1909 PUGI__FN xml_encoding get_wchar_encoding()
1910 {
1911 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1912
1913 if (sizeof(wchar_t) == 2)
1914 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1915 else
1916 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1917 }
1918
1919 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1920 {
1921 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1922 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1923
1924 // check if we have a non-empty XML declaration
1925 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1926 return false;
1927
1928 // scan XML declaration until the encoding field
1929 for (size_t i = 6; i + 1 < size; ++i)
1930 {
1931 // declaration can not contain ? in quoted values
1932 if (data[i] == '?')
1933 return false;
1934
1935 if (data[i] == 'e' && data[i + 1] == 'n')
1936 {
1937 size_t offset = i;
1938
1939 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1940 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1941 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
1942
1943 // S? = S?
1944 PUGI__SCANCHARTYPE(ct_space);
1945 PUGI__SCANCHAR('=');
1946 PUGI__SCANCHARTYPE(ct_space);
1947
1948 // the only two valid delimiters are ' and "
1949 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1950
1951 PUGI__SCANCHAR(delimiter);
1952
1953 size_t start = offset;
1954
1955 out_encoding = data + offset;
1956
1957 PUGI__SCANCHARTYPE(ct_symbol);
1958
1959 out_length = offset - start;
1960
1961 PUGI__SCANCHAR(delimiter);
1962
1963 return true;
1964 }
1965 }
1966
1967 return false;
1968
1969 #undef PUGI__SCANCHAR
1970 #undef PUGI__SCANCHARTYPE
1971 }
1972
1973 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1974 {
1975 // skip encoding autodetection if input buffer is too small
1976 if (size < 4) return encoding_utf8;
1977
1978 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1979
1980 // look for BOM in first few bytes
1981 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1982 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1983 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1984 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1985 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1986
1987 // look for <, <? or <?xm in various encodings
1988 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1989 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1990 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1991 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1992
1993 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1994 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1995 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1996
1997 // no known BOM detected; parse declaration
1998 const uint8_t* enc = 0;
1999 size_t enc_length = 0;
2000
2001 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2002 {
2003 // iso-8859-1 (case-insensitive)
2004 if (enc_length == 10
2005 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2006 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2007 && enc[8] == '-' && enc[9] == '1')
2008 return encoding_latin1;
2009
2010 // latin1 (case-insensitive)
2011 if (enc_length == 6
2012 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2013 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2014 && enc[5] == '1')
2015 return encoding_latin1;
2016 }
2017
2018 return encoding_utf8;
2019 }
2020
2021 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2022 {
2023 // replace wchar encoding with utf implementation
2024 if (encoding == encoding_wchar) return get_wchar_encoding();
2025
2026 // replace utf16 encoding with utf16 with specific endianness
2027 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2028
2029 // replace utf32 encoding with utf32 with specific endianness
2030 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2031
2032 // only do autodetection if no explicit encoding is requested
2033 if (encoding != encoding_auto) return encoding;
2034
2035 // try to guess encoding (based on XML specification, Appendix F.1)
2036 const uint8_t* data = static_cast<const uint8_t*>(contents);
2037
2038 return guess_buffer_encoding(data, size);
2039 }
2040
2041 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2042 {
2043 size_t length = size / sizeof(char_t);
2044
2045 if (is_mutable)
2046 {
2047 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2048 out_length = length;
2049 }
2050 else
2051 {
2052 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2053 if (!buffer) return false;
2054
2055 if (contents)
2056 memcpy(buffer, contents, length * sizeof(char_t));
2057 else
2058 assert(length == 0);
2059
2060 buffer[length] = 0;
2061
2062 out_buffer = buffer;
2063 out_length = length + 1;
2064 }
2065
2066 return true;
2067 }
2068
2069#ifdef PUGIXML_WCHAR_MODE
2070 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2071 {
2072 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2073 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2074 }
2075
2076 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2077 {
2078 const char_t* data = static_cast<const char_t*>(contents);
2079 size_t length = size / sizeof(char_t);
2080
2081 if (is_mutable)
2082 {
2083 char_t* buffer = const_cast<char_t*>(data);
2084
2085 convert_wchar_endian_swap(buffer, data, length);
2086
2087 out_buffer = buffer;
2088 out_length = length;
2089 }
2090 else
2091 {
2092 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2093 if (!buffer) return false;
2094
2095 convert_wchar_endian_swap(buffer, data, length);
2096 buffer[length] = 0;
2097
2098 out_buffer = buffer;
2099 out_length = length + 1;
2100 }
2101
2102 return true;
2103 }
2104
2105 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2106 {
2107 const typename D::type* data = static_cast<const typename D::type*>(contents);
2108 size_t data_length = size / sizeof(typename D::type);
2109
2110 // first pass: get length in wchar_t units
2111 size_t length = D::process(data, data_length, 0, wchar_counter());
2112
2113 // allocate buffer of suitable length
2114 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2115 if (!buffer) return false;
2116
2117 // second pass: convert utf16 input to wchar_t
2118 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2119 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2120
2121 assert(oend == obegin + length);
2122 *oend = 0;
2123
2124 out_buffer = buffer;
2125 out_length = length + 1;
2126
2127 return true;
2128 }
2129
2130 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2131 {
2132 // get native encoding
2133 xml_encoding wchar_encoding = get_wchar_encoding();
2134
2135 // fast path: no conversion required
2136 if (encoding == wchar_encoding)
2137 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2138
2139 // only endian-swapping is required
2140 if (need_endian_swap_utf(encoding, wchar_encoding))
2141 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2142
2143 // source encoding is utf8
2144 if (encoding == encoding_utf8)
2145 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2146
2147 // source encoding is utf16
2148 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2149 {
2150 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2151
2152 return (native_encoding == encoding) ?
2153 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2154 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2155 }
2156
2157 // source encoding is utf32
2158 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2159 {
2160 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2161
2162 return (native_encoding == encoding) ?
2163 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2164 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2165 }
2166
2167 // source encoding is latin1
2168 if (encoding == encoding_latin1)
2169 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2170
2171 assert(false && "Invalid encoding"); // unreachable
2172 return false;
2173 }
2174#else
2175 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2176 {
2177 const typename D::type* data = static_cast<const typename D::type*>(contents);
2178 size_t data_length = size / sizeof(typename D::type);
2179
2180 // first pass: get length in utf8 units
2181 size_t length = D::process(data, data_length, 0, utf8_counter());
2182
2183 // allocate buffer of suitable length
2184 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2185 if (!buffer) return false;
2186
2187 // second pass: convert utf16 input to utf8
2188 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2189 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2190
2191 assert(oend == obegin + length);
2192 *oend = 0;
2193
2194 out_buffer = buffer;
2195 out_length = length + 1;
2196
2197 return true;
2198 }
2199
2200 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2201 {
2202 for (size_t i = 0; i < size; ++i)
2203 if (data[i] > 127)
2204 return i;
2205
2206 return size;
2207 }
2208
2209 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2210 {
2211 const uint8_t* data = static_cast<const uint8_t*>(contents);
2212 size_t data_length = size;
2213
2214 // get size of prefix that does not need utf8 conversion
2215 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2216 assert(prefix_length <= data_length);
2217
2218 const uint8_t* postfix = data + prefix_length;
2219 size_t postfix_length = data_length - prefix_length;
2220
2221 // if no conversion is needed, just return the original buffer
2222 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2223
2224 // first pass: get length in utf8 units
2225 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2226
2227 // allocate buffer of suitable length
2228 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2229 if (!buffer) return false;
2230
2231 // second pass: convert latin1 input to utf8
2232 memcpy(buffer, data, prefix_length);
2233
2234 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2235 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2236
2237 assert(oend == obegin + length);
2238 *oend = 0;
2239
2240 out_buffer = buffer;
2241 out_length = length + 1;
2242
2243 return true;
2244 }
2245
2246 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2247 {
2248 // fast path: no conversion required
2249 if (encoding == encoding_utf8)
2250 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2251
2252 // source encoding is utf16
2253 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2254 {
2255 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2256
2257 return (native_encoding == encoding) ?
2258 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2259 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2260 }
2261
2262 // source encoding is utf32
2263 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2264 {
2265 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2266
2267 return (native_encoding == encoding) ?
2268 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2269 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2270 }
2271
2272 // source encoding is latin1
2273 if (encoding == encoding_latin1)
2274 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2275
2276 assert(false && "Invalid encoding"); // unreachable
2277 return false;
2278 }
2279#endif
2280
2281 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2282 {
2283 // get length in utf8 characters
2284 return wchar_decoder::process(str, length, 0, utf8_counter());
2285 }
2286
2287 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2288 {
2289 // convert to utf8
2290 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2291 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2292
2293 assert(begin + size == end);
2294 (void)!end;
2295 (void)!size;
2296 }
2297
2298#ifndef PUGIXML_NO_STL
2299 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2300 {
2301 // first pass: get length in utf8 characters
2302 size_t size = as_utf8_begin(str, length);
2303
2304 // allocate resulting string
2305 std::string result;
2306 result.resize(size);
2307
2308 // second pass: convert to utf8
2309 if (size > 0) as_utf8_end(&result[0], size, str, length);
2310
2311 return result;
2312 }
2313
2314 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2315 {
2316 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2317
2318 // first pass: get length in wchar_t units
2319 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2320
2321 // allocate resulting string
2322 std::basic_string<wchar_t> result;
2323 result.resize(length);
2324
2325 // second pass: convert to wchar_t
2326 if (length > 0)
2327 {
2328 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2329 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2330
2331 assert(begin + length == end);
2332 (void)!end;
2333 }
2334
2335 return result;
2336 }
2337#endif
2338
2339 template <typename Header>
2340 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2341 {
2342 // never reuse shared memory
2343 if (header & xml_memory_page_contents_shared_mask) return false;
2344
2345 size_t target_length = strlength(target);
2346
2347 // always reuse document buffer memory if possible
2348 if ((header & header_mask) == 0) return target_length >= length;
2349
2350 // reuse heap memory if waste is not too great
2351 const size_t reuse_threshold = 32;
2352
2353 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2354 }
2355
2356 template <typename String, typename Header>
2357 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2358 {
2359 if (source_length == 0)
2360 {
2361 // empty string and null pointer are equivalent, so just deallocate old memory
2362 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2363
2364 if (header & header_mask) alloc->deallocate_string(dest);
2365
2366 // mark the string as not allocated
2367 dest = 0;
2368 header &= ~header_mask;
2369
2370 return true;
2371 }
2372 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2373 {
2374 // we can reuse old buffer, so just copy the new data (including zero terminator)
2375 memcpy(dest, source, source_length * sizeof(char_t));
2376 dest[source_length] = 0;
2377
2378 return true;
2379 }
2380 else
2381 {
2382 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2383
2384 if (!alloc->reserve()) return false;
2385
2386 // allocate new buffer
2387 char_t* buf = alloc->allocate_string(source_length + 1);
2388 if (!buf) return false;
2389
2390 // copy the string (including zero terminator)
2391 memcpy(buf, source, source_length * sizeof(char_t));
2392 buf[source_length] = 0;
2393
2394 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2395 if (header & header_mask) alloc->deallocate_string(dest);
2396
2397 // the string is now allocated, so set the flag
2398 dest = buf;
2399 header |= header_mask;
2400
2401 return true;
2402 }
2403 }
2404
2405 struct gap
2406 {
2407 char_t* end;
2408 size_t size;
2409
2410 gap(): end(0), size(0)
2411 {
2412 }
2413
2414 // Push new gap, move s count bytes further (skipping the gap).
2415 // Collapse previous gap.
2416 void push(char_t*& s, size_t count)
2417 {
2418 if (end) // there was a gap already; collapse it
2419 {
2420 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2421 assert(s >= end);
2422 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2423 }
2424
2425 s += count; // end of current gap
2426
2427 // "merge" two gaps
2428 end = s;
2429 size += count;
2430 }
2431
2432 // Collapse all gaps, return past-the-end pointer
2433 char_t* flush(char_t* s)
2434 {
2435 if (end)
2436 {
2437 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2438 assert(s >= end);
2439 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2440
2441 return s - size;
2442 }
2443 else return s;
2444 }
2445 };
2446
2447 PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
2448 {
2449 char_t* stre = s + 1;
2450
2451 switch (*stre)
2452 {
2453 case '#': // &#...
2454 {
2455 unsigned int ucsc = 0;
2456
2457 if (stre[1] == 'x') // &#x... (hex code)
2458 {
2459 stre += 2;
2460
2461 char_t ch = *stre;
2462
2463 if (ch == ';') return stre;
2464
2465 for (;;)
2466 {
2467 if (static_cast<unsigned int>(ch - '0') <= 9)
2468 ucsc = 16 * ucsc + (ch - '0');
2469 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
2470 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
2471 else if (ch == ';')
2472 break;
2473 else // cancel
2474 return stre;
2475
2476 ch = *++stre;
2477 }
2478
2479 ++stre;
2480 }
2481 else // &#... (dec code)
2482 {
2483 char_t ch = *++stre;
2484
2485 if (ch == ';') return stre;
2486
2487 for (;;)
2488 {
2489 if (static_cast<unsigned int>(ch - '0') <= 9)
2490 ucsc = 10 * ucsc + (ch - '0');
2491 else if (ch == ';')
2492 break;
2493 else // cancel
2494 return stre;
2495
2496 ch = *++stre;
2497 }
2498
2499 ++stre;
2500 }
2501
2502 #ifdef PUGIXML_WCHAR_MODE
2503 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
2504 #else
2505 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
2506 #endif
2507
2508 g.push(s, stre - s);
2509 return stre;
2510 }
2511
2512 case 'a': // &a
2513 {
2514 ++stre;
2515
2516 if (*stre == 'm') // &am
2517 {
2518 if (*++stre == 'p' && *++stre == ';') // &amp;
2519 {
2520 *s++ = '&';
2521 ++stre;
2522
2523 g.push(s, stre - s);
2524 return stre;
2525 }
2526 }
2527 else if (*stre == 'p') // &ap
2528 {
2529 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
2530 {
2531 *s++ = '\'';
2532 ++stre;
2533
2534 g.push(s, stre - s);
2535 return stre;
2536 }
2537 }
2538 break;
2539 }
2540
2541 case 'g': // &g
2542 {
2543 if (*++stre == 't' && *++stre == ';') // &gt;
2544 {
2545 *s++ = '>';
2546 ++stre;
2547
2548 g.push(s, stre - s);
2549 return stre;
2550 }
2551 break;
2552 }
2553
2554 case 'l': // &l
2555 {
2556 if (*++stre == 't' && *++stre == ';') // &lt;
2557 {
2558 *s++ = '<';
2559 ++stre;
2560
2561 g.push(s, stre - s);
2562 return stre;
2563 }
2564 break;
2565 }
2566
2567 case 'q': // &q
2568 {
2569 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
2570 {
2571 *s++ = '"';
2572 ++stre;
2573
2574 g.push(s, stre - s);
2575 return stre;
2576 }
2577 break;
2578 }
2579
2580 default:
2581 break;
2582 }
2583
2584 return stre;
2585 }
2586
// Parser utilities
// These macros expand in place and use names from the enclosing scope: s (current position),
// endch, optmsk, cursor, alloc, ch, error_offset and error_status.

// True if c equals e, or if c is the null terminator and endch equals e.
#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))

// Advances s past characters classified as ct_space.
#define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }

// Non-zero if any bit of OPT is set in optmsk.
#define PUGI__OPTSET(OPT) ( optmsk & (OPT) )

// Appends a node of the given type under cursor and makes it the new cursor;
// leaves the enclosing function with status_out_of_memory if that fails.
#define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }

// Moves cursor to its parent.
#define PUGI__POPNODE() { cursor = cursor->parent; }

// Advances s until X holds or the null terminator is reached.
#define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; }

// Advances s for as long as X holds.
#define PUGI__SCANWHILE(X) { while (X) ++s; }

// Same idea as PUGI__SCANWHILE, but X must test the local `ss` (a copy of the character under
// inspection) rather than *s; four characters are examined per loop iteration.
#define PUGI__SCANWHILE_UNROLL(X) \
    { \
        for (;;) \
        { \
            char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } \
            ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } \
            ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } \
            ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } \
            s += 4; \
        } \
    }

// Saves the character at s in ch, overwrites it with a null terminator and steps past it.
#define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; }

// Records the error position and status, then returns a null char_t* from the enclosing
// function. Expands to a bare return statement without a trailing semicolon.
#define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)

// Raises err at position m if s has reached the null terminator.
#define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2599
2600 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2601 {
2602 gap g;
2603
2604 while (true)
2605 {
2606 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2607
2608 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2609 {
2610 *s++ = '\n'; // replace first one with 0x0a
2611
2612 if (*s == '\n') g.push(s, 1);
2613 }
2614 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2615 {
2616 *g.flush(s) = 0;
2617
2618 return s + (s[2] == '>' ? 3 : 2);
2619 }
2620 else if (*s == 0)
2621 {
2622 return 0;
2623 }
2624 else ++s;
2625 }
2626 }
2627
2628 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2629 {
2630 gap g;
2631
2632 while (true)
2633 {
2634 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2635
2636 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2637 {
2638 *s++ = '\n'; // replace first one with 0x0a
2639
2640 if (*s == '\n') g.push(s, 1);
2641 }
2642 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2643 {
2644 *g.flush(s) = 0;
2645
2646 return s + 1;
2647 }
2648 else if (*s == 0)
2649 {
2650 return 0;
2651 }
2652 else ++s;
2653 }
2654 }
2655
2656 typedef char_t* (*strconv_pcdata_t)(char_t*);
2657
2658 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2659 {
2660 static char_t* parse(char_t* s)
2661 {
2662 gap g;
2663
2664 char_t* begin = s;
2665
2666 while (true)
2667 {
2668 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2669
2670 if (*s == '<') // PCDATA ends here
2671 {
2672 char_t* end = g.flush(s);
2673
2674 if (opt_trim::value)
2675 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2676 --end;
2677
2678 *end = 0;
2679
2680 return s + 1;
2681 }
2682 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2683 {
2684 *s++ = '\n'; // replace first one with 0x0a
2685
2686 if (*s == '\n') g.push(s, 1);
2687 }
2688 else if (opt_escape::value && *s == '&')
2689 {
2690 s = strconv_escape(s, g);
2691 }
2692 else if (*s == 0)
2693 {
2694 char_t* end = g.flush(s);
2695
2696 if (opt_trim::value)
2697 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2698 --end;
2699
2700 *end = 0;
2701
2702 return s;
2703 }
2704 else ++s;
2705 }
2706 }
2707 };
2708
2709 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2710 {
2711 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2712
2713 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2714 {
2715 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2716 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2717 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2718 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2719 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2720 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2721 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2722 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2723 default: assert(false); return 0; // unreachable
2724 }
2725 }
2726
2727 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2728
2729 template <typename opt_escape> struct strconv_attribute_impl
2730 {
2731 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2732 {
2733 gap g;
2734
2735 // trim leading whitespaces
2736 if (PUGI__IS_CHARTYPE(*s, ct_space))
2737 {
2738 char_t* str = s;
2739
2740 do ++str;
2741 while (PUGI__IS_CHARTYPE(*str, ct_space));
2742
2743 g.push(s, str - s);
2744 }
2745
2746 while (true)
2747 {
2748 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2749
2750 if (*s == end_quote)
2751 {
2752 char_t* str = g.flush(s);
2753
2754 do *str-- = 0;
2755 while (PUGI__IS_CHARTYPE(*str, ct_space));
2756
2757 return s + 1;
2758 }
2759 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2760 {
2761 *s++ = ' ';
2762
2763 if (PUGI__IS_CHARTYPE(*s, ct_space))
2764 {
2765 char_t* str = s + 1;
2766 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2767
2768 g.push(s, str - s);
2769 }
2770 }
2771 else if (opt_escape::value && *s == '&')
2772 {
2773 s = strconv_escape(s, g);
2774 }
2775 else if (!*s)
2776 {
2777 return 0;
2778 }
2779 else ++s;
2780 }
2781 }
2782
2783 static char_t* parse_wconv(char_t* s, char_t end_quote)
2784 {
2785 gap g;
2786
2787 while (true)
2788 {
2789 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2790
2791 if (*s == end_quote)
2792 {
2793 *g.flush(s) = 0;
2794
2795 return s + 1;
2796 }
2797 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2798 {
2799 if (*s == '\r')
2800 {
2801 *s++ = ' ';
2802
2803 if (*s == '\n') g.push(s, 1);
2804 }
2805 else *s++ = ' ';
2806 }
2807 else if (opt_escape::value && *s == '&')
2808 {
2809 s = strconv_escape(s, g);
2810 }
2811 else if (!*s)
2812 {
2813 return 0;
2814 }
2815 else ++s;
2816 }
2817 }
2818
2819 static char_t* parse_eol(char_t* s, char_t end_quote)
2820 {
2821 gap g;
2822
2823 while (true)
2824 {
2825 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2826
2827 if (*s == end_quote)
2828 {
2829 *g.flush(s) = 0;
2830
2831 return s + 1;
2832 }
2833 else if (*s == '\r')
2834 {
2835 *s++ = '\n';
2836
2837 if (*s == '\n') g.push(s, 1);
2838 }
2839 else if (opt_escape::value && *s == '&')
2840 {
2841 s = strconv_escape(s, g);
2842 }
2843 else if (!*s)
2844 {
2845 return 0;
2846 }
2847 else ++s;
2848 }
2849 }
2850
2851 static char_t* parse_simple(char_t* s, char_t end_quote)
2852 {
2853 gap g;
2854
2855 while (true)
2856 {
2857 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2858
2859 if (*s == end_quote)
2860 {
2861 *g.flush(s) = 0;
2862
2863 return s + 1;
2864 }
2865 else if (opt_escape::value && *s == '&')
2866 {
2867 s = strconv_escape(s, g);
2868 }
2869 else if (!*s)
2870 {
2871 return 0;
2872 }
2873 else ++s;
2874 }
2875 }
2876 };
2877
2878 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2879 {
2880 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2881
2882 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2883 {
2884 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2885 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2886 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2887 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2888 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2889 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2890 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2891 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2892 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2893 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2894 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2895 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2896 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2897 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2898 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2899 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2900 default: assert(false); return 0; // unreachable
2901 }
2902 }
2903
2904 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2905 {
2906 xml_parse_result result;
2907 result.status = status;
2908 result.offset = offset;
2909
2910 return result;
2911 }
2912
2913 struct xml_parser
2914 {
2915 xml_allocator* alloc;
2916 char_t* error_offset;
2917 xml_parse_status error_status;
2918
2919 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2920 {
2921 }
2922
2923 // DOCTYPE consists of nested sections of the following possible types:
2924 // <!-- ... -->, <? ... ?>, "...", '...'
2925 // <![...]]>
2926 // <!...>
2927 // First group can not contain nested groups
2928 // Second group can contain nested groups of the same type
2929 // Third group can contain all other groups
2930 char_t* parse_doctype_primitive(char_t* s)
2931 {
2932 if (*s == '"' || *s == '\'')
2933 {
2934 // quoted string
2935 char_t ch = *s++;
2936 PUGI__SCANFOR(*s == ch);
2937 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2938
2939 s++;
2940 }
2941 else if (s[0] == '<' && s[1] == '?')
2942 {
2943 // <? ... ?>
2944 s += 2;
2945 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2946 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2947
2948 s += 2;
2949 }
2950 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2951 {
2952 s += 4;
2953 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2954 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2955
2956 s += 3;
2957 }
2958 else PUGI__THROW_ERROR(status_bad_doctype, s);
2959
2960 return s;
2961 }
2962
2963 char_t* parse_doctype_ignore(char_t* s)
2964 {
2965 size_t depth = 0;
2966
2967 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2968 s += 3;
2969
2970 while (*s)
2971 {
2972 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2973 {
2974 // nested ignore section
2975 s += 3;
2976 depth++;
2977 }
2978 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2979 {
2980 // ignore section end
2981 s += 3;
2982
2983 if (depth == 0)
2984 return s;
2985
2986 depth--;
2987 }
2988 else s++;
2989 }
2990
2991 PUGI__THROW_ERROR(status_bad_doctype, s);
2992 }
2993
2994 char_t* parse_doctype_group(char_t* s, char_t endch)
2995 {
2996 size_t depth = 0;
2997
2998 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2999 s += 2;
3000
3001 while (*s)
3002 {
3003 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3004 {
3005 if (s[2] == '[')
3006 {
3007 // ignore
3008 s = parse_doctype_ignore(s);
3009 if (!s) return s;
3010 }
3011 else
3012 {
3013 // some control group
3014 s += 2;
3015 depth++;
3016 }
3017 }
3018 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3019 {
3020 // unknown tag (forbidden), or some primitive group
3021 s = parse_doctype_primitive(s);
3022 if (!s) return s;
3023 }
3024 else if (*s == '>')
3025 {
3026 if (depth == 0)
3027 return s;
3028
3029 depth--;
3030 s++;
3031 }
3032 else s++;
3033 }
3034
3035 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3036
3037 return s;
3038 }
3039
3040 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3041 {
3042 // parse node contents, starting with exclamation mark
3043 ++s;
3044
3045 if (*s == '-') // '<!-...'
3046 {
3047 ++s;
3048
3049 if (*s == '-') // '<!--...'
3050 {
3051 ++s;
3052
3053 if (PUGI__OPTSET(parse_comments))
3054 {
3055 PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3056 cursor->value = s; // Save the offset.
3057 }
3058
3059 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3060 {
3061 s = strconv_comment(s, endch);
3062
3063 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3064 }
3065 else
3066 {
3067 // Scan for terminating '-->'.
3068 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3069 PUGI__CHECK_ERROR(status_bad_comment, s);
3070
3071 if (PUGI__OPTSET(parse_comments))
3072 *s = 0; // Zero-terminate this segment at the first terminating '-'.
3073
3074 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3075 }
3076 }
3077 else PUGI__THROW_ERROR(status_bad_comment, s);
3078 }
3079 else if (*s == '[')
3080 {
3081 // '<![CDATA[...'
3082 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3083 {
3084 ++s;
3085
3086 if (PUGI__OPTSET(parse_cdata))
3087 {
3088 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3089 cursor->value = s; // Save the offset.
3090
3091 if (PUGI__OPTSET(parse_eol))
3092 {
3093 s = strconv_cdata(s, endch);
3094
3095 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3096 }
3097 else
3098 {
3099 // Scan for terminating ']]>'.
3100 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3101 PUGI__CHECK_ERROR(status_bad_cdata, s);
3102
3103 *s++ = 0; // Zero-terminate this segment.
3104 }
3105 }
3106 else // Flagged for discard, but we still have to scan for the terminator.
3107 {
3108 // Scan for terminating ']]>'.
3109 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3110 PUGI__CHECK_ERROR(status_bad_cdata, s);
3111
3112 ++s;
3113 }
3114
3115 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3116 }
3117 else PUGI__THROW_ERROR(status_bad_cdata, s);
3118 }
3119 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3120 {
3121 s -= 2;
3122
3123 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3124
3125 char_t* mark = s + 9;
3126
3127 s = parse_doctype_group(s, endch);
3128 if (!s) return s;
3129
3130 assert((*s == 0 && endch == '>') || *s == '>');
3131 if (*s) *s++ = 0;
3132
3133 if (PUGI__OPTSET(parse_doctype))
3134 {
3135 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3136
3137 PUGI__PUSHNODE(node_doctype);
3138
3139 cursor->value = mark;
3140 }
3141 }
3142 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3143 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3144 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3145
3146 return s;
3147 }
3148
3149 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3150 {
3151 // load into registers
3152 xml_node_struct* cursor = ref_cursor;
3153 char_t ch = 0;
3154
3155 // parse node contents, starting with question mark
3156 ++s;
3157
3158 // read PI target
3159 char_t* target = s;
3160
3161 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3162
3163 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3164 PUGI__CHECK_ERROR(status_bad_pi, s);
3165
3166 // determine node type; stricmp / strcasecmp is not portable
3167 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3168
3169 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3170 {
3171 if (declaration)
3172 {
3173 // disallow non top-level declarations
3174 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3175
3176 PUGI__PUSHNODE(node_declaration);
3177 }
3178 else
3179 {
3180 PUGI__PUSHNODE(node_pi);
3181 }
3182
3183 cursor->name = target;
3184
3185 PUGI__ENDSEG();
3186
3187 // parse value/attributes
3188 if (ch == '?')
3189 {
3190 // empty node
3191 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3192 s += (*s == '>');
3193
3194 PUGI__POPNODE();
3195 }
3196 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3197 {
3198 PUGI__SKIPWS();
3199
3200 // scan for tag end
3201 char_t* value = s;
3202
3203 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3204 PUGI__CHECK_ERROR(status_bad_pi, s);
3205
3206 if (declaration)
3207 {
3208 // replace ending ? with / so that 'element' terminates properly
3209 *s = '/';
3210
3211 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3212 s = value;
3213 }
3214 else
3215 {
3216 // store value and step over >
3217 cursor->value = value;
3218
3219 PUGI__POPNODE();
3220
3221 PUGI__ENDSEG();
3222
3223 s += (*s == '>');
3224 }
3225 }
3226 else PUGI__THROW_ERROR(status_bad_pi, s);
3227 }
3228 else
3229 {
3230 // scan for tag end
3231 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3232 PUGI__CHECK_ERROR(status_bad_pi, s);
3233
3234 s += (s[1] == '>' ? 2 : 1);
3235 }
3236
3237 // store from registers
3238 ref_cursor = cursor;
3239
3240 return s;
3241 }
3242
3243 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3244 {
3245 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3246 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3247
3248 char_t ch = 0;
3249 xml_node_struct* cursor = root;
3250 char_t* mark = s;
3251
3252 while (*s != 0)
3253 {
3254 if (*s == '<')
3255 {
3256 ++s;
3257
3258 LOC_TAG:
3259 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3260 {
3261 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3262
3263 cursor->name = s;
3264
3265 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3266 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3267
3268 if (ch == '>')
3269 {
3270 // end of tag
3271 }
3272 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3273 {
3274 LOC_ATTRIBUTES:
3275 while (true)
3276 {
3277 PUGI__SKIPWS(); // Eat any whitespace.
3278
3279 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3280 {
3281 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3282 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3283
3284 a->name = s; // Save the offset.
3285
3286 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3287 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3288
3289 if (PUGI__IS_CHARTYPE(ch, ct_space))
3290 {
3291 PUGI__SKIPWS(); // Eat any whitespace.
3292
3293 ch = *s;
3294 ++s;
3295 }
3296
3297 if (ch == '=') // '<... #=...'
3298 {
3299 PUGI__SKIPWS(); // Eat any whitespace.
3300
3301 if (*s == '"' || *s == '\'') // '<... #="...'
3302 {
3303 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3304 ++s; // Step over the quote.
3305 a->value = s; // Save the offset.
3306
3307 s = strconv_attribute(s, ch);
3308
3309 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3310
3311 // After this line the loop continues from the start;
3312 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3313 // everything else will be detected
3314 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3315 }
3316 else PUGI__THROW_ERROR(status_bad_attribute, s);
3317 }
3318 else PUGI__THROW_ERROR(status_bad_attribute, s);
3319 }
3320 else if (*s == '/')
3321 {
3322 ++s;
3323
3324 if (*s == '>')
3325 {
3326 PUGI__POPNODE();
3327 s++;
3328 break;
3329 }
3330 else if (*s == 0 && endch == '>')
3331 {
3332 PUGI__POPNODE();
3333 break;
3334 }
3335 else PUGI__THROW_ERROR(status_bad_start_element, s);
3336 }
3337 else if (*s == '>')
3338 {
3339 ++s;
3340
3341 break;
3342 }
3343 else if (*s == 0 && endch == '>')
3344 {
3345 break;
3346 }
3347 else PUGI__THROW_ERROR(status_bad_start_element, s);
3348 }
3349
3350 // !!!
3351 }
3352 else if (ch == '/') // '<#.../'
3353 {
3354 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3355
3356 PUGI__POPNODE(); // Pop.
3357
3358 s += (*s == '>');
3359 }
3360 else if (ch == 0)
3361 {
3362 // we stepped over null terminator, backtrack & handle closing tag
3363 --s;
3364
3365 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3366 }
3367 else PUGI__THROW_ERROR(status_bad_start_element, s);
3368 }
3369 else if (*s == '/')
3370 {
3371 ++s;
3372
3373 mark = s;
3374
3375 char_t* name = cursor->name;
3376 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3377
3378 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3379 {
3380 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3381 }
3382
3383 if (*name)
3384 {
3385 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3386 else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3387 }
3388
3389 PUGI__POPNODE(); // Pop.
3390
3391 PUGI__SKIPWS();
3392
3393 if (*s == 0)
3394 {
3395 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3396 }
3397 else
3398 {
3399 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3400 ++s;
3401 }
3402 }
3403 else if (*s == '?') // '<?...'
3404 {
3405 s = parse_question(s, cursor, optmsk, endch);
3406 if (!s) return s;
3407
3408 assert(cursor);
3409 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3410 }
3411 else if (*s == '!') // '<!...'
3412 {
3413 s = parse_exclamation(s, cursor, optmsk, endch);
3414 if (!s) return s;
3415 }
3416 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3417 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3418 }
3419 else
3420 {
3421 mark = s; // Save this offset while searching for a terminator.
3422
3423 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3424
3425 if (*s == '<' || !*s)
3426 {
3427 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3428 assert(mark != s);
3429
3430 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3431 {
3432 continue;
3433 }
3434 else if (PUGI__OPTSET(parse_ws_pcdata_single))
3435 {
3436 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3437 }
3438 }
3439
3440 if (!PUGI__OPTSET(parse_trim_pcdata))
3441 s = mark;
3442
3443 if (cursor->parent || PUGI__OPTSET(parse_fragment))
3444 {
3445 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3446 {
3447 cursor->value = s; // Save the offset.
3448 }
3449 else
3450 {
3451 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3452
3453 cursor->value = s; // Save the offset.
3454
3455 PUGI__POPNODE(); // Pop since this is a standalone.
3456 }
3457
3458 s = strconv_pcdata(s);
3459
3460 if (!*s) break;
3461 }
3462 else
3463 {
3464 PUGI__SCANFOR(*s == '<'); // '...<'
3465 if (!*s) break;
3466
3467 ++s;
3468 }
3469
3470 // We're after '<'
3471 goto LOC_TAG;
3472 }
3473 }
3474
3475 // check that last tag is closed
3476 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3477
3478 return s;
3479 }
3480
3481 #ifdef PUGIXML_WCHAR_MODE
3482 static char_t* parse_skip_bom(char_t* s)
3483 {
3484 unsigned int bom = 0xfeff;
3485 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3486 }
3487 #else
3488 static char_t* parse_skip_bom(char_t* s)
3489 {
3490 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3491 }
3492 #endif
3493
3494 static bool has_element_node_siblings(xml_node_struct* node)
3495 {
3496 while (node)
3497 {
3498 if (PUGI__NODETYPE(node) == node_element) return true;
3499
3500 node = node->next_sibling;
3501 }
3502
3503 return false;
3504 }
3505
3506 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3507 {
3508 // early-out for empty documents
3509 if (length == 0)
3510 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3511
3512 // get last child of the root before parsing
3513 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3514
3515 // create parser on stack
3516 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3517
3518 // save last character and make buffer zero-terminated (speeds up parsing)
3519 char_t endch = buffer[length - 1];
3520 buffer[length - 1] = 0;
3521
3522 // skip BOM to make sure it does not end up as part of parse output
3523 char_t* buffer_data = parse_skip_bom(buffer);
3524
3525 // perform actual parsing
3526 parser.parse_tree(buffer_data, root, optmsk, endch);
3527
3528 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3529 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3530
3531 if (result)
3532 {
3533 // since we removed last character, we have to handle the only possible false positive (stray <)
3534 if (endch == '<')
3535 return make_parse_result(status_unrecognized_tag, length - 1);
3536
3537 // check if there are any element nodes parsed
3538 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3539
3540 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3541 return make_parse_result(status_no_document_element, length - 1);
3542 }
3543 else
3544 {
3545 // roll back offset if it occurs on a null terminator in the source buffer
3546 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3547 result.offset--;
3548 }
3549
3550 return result;
3551 }
3552 };
3553
3554 // Output facilities
3555 PUGI__FN xml_encoding get_write_native_encoding()
3556 {
3557 #ifdef PUGIXML_WCHAR_MODE
3558 return get_wchar_encoding();
3559 #else
3560 return encoding_utf8;
3561 #endif
3562 }
3563
3564 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3565 {
3566 // replace wchar encoding with utf implementation
3567 if (encoding == encoding_wchar) return get_wchar_encoding();
3568
3569 // replace utf16 encoding with utf16 with specific endianness
3570 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3571
3572 // replace utf32 encoding with utf32 with specific endianness
3573 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3574
3575 // only do autodetection if no explicit encoding is requested
3576 if (encoding != encoding_auto) return encoding;
3577
3578 // assume utf8 encoding
3579 return encoding_utf8;
3580 }
3581
3582 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3583 {
3584 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3585
3586 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3587
3588 return static_cast<size_t>(end - dest) * sizeof(*dest);
3589 }
3590
3591 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3592 {
3593 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3594
3595 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3596
3597 if (opt_swap)
3598 {
3599 for (typename T::value_type i = dest; i != end; ++i)
3600 *i = endian_swap(*i);
3601 }
3602
3603 return static_cast<size_t>(end - dest) * sizeof(*dest);
3604 }
3605
3606#ifdef PUGIXML_WCHAR_MODE
3607 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3608 {
3609 if (length < 1) return 0;
3610
3611 // discard last character if it's the lead of a surrogate pair
3612 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3613 }
3614
3615 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3616 {
3617 // only endian-swapping is required
3618 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3619 {
3620 convert_wchar_endian_swap(r_char, data, length);
3621
3622 return length * sizeof(char_t);
3623 }
3624
3625 // convert to utf8
3626 if (encoding == encoding_utf8)
3627 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3628
3629 // convert to utf16
3630 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3631 {
3632 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3633
3634 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3635 }
3636
3637 // convert to utf32
3638 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3639 {
3640 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3641
3642 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3643 }
3644
3645 // convert to latin1
3646 if (encoding == encoding_latin1)
3647 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3648
3649 assert(false && "Invalid encoding"); // unreachable
3650 return 0;
3651 }
3652#else
3653 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3654 {
3655 if (length < 5) return 0;
3656
3657 for (size_t i = 1; i <= 4; ++i)
3658 {
3659 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3660
3661 // either a standalone character or a leading one
3662 if ((ch & 0xc0) != 0x80) return length - i;
3663 }
3664
3665 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3666 return length;
3667 }
3668
3669 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3670 {
3671 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3672 {
3673 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3674
3675 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3676 }
3677
3678 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3679 {
3680 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3681
3682 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3683 }
3684
3685 if (encoding == encoding_latin1)
3686 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3687
3688 assert(false && "Invalid encoding"); // unreachable
3689 return 0;
3690 }
3691#endif
3692
3693 class xml_buffered_writer
3694 {
3695 xml_buffered_writer(const xml_buffered_writer&);
3696 xml_buffered_writer& operator=(const xml_buffered_writer&);
3697
3698 public:
3699 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3700 {
3701 PUGI__STATIC_ASSERT(bufcapacity >= 8);
3702 }
3703
3704 size_t flush()
3705 {
3706 flush(buffer, bufsize);
3707 bufsize = 0;
3708 return 0;
3709 }
3710
3711 void flush(const char_t* data, size_t size)
3712 {
3713 if (size == 0) return;
3714
3715 // fast path, just write data
3716 if (encoding == get_write_native_encoding())
3717 writer.write(data, size * sizeof(char_t));
3718 else
3719 {
3720 // convert chunk
3721 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3722 assert(result <= sizeof(scratch));
3723
3724 // write data
3725 writer.write(scratch.data_u8, result);
3726 }
3727 }
3728
3729 void write_direct(const char_t* data, size_t length)
3730 {
3731 // flush the remaining buffer contents
3732 flush();
3733
3734 // handle large chunks
3735 if (length > bufcapacity)
3736 {
3737 if (encoding == get_write_native_encoding())
3738 {
3739 // fast path, can just write data chunk
3740 writer.write(data, length * sizeof(char_t));
3741 return;
3742 }
3743
3744 // need to convert in suitable chunks
3745 while (length > bufcapacity)
3746 {
3747 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3748 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3749 size_t chunk_size = get_valid_length(data, bufcapacity);
3750 assert(chunk_size);
3751
3752 // convert chunk and write
3753 flush(data, chunk_size);
3754
3755 // iterate
3756 data += chunk_size;
3757 length -= chunk_size;
3758 }
3759
3760 // small tail is copied below
3761 bufsize = 0;
3762 }
3763
3764 memcpy(buffer + bufsize, data, length * sizeof(char_t));
3765 bufsize += length;
3766 }
3767
3768 void write_buffer(const char_t* data, size_t length)
3769 {
3770 size_t offset = bufsize;
3771
3772 if (offset + length <= bufcapacity)
3773 {
3774 memcpy(buffer + offset, data, length * sizeof(char_t));
3775 bufsize = offset + length;
3776 }
3777 else
3778 {
3779 write_direct(data, length);
3780 }
3781 }
3782
3783 void write_string(const char_t* data)
3784 {
3785 // write the part of the string that fits in the buffer
3786 size_t offset = bufsize;
3787
3788 while (*data && offset < bufcapacity)
3789 buffer[offset++] = *data++;
3790
3791 // write the rest
3792 if (offset < bufcapacity)
3793 {
3794 bufsize = offset;
3795 }
3796 else
3797 {
3798 // backtrack a bit if we have split the codepoint
3799 size_t length = offset - bufsize;
3800 size_t extra = length - get_valid_length(data - length, length);
3801
3802 bufsize = offset - extra;
3803
3804 write_direct(data - extra, strlength(data) + extra);
3805 }
3806 }
3807
3808 void write(char_t d0)
3809 {
3810 size_t offset = bufsize;
3811 if (offset > bufcapacity - 1) offset = flush();
3812
3813 buffer[offset + 0] = d0;
3814 bufsize = offset + 1;
3815 }
3816
3817 void write(char_t d0, char_t d1)
3818 {
3819 size_t offset = bufsize;
3820 if (offset > bufcapacity - 2) offset = flush();
3821
3822 buffer[offset + 0] = d0;
3823 buffer[offset + 1] = d1;
3824 bufsize = offset + 2;
3825 }
3826
3827 void write(char_t d0, char_t d1, char_t d2)
3828 {
3829 size_t offset = bufsize;
3830 if (offset > bufcapacity - 3) offset = flush();
3831
3832 buffer[offset + 0] = d0;
3833 buffer[offset + 1] = d1;
3834 buffer[offset + 2] = d2;
3835 bufsize = offset + 3;
3836 }
3837
3838 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3839 {
3840 size_t offset = bufsize;
3841 if (offset > bufcapacity - 4) offset = flush();
3842
3843 buffer[offset + 0] = d0;
3844 buffer[offset + 1] = d1;
3845 buffer[offset + 2] = d2;
3846 buffer[offset + 3] = d3;
3847 bufsize = offset + 4;
3848 }
3849
3850 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3851 {
3852 size_t offset = bufsize;
3853 if (offset > bufcapacity - 5) offset = flush();
3854
3855 buffer[offset + 0] = d0;
3856 buffer[offset + 1] = d1;
3857 buffer[offset + 2] = d2;
3858 buffer[offset + 3] = d3;
3859 buffer[offset + 4] = d4;
3860 bufsize = offset + 5;
3861 }
3862
3863 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3864 {
3865 size_t offset = bufsize;
3866 if (offset > bufcapacity - 6) offset = flush();
3867
3868 buffer[offset + 0] = d0;
3869 buffer[offset + 1] = d1;
3870 buffer[offset + 2] = d2;
3871 buffer[offset + 3] = d3;
3872 buffer[offset + 4] = d4;
3873 buffer[offset + 5] = d5;
3874 bufsize = offset + 6;
3875 }
3876
3877 // utf8 maximum expansion: x4 (-> utf32)
3878 // utf16 maximum expansion: x2 (-> utf32)
3879 // utf32 maximum expansion: x1
3880 enum
3881 {
3882 bufcapacitybytes =
3883 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3884 PUGIXML_MEMORY_OUTPUT_STACK
3885 #else
3886 10240
3887 #endif
3888 ,
3889 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3890 };
3891
3892 char_t buffer[bufcapacity];
3893
3894 union
3895 {
3896 uint8_t data_u8[4 * bufcapacity];
3897 uint16_t data_u16[2 * bufcapacity];
3898 uint32_t data_u32[bufcapacity];
3899 char_t data_char[bufcapacity];
3900 } scratch;
3901
3902 xml_writer& writer;
3903 size_t bufsize;
3904 xml_encoding encoding;
3905 };
3906
3907 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3908 {
3909 while (*s)
3910 {
3911 const char_t* prev = s;
3912
3913 // While *s is a usual symbol
3914 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3915
3916 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3917
3918 switch (*s)
3919 {
3920 case 0: break;
3921 case '&':
3922 writer.write('&', 'a', 'm', 'p', ';');
3923 ++s;
3924 break;
3925 case '<':
3926 writer.write('&', 'l', 't', ';');
3927 ++s;
3928 break;
3929 case '>':
3930 writer.write('&', 'g', 't', ';');
3931 ++s;
3932 break;
3933 case '"':
3934 if (flags & format_attribute_single_quote)
3935 writer.write('"');
3936 else
3937 writer.write('&', 'q', 'u', 'o', 't', ';');
3938 ++s;
3939 break;
3940 case '\'':
3941 if (flags & format_attribute_single_quote)
3942 writer.write('&', 'a', 'p', 'o', 's', ';');
3943 else
3944 writer.write('\'');
3945 ++s;
3946 break;
3947 default: // s is not a usual symbol
3948 {
3949 unsigned int ch = static_cast<unsigned int>(*s++);
3950 assert(ch < 32);
3951
3952 if (!(flags & format_skip_control_chars))
3953 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3954 }
3955 }
3956 }
3957 }
3958
3959 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3960 {
3961 if (flags & format_no_escapes)
3962 writer.write_string(s);
3963 else
3964 text_output_escaped(writer, s, type, flags);
3965 }
3966
3967 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3968 {
3969 do
3970 {
3971 writer.write('<', '!', '[', 'C', 'D');
3972 writer.write('A', 'T', 'A', '[');
3973
3974 const char_t* prev = s;
3975
3976 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3977 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3978
3979 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3980 if (*s) s += 2;
3981
3982 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3983
3984 writer.write(']', ']', '>');
3985 }
3986 while (*s);
3987 }
3988
3989 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3990 {
3991 switch (indent_length)
3992 {
3993 case 1:
3994 {
3995 for (unsigned int i = 0; i < depth; ++i)
3996 writer.write(indent[0]);
3997 break;
3998 }
3999
4000 case 2:
4001 {
4002 for (unsigned int i = 0; i < depth; ++i)
4003 writer.write(indent[0], indent[1]);
4004 break;
4005 }
4006
4007 case 3:
4008 {
4009 for (unsigned int i = 0; i < depth; ++i)
4010 writer.write(indent[0], indent[1], indent[2]);
4011 break;
4012 }
4013
4014 case 4:
4015 {
4016 for (unsigned int i = 0; i < depth; ++i)
4017 writer.write(indent[0], indent[1], indent[2], indent[3]);
4018 break;
4019 }
4020
4021 default:
4022 {
4023 for (unsigned int i = 0; i < depth; ++i)
4024 writer.write_buffer(indent, indent_length);
4025 }
4026 }
4027 }
4028
4029 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4030 {
4031 writer.write('<', '!', '-', '-');
4032
4033 while (*s)
4034 {
4035 const char_t* prev = s;
4036
4037 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4038 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4039
4040 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4041
4042 if (*s)
4043 {
4044 assert(*s == '-');
4045
4046 writer.write('-', ' ');
4047 ++s;
4048 }
4049 }
4050
4051 writer.write('-', '-', '>');
4052 }
4053
4054 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4055 {
4056 while (*s)
4057 {
4058 const char_t* prev = s;
4059
4060 // look for ?> sequence - we can't output it since ?> terminates PI
4061 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4062
4063 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4064
4065 if (*s)
4066 {
4067 assert(s[0] == '?' && s[1] == '>');
4068
4069 writer.write('?', ' ', '>');
4070 s += 2;
4071 }
4072 }
4073 }
4074
4075 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4076 {
4077 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4078 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4079
4080 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4081 {
4082 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4083 {
4084 writer.write('\n');
4085
4086 text_output_indent(writer, indent, indent_length, depth + 1);
4087 }
4088 else
4089 {
4090 writer.write(' ');
4091 }
4092
4093 writer.write_string(a->name ? a->name + 0 : default_name);
4094 writer.write('=', enquotation_char);
4095
4096 if (a->value)
4097 text_output(writer, a->value, ctx_special_attr, flags);
4098
4099 writer.write(enquotation_char);
4100 }
4101 }
4102
4103 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4104 {
4105 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4106 const char_t* name = node->name ? node->name + 0 : default_name;
4107
4108 writer.write('<');
4109 writer.write_string(name);
4110
4111 if (node->first_attribute)
4112 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4113
4114 // element nodes can have value if parse_embed_pcdata was used
4115 if (!node->value)
4116 {
4117 if (!node->first_child)
4118 {
4119 if (flags & format_no_empty_element_tags)
4120 {
4121 writer.write('>', '<', '/');
4122 writer.write_string(name);
4123 writer.write('>');
4124
4125 return false;
4126 }
4127 else
4128 {
4129 if ((flags & format_raw) == 0)
4130 writer.write(' ');
4131
4132 writer.write('/', '>');
4133
4134 return false;
4135 }
4136 }
4137 else
4138 {
4139 writer.write('>');
4140
4141 return true;
4142 }
4143 }
4144 else
4145 {
4146 writer.write('>');
4147
4148 text_output(writer, node->value, ctx_special_pcdata, flags);
4149
4150 if (!node->first_child)
4151 {
4152 writer.write('<', '/');
4153 writer.write_string(name);
4154 writer.write('>');
4155
4156 return false;
4157 }
4158 else
4159 {
4160 return true;
4161 }
4162 }
4163 }
4164
4165 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4166 {
4167 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4168 const char_t* name = node->name ? node->name + 0 : default_name;
4169
4170 writer.write('<', '/');
4171 writer.write_string(name);
4172 writer.write('>');
4173 }
4174
4175 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4176 {
4177 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4178
4179 switch (PUGI__NODETYPE(node))
4180 {
4181 case node_pcdata:
4182 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4183 break;
4184
4185 case node_cdata:
4186 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4187 break;
4188
4189 case node_comment:
4190 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4191 break;
4192
4193 case node_pi:
4194 writer.write('<', '?');
4195 writer.write_string(node->name ? node->name + 0 : default_name);
4196
4197 if (node->value)
4198 {
4199 writer.write(' ');
4200 node_output_pi_value(writer, node->value);
4201 }
4202
4203 writer.write('?', '>');
4204 break;
4205
4206 case node_declaration:
4207 writer.write('<', '?');
4208 writer.write_string(node->name ? node->name + 0 : default_name);
4209 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4210 writer.write('?', '>');
4211 break;
4212
4213 case node_doctype:
4214 writer.write('<', '!', 'D', 'O', 'C');
4215 writer.write('T', 'Y', 'P', 'E');
4216
4217 if (node->value)
4218 {
4219 writer.write(' ');
4220 writer.write_string(node->value);
4221 }
4222
4223 writer.write('>');
4224 break;
4225
4226 default:
4227 assert(false && "Invalid node type"); // unreachable
4228 }
4229 }
4230
// Bit flags describing which whitespace is written before the next node.
enum indent_flags_t
{
	indent_newline = 1 << 0, // write a line break first
	indent_indent = 1 << 1   // write indentation first
};
4236
4237 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4238 {
4239 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4240 unsigned int indent_flags = indent_indent;
4241
4242 xml_node_struct* node = root;
4243
4244 do
4245 {
4246 assert(node);
4247
4248 // begin writing current node
4249 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4250 {
4251 node_output_simple(writer, node, flags);
4252
4253 indent_flags = 0;
4254 }
4255 else
4256 {
4257 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4258 writer.write('\n');
4259
4260 if ((indent_flags & indent_indent) && indent_length)
4261 text_output_indent(writer, indent, indent_length, depth);
4262
4263 if (PUGI__NODETYPE(node) == node_element)
4264 {
4265 indent_flags = indent_newline | indent_indent;
4266
4267 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4268 {
4269 // element nodes can have value if parse_embed_pcdata was used
4270 if (node->value)
4271 indent_flags = 0;
4272
4273 node = node->first_child;
4274 depth++;
4275 continue;
4276 }
4277 }
4278 else if (PUGI__NODETYPE(node) == node_document)
4279 {
4280 indent_flags = indent_indent;
4281
4282 if (node->first_child)
4283 {
4284 node = node->first_child;
4285 continue;
4286 }
4287 }
4288 else
4289 {
4290 node_output_simple(writer, node, flags);
4291
4292 indent_flags = indent_newline | indent_indent;
4293 }
4294 }
4295
4296 // continue to the next node
4297 while (node != root)
4298 {
4299 if (node->next_sibling)
4300 {
4301 node = node->next_sibling;
4302 break;
4303 }
4304
4305 node = node->parent;
4306
4307 // write closing node
4308 if (PUGI__NODETYPE(node) == node_element)
4309 {
4310 depth--;
4311
4312 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4313 writer.write('\n');
4314
4315 if ((indent_flags & indent_indent) && indent_length)
4316 text_output_indent(writer, indent, indent_length, depth);
4317
4318 node_output_end(writer, node);
4319
4320 indent_flags = indent_newline | indent_indent;
4321 }
4322 }
4323 }
4324 while (node != root);
4325
4326 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4327 writer.write('\n');
4328 }
4329
4330 PUGI__FN bool has_declaration(xml_node_struct* node)
4331 {
4332 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4333 {
4334 xml_node_type type = PUGI__NODETYPE(child);
4335
4336 if (type == node_declaration) return true;
4337 if (type == node_element) return false;
4338 }
4339
4340 return false;
4341 }
4342
4343 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4344 {
4345 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4346 if (a == attr)
4347 return true;
4348
4349 return false;
4350 }
4351
4352 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4353 {
4354 return parent == node_element || parent == node_declaration;
4355 }
4356
4357 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4358 {
4359 if (parent != node_document && parent != node_element) return false;
4360 if (child == node_document || child == node_null) return false;
4361 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4362
4363 return true;
4364 }
4365
4366 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4367 {
4368 // check that child can be a child of parent
4369 if (!allow_insert_child(parent.type(), child.type()))
4370 return false;
4371
4372 // check that node is not moved between documents
4373 if (parent.root() != child.root())
4374 return false;
4375
4376 // check that new parent is not in the child subtree
4377 xml_node cur = parent;
4378
4379 while (cur)
4380 {
4381 if (cur == child)
4382 return false;
4383
4384 cur = cur.parent();
4385 }
4386
4387 return true;
4388 }
4389
4390 template <typename String, typename Header>
4391 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4392 {
4393 assert(!dest && (header & header_mask) == 0);
4394
4395 if (source)
4396 {
4397 if (alloc && (source_header & header_mask) == 0)
4398 {
4399 dest = source;
4400
4401 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4402 header |= xml_memory_page_contents_shared_mask;
4403 source_header |= xml_memory_page_contents_shared_mask;
4404 }
4405 else
4406 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4407 }
4408 }
4409
4410 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4411 {
4412 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4413 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4414
4415 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4416 {
4417 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4418
4419 if (da)
4420 {
4421 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4422 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4423 }
4424 }
4425 }
4426
4427 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4428 {
4429 xml_allocator& alloc = get_allocator(dn);
4430 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4431
4432 node_copy_contents(dn, sn, shared_alloc);
4433
4434 xml_node_struct* dit = dn;
4435 xml_node_struct* sit = sn->first_child;
4436
4437 while (sit && sit != sn)
4438 {
4439 // loop invariant: dit is inside the subtree rooted at dn
4440 assert(dit);
4441
4442 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4443 if (sit != dn)
4444 {
4445 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4446
4447 if (copy)
4448 {
4449 node_copy_contents(copy, sit, shared_alloc);
4450
4451 if (sit->first_child)
4452 {
4453 dit = copy;
4454 sit = sit->first_child;
4455 continue;
4456 }
4457 }
4458 }
4459
4460 // continue to the next node
4461 do
4462 {
4463 if (sit->next_sibling)
4464 {
4465 sit = sit->next_sibling;
4466 break;
4467 }
4468
4469 sit = sit->parent;
4470 dit = dit->parent;
4471
4472 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
4473 assert(sit == sn || dit);
4474 }
4475 while (sit != sn);
4476 }
4477
4478 assert(!sit || dit == dn->parent);
4479 }
4480
4481 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4482 {
4483 xml_allocator& alloc = get_allocator(da);
4484 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4485
4486 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4487 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4488 }
4489
4490 inline bool is_text_node(xml_node_struct* node)
4491 {
4492 xml_node_type type = PUGI__NODETYPE(node);
4493
4494 return type == node_pcdata || type == node_cdata;
4495 }
4496
4497 // get value with conversion functions
4498 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4499 {
4500 U result = 0;
4501 const char_t* s = value;
4502
4503 while (PUGI__IS_CHARTYPE(*s, ct_space))
4504 s++;
4505
4506 bool negative = (*s == '-');
4507
4508 s += (*s == '+' || *s == '-');
4509
4510 bool overflow = false;
4511
4512 if (s[0] == '0' && (s[1] | ' ') == 'x')
4513 {
4514 s += 2;
4515
4516 // since overflow detection relies on length of the sequence skip leading zeros
4517 while (*s == '0')
4518 s++;
4519
4520 const char_t* start = s;
4521
4522 for (;;)
4523 {
4524 if (static_cast<unsigned>(*s - '0') < 10)
4525 result = result * 16 + (*s - '0');
4526 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4527 result = result * 16 + ((*s | ' ') - 'a' + 10);
4528 else
4529 break;
4530
4531 s++;
4532 }
4533
4534 size_t digits = static_cast<size_t>(s - start);
4535
4536 overflow = digits > sizeof(U) * 2;
4537 }
4538 else
4539 {
4540 // since overflow detection relies on length of the sequence skip leading zeros
4541 while (*s == '0')
4542 s++;
4543
4544 const char_t* start = s;
4545
4546 for (;;)
4547 {
4548 if (static_cast<unsigned>(*s - '0') < 10)
4549 result = result * 10 + (*s - '0');
4550 else
4551 break;
4552
4553 s++;
4554 }
4555
4556 size_t digits = static_cast<size_t>(s - start);
4557
4558 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4559
4560 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4561 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4562 const size_t high_bit = sizeof(U) * 8 - 1;
4563
4564 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4565 }
4566
4567 if (negative)
4568 {
4569 // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4570 #ifdef _CRAYC
4571 return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4572 #else
4573 return (overflow || result > 0 - minv) ? minv : 0 - result;
4574 #endif
4575 }
4576 else
4577 return (overflow || result > maxv) ? maxv : result;
4578 }
4579
4580 PUGI__FN int get_value_int(const char_t* value)
4581 {
4582 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4583 }
4584
4585 PUGI__FN unsigned int get_value_uint(const char_t* value)
4586 {
4587 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4588 }
4589
4590 PUGI__FN double get_value_double(const char_t* value)
4591 {
4592 #ifdef PUGIXML_WCHAR_MODE
4593 return wcstod(value, 0);
4594 #else
4595 return strtod(value, 0);
4596 #endif
4597 }
4598
4599 PUGI__FN float get_value_float(const char_t* value)
4600 {
4601 #ifdef PUGIXML_WCHAR_MODE
4602 return static_cast<float>(wcstod(value, 0));
4603 #else
4604 return static_cast<float>(strtod(value, 0));
4605 #endif
4606 }
4607
4608 PUGI__FN bool get_value_bool(const char_t* value)
4609 {
4610 // only look at first char
4611 char_t first = *value;
4612
4613 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4614 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4615 }
4616
4617#ifdef PUGIXML_HAS_LONG_LONG
4618 PUGI__FN long long get_value_llong(const char_t* value)
4619 {
4620 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4621 }
4622
4623 PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4624 {
4625 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4626 }
4627#endif
4628
4629 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4630 {
4631 char_t* result = end - 1;
4632 U rest = negative ? 0 - value : value;
4633
4634 do
4635 {
4636 *result-- = static_cast<char_t>('0' + (rest % 10));
4637 rest /= 10;
4638 }
4639 while (rest);
4640
4641 assert(result >= begin);
4642 (void)begin;
4643
4644 *result = '-';
4645
4646 return result + !negative;
4647 }
4648
4649 // set value with conversion functions
4650 template <typename String, typename Header>
4651 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4652 {
4653 #ifdef PUGIXML_WCHAR_MODE
4654 char_t wbuf[128];
4655 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4656
4657 size_t offset = 0;
4658 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4659
4660 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4661 #else
4662 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4663 #endif
4664 }
4665
4666 template <typename U, typename String, typename Header>
4667 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4668 {
4669 char_t buf[64];
4670 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4671 char_t* begin = integer_to_string(buf, end, value, negative);
4672
4673 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4674 }
4675
4676 template <typename String, typename Header>
4677 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4678 {
4679 char buf[128];
4680 PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
4681
4682 return set_value_ascii(dest, header, header_mask, buf);
4683 }
4684
4685 template <typename String, typename Header>
4686 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4687 {
4688 char buf[128];
4689 PUGI__SNPRINTF(buf, "%.*g", precision, value);
4690
4691 return set_value_ascii(dest, header, header_mask, buf);
4692 }
4693
4694 template <typename String, typename Header>
4695 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4696 {
4697 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4698 }
4699
4700 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4701 {
4702 // check input buffer
4703 if (!contents && size) return make_parse_result(status_io_error);
4704
4705 // get actual encoding
4706 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4707
4708 // get private buffer
4709 char_t* buffer = 0;
4710 size_t length = 0;
4711
4712 // coverity[var_deref_model]
4713 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4714
4715 // delete original buffer if we performed a conversion
4716 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4717
4718 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4719 if (own || buffer != contents) *out_buffer = buffer;
4720
4721 // store buffer for offset_debug
4722 doc->buffer = buffer;
4723
4724 // parse
4725 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4726
4727 // remember encoding
4728 res.encoding = buffer_encoding;
4729
4730 return res;
4731 }
4732
4733 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4734 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4735 {
4736 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4737 // there are 64-bit versions of fseek/ftell, let's use them
4738 typedef __int64 length_type;
4739
4740 _fseeki64(file, 0, SEEK_END);
4741 length_type length = _ftelli64(file);
4742 _fseeki64(file, 0, SEEK_SET);
4743 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4744 // there are 64-bit versions of fseek/ftell, let's use them
4745 typedef off64_t length_type;
4746
4747 fseeko64(file, 0, SEEK_END);
4748 length_type length = ftello64(file);
4749 fseeko64(file, 0, SEEK_SET);
4750 #else
4751 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4752 typedef long length_type;
4753
4754 fseek(file, 0, SEEK_END);
4755 length_type length = ftell(file);
4756 fseek(file, 0, SEEK_SET);
4757 #endif
4758
4759 // check for I/O errors
4760 if (length < 0) return status_io_error;
4761
4762 // check for overflow
4763 size_t result = static_cast<size_t>(length);
4764
4765 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4766
4767 // finalize
4768 out_result = result;
4769
4770 return status_ok;
4771 }
4772
4773 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4774 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4775 {
4776 // We only need to zero-terminate if encoding conversion does not do it for us
4777 #ifdef PUGIXML_WCHAR_MODE
4778 xml_encoding wchar_encoding = get_wchar_encoding();
4779
4780 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4781 {
4782 size_t length = size / sizeof(char_t);
4783
4784 static_cast<char_t*>(buffer)[length] = 0;
4785 return (length + 1) * sizeof(char_t);
4786 }
4787 #else
4788 if (encoding == encoding_utf8)
4789 {
4790 static_cast<char*>(buffer)[size] = 0;
4791 return size + 1;
4792 }
4793 #endif
4794
4795 return size;
4796 }
4797
4798 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4799 {
4800 if (!file) return make_parse_result(status_file_not_found);
4801
4802 // get file size (can result in I/O errors)
4803 size_t size = 0;
4804 xml_parse_status size_status = get_file_size(file, size);
4805 if (size_status != status_ok) return make_parse_result(size_status);
4806
4807 size_t max_suffix_size = sizeof(char_t);
4808
4809 // allocate buffer for the whole file
4810 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4811 if (!contents) return make_parse_result(status_out_of_memory);
4812
4813 // read file in memory
4814 size_t read_size = fread(contents, 1, size, file);
4815
4816 if (read_size != size)
4817 {
4818 xml_memory::deallocate(contents);
4819 return make_parse_result(status_io_error);
4820 }
4821
4822 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4823
4824 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4825 }
4826
4827 PUGI__FN void close_file(FILE* file)
4828 {
4829 fclose(file);
4830 }
4831
4832#ifndef PUGIXML_NO_STL
4833 template <typename T> struct xml_stream_chunk
4834 {
4835 static xml_stream_chunk* create()
4836 {
4837 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4838 if (!memory) return 0;
4839
4840 return new (memory) xml_stream_chunk();
4841 }
4842
4843 static void destroy(xml_stream_chunk* chunk)
4844 {
4845 // free chunk chain
4846 while (chunk)
4847 {
4848 xml_stream_chunk* next_ = chunk->next;
4849
4850 xml_memory::deallocate(chunk);
4851
4852 chunk = next_;
4853 }
4854 }
4855
4856 xml_stream_chunk(): next(0), size(0)
4857 {
4858 }
4859
4860 xml_stream_chunk* next;
4861 size_t size;
4862
4863 T data[xml_memory_page_size / sizeof(T)];
4864 };
4865
4866 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4867 {
4868 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4869
4870 // read file to a chunk list
4871 size_t total = 0;
4872 xml_stream_chunk<T>* last = 0;
4873
4874 while (!stream.eof())
4875 {
4876 // allocate new chunk
4877 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4878 if (!chunk) return status_out_of_memory;
4879
4880 // append chunk to list
4881 if (last) last = last->next = chunk;
4882 else chunks.data = last = chunk;
4883
4884 // read data to chunk
4885 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4886 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4887
4888 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4889 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4890
4891 // guard against huge files (chunk size is small enough to make this overflow check work)
4892 if (total + chunk->size < total) return status_out_of_memory;
4893 total += chunk->size;
4894 }
4895
4896 size_t max_suffix_size = sizeof(char_t);
4897
4898 // copy chunk list to a contiguous buffer
4899 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4900 if (!buffer) return status_out_of_memory;
4901
4902 char* write = buffer;
4903
4904 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4905 {
4906 assert(write + chunk->size <= buffer + total);
4907 memcpy(write, chunk->data, chunk->size);
4908 write += chunk->size;
4909 }
4910
4911 assert(write == buffer + total);
4912
4913 // return buffer
4914 *out_buffer = buffer;
4915 *out_size = total;
4916
4917 return status_ok;
4918 }
4919
4920 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4921 {
4922 // get length of remaining data in stream
4923 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4924 stream.seekg(0, std::ios::end);
4925 std::streamoff length = stream.tellg() - pos;
4926 stream.seekg(pos);
4927
4928 if (stream.fail() || pos < 0) return status_io_error;
4929
4930 // guard against huge files
4931 size_t read_length = static_cast<size_t>(length);
4932
4933 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4934
4935 size_t max_suffix_size = sizeof(char_t);
4936
4937 // read stream data into memory (guard against stream exceptions with buffer holder)
4938 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4939 if (!buffer.data) return status_out_of_memory;
4940
4941 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4942
4943 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4944 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4945
4946 // return buffer
4947 size_t actual_length = static_cast<size_t>(stream.gcount());
4948 assert(actual_length <= read_length);
4949
4950 *out_buffer = buffer.release();
4951 *out_size = actual_length * sizeof(T);
4952
4953 return status_ok;
4954 }
4955
4956 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4957 {
4958 void* buffer = 0;
4959 size_t size = 0;
4960 xml_parse_status status = status_ok;
4961
4962 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4963 if (stream.fail()) return make_parse_result(status_io_error);
4964
4965 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4966 if (stream.tellg() < 0)
4967 {
4968 stream.clear(); // clear error flags that could be set by a failing tellg
4969 status = load_stream_data_noseek(stream, &buffer, &size);
4970 }
4971 else
4972 status = load_stream_data_seek(stream, &buffer, &size);
4973
4974 if (status != status_ok) return make_parse_result(status);
4975
4976 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4977
4978 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4979 }
4980#endif
4981
4982#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4983 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4984 {
4985#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4986 FILE* file = 0;
4987 return _wfopen_s(&file, path, mode) == 0 ? file : 0;
4988#else
4989 return _wfopen(path, mode);
4990#endif
4991 }
4992#else
4993 PUGI__FN char* convert_path_heap(const wchar_t* str)
4994 {
4995 assert(str);
4996
4997 // first pass: get length in utf8 characters
4998 size_t length = strlength_wide(str);
4999 size_t size = as_utf8_begin(str, length);
5000
5001 // allocate resulting string
5002 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5003 if (!result) return 0;
5004
5005 // second pass: convert to utf8
5006 as_utf8_end(result, size, str, length);
5007
5008 // zero-terminate
5009 result[size] = 0;
5010
5011 return result;
5012 }
5013
5014 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5015 {
5016 // there is no standard function to open wide paths, so our best bet is to try utf8 path
5017 char* path_utf8 = convert_path_heap(path);
5018 if (!path_utf8) return 0;
5019
5020 // convert mode to ASCII (we mirror _wfopen interface)
5021 char mode_ascii[4] = {0};
5022 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5023
5024 // try to open the utf8 path
5025 FILE* result = fopen(path_utf8, mode_ascii);
5026
5027 // free dummy buffer
5028 xml_memory::deallocate(path_utf8);
5029
5030 return result;
5031 }
5032#endif
5033
5034 PUGI__FN FILE* open_file(const char* path, const char* mode)
5035 {
5036#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5037 FILE* file = 0;
5038 return fopen_s(&file, path, mode) == 0 ? file : 0;
5039#else
5040 return fopen(path, mode);
5041#endif
5042 }
5043
5044 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5045 {
5046 if (!file) return false;
5047
5048 xml_writer_file writer(file);
5049 doc.save(writer, indent, flags, encoding);
5050
5051 return ferror(file) == 0;
5052 }
5053
5054 struct name_null_sentry
5055 {
5056 xml_node_struct* node;
5057 char_t* name;
5058
5059 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5060 {
5061 node->name = 0;
5062 }
5063
5064 ~name_null_sentry()
5065 {
5066 node->name = name;
5067 }
5068 };
5069PUGI__NS_END
5070
5071namespace pugi
5072{
5073 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5074 {
5075 }
5076
5077 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5078 {
5079 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5080 (void)!result; // unfortunately we can't do proper error handling here
5081 }
5082
5083#ifndef PUGIXML_NO_STL
5084 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5085 {
5086 }
5087
5088 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5089 {
5090 }
5091
5092 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5093 {
5094 if (narrow_stream)
5095 {
5096 assert(!wide_stream);
5097 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5098 }
5099 else
5100 {
5101 assert(wide_stream);
5102 assert(size % sizeof(wchar_t) == 0);
5103
5104 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5105 }
5106 }
5107#endif
5108
5109 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5110 {
5111 }
5112
5113 PUGI__FN xml_tree_walker::~xml_tree_walker()
5114 {
5115 }
5116
5117 PUGI__FN int xml_tree_walker::depth() const
5118 {
5119 return _depth;
5120 }
5121
5122 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5123 {
5124 return true;
5125 }
5126
5127 PUGI__FN bool xml_tree_walker::end(xml_node&)
5128 {
5129 return true;
5130 }
5131
5132 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5133 {
5134 }
5135
5136 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5137 {
5138 }
5139
5140 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5141 {
5142 }
5143
5144 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5145 {
5146 return _attr ? unspecified_bool_xml_attribute : 0;
5147 }
5148
5149 PUGI__FN bool xml_attribute::operator!() const
5150 {
5151 return !_attr;
5152 }
5153
5154 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5155 {
5156 return (_attr == r._attr);
5157 }
5158
5159 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5160 {
5161 return (_attr != r._attr);
5162 }
5163
5164 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5165 {
5166 return (_attr < r._attr);
5167 }
5168
5169 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5170 {
5171 return (_attr > r._attr);
5172 }
5173
5174 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5175 {
5176 return (_attr <= r._attr);
5177 }
5178
5179 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5180 {
5181 return (_attr >= r._attr);
5182 }
5183
5184 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5185 {
5186 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5187 }
5188
5189 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5190 {
5191 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5192 }
5193
5194 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5195 {
5196 return (_attr && _attr->value) ? _attr->value + 0 : def;
5197 }
5198
5199 PUGI__FN int xml_attribute::as_int(int def) const
5200 {
5201 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5202 }
5203
5204 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5205 {
5206 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5207 }
5208
5209 PUGI__FN double xml_attribute::as_double(double def) const
5210 {
5211 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5212 }
5213
5214 PUGI__FN float xml_attribute::as_float(float def) const
5215 {
5216 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5217 }
5218
5219 PUGI__FN bool xml_attribute::as_bool(bool def) const
5220 {
5221 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5222 }
5223
5224#ifdef PUGIXML_HAS_LONG_LONG
5225 PUGI__FN long long xml_attribute::as_llong(long long def) const
5226 {
5227 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5228 }
5229
5230 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5231 {
5232 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5233 }
5234#endif
5235
5236 PUGI__FN bool xml_attribute::empty() const
5237 {
5238 return !_attr;
5239 }
5240
5241 PUGI__FN const char_t* xml_attribute::name() const
5242 {
5243 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5244 }
5245
5246 PUGI__FN const char_t* xml_attribute::value() const
5247 {
5248 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5249 }
5250
5251 PUGI__FN size_t xml_attribute::hash_value() const
5252 {
5253 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5254 }
5255
5256 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5257 {
5258 return _attr;
5259 }
5260
5261 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5262 {
5263 set_value(rhs);
5264 return *this;
5265 }
5266
5267 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5268 {
5269 set_value(rhs);
5270 return *this;
5271 }
5272
5273 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5274 {
5275 set_value(rhs);
5276 return *this;
5277 }
5278
5279 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5280 {
5281 set_value(rhs);
5282 return *this;
5283 }
5284
5285 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5286 {
5287 set_value(rhs);
5288 return *this;
5289 }
5290
5291 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5292 {
5293 set_value(rhs);
5294 return *this;
5295 }
5296
5297 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5298 {
5299 set_value(rhs);
5300 return *this;
5301 }
5302
5303 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5304 {
5305 set_value(rhs);
5306 return *this;
5307 }
5308
5309#ifdef PUGIXML_HAS_LONG_LONG
5310 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5311 {
5312 set_value(rhs);
5313 return *this;
5314 }
5315
5316 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5317 {
5318 set_value(rhs);
5319 return *this;
5320 }
5321#endif
5322
5323 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5324 {
5325 if (!_attr) return false;
5326
5327 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5328 }
5329
5330 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5331 {
5332 if (!_attr) return false;
5333
5334 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5335 }
5336
5337 PUGI__FN bool xml_attribute::set_value(int rhs)
5338 {
5339 if (!_attr) return false;
5340
5341 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5342 }
5343
5344 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5345 {
5346 if (!_attr) return false;
5347
5348 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5349 }
5350
5351 PUGI__FN bool xml_attribute::set_value(long rhs)
5352 {
5353 if (!_attr) return false;
5354
5355 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5356 }
5357
5358 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5359 {
5360 if (!_attr) return false;
5361
5362 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5363 }
5364
5365 PUGI__FN bool xml_attribute::set_value(double rhs)
5366 {
5367 if (!_attr) return false;
5368
5369 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
5370 }
5371
5372 PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
5373 {
5374 if (!_attr) return false;
5375
5376 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5377 }
5378
5379 PUGI__FN bool xml_attribute::set_value(float rhs)
5380 {
5381 if (!_attr) return false;
5382
5383 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
5384 }
5385
5386 PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
5387 {
5388 if (!_attr) return false;
5389
5390 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5391 }
5392
5393 PUGI__FN bool xml_attribute::set_value(bool rhs)
5394 {
5395 if (!_attr) return false;
5396
5397 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5398 }
5399
5400#ifdef PUGIXML_HAS_LONG_LONG
5401 PUGI__FN bool xml_attribute::set_value(long long rhs)
5402 {
5403 if (!_attr) return false;
5404
5405 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5406 }
5407
5408 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5409 {
5410 if (!_attr) return false;
5411
5412 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5413 }
5414#endif
5415
5416#ifdef __BORLANDC__
5417 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5418 {
5419 return (bool)lhs && rhs;
5420 }
5421
5422 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5423 {
5424 return (bool)lhs || rhs;
5425 }
5426#endif
5427
5428 PUGI__FN xml_node::xml_node(): _root(0)
5429 {
5430 }
5431
5432 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5433 {
5434 }
5435
5436 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5437 {
5438 }
5439
5440 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5441 {
5442 return _root ? unspecified_bool_xml_node : 0;
5443 }
5444
5445 PUGI__FN bool xml_node::operator!() const
5446 {
5447 return !_root;
5448 }
5449
5450 PUGI__FN xml_node::iterator xml_node::begin() const
5451 {
5452 return iterator(_root ? _root->first_child + 0 : 0, _root);
5453 }
5454
5455 PUGI__FN xml_node::iterator xml_node::end() const
5456 {
5457 return iterator(0, _root);
5458 }
5459
5460 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5461 {
5462 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5463 }
5464
5465 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5466 {
5467 return attribute_iterator(0, _root);
5468 }
5469
5470 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5471 {
5472 return xml_object_range<xml_node_iterator>(begin(), end());
5473 }
5474
5475 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5476 {
5477 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5478 }
5479
5480 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5481 {
5482 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5483 }
5484
5485 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5486 {
5487 return (_root == r._root);
5488 }
5489
5490 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5491 {
5492 return (_root != r._root);
5493 }
5494
5495 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5496 {
5497 return (_root < r._root);
5498 }
5499
5500 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5501 {
5502 return (_root > r._root);
5503 }
5504
5505 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5506 {
5507 return (_root <= r._root);
5508 }
5509
5510 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5511 {
5512 return (_root >= r._root);
5513 }
5514
5515 PUGI__FN bool xml_node::empty() const
5516 {
5517 return !_root;
5518 }
5519
5520 PUGI__FN const char_t* xml_node::name() const
5521 {
5522 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5523 }
5524
5525 PUGI__FN xml_node_type xml_node::type() const
5526 {
5527 return _root ? PUGI__NODETYPE(_root) : node_null;
5528 }
5529
5530 PUGI__FN const char_t* xml_node::value() const
5531 {
5532 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5533 }
5534
5535 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5536 {
5537 if (!_root) return xml_node();
5538
5539 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5540 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5541
5542 return xml_node();
5543 }
5544
5545 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5546 {
5547 if (!_root) return xml_attribute();
5548
5549 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5550 if (i->name && impl::strequal(name_, i->name))
5551 return xml_attribute(i);
5552
5553 return xml_attribute();
5554 }
5555
5556 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5557 {
5558 if (!_root) return xml_node();
5559
5560 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5561 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5562
5563 return xml_node();
5564 }
5565
5566 PUGI__FN xml_node xml_node::next_sibling() const
5567 {
5568 return _root ? xml_node(_root->next_sibling) : xml_node();
5569 }
5570
5571 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5572 {
5573 if (!_root) return xml_node();
5574
5575 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5576 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5577
5578 return xml_node();
5579 }
5580
5581 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5582 {
5583 xml_attribute_struct* hint = hint_._attr;
5584
5585 // if hint is not an attribute of node, behavior is not defined
5586 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5587
5588 if (!_root) return xml_attribute();
5589
5590 // optimistically search from hint up until the end
5591 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5592 if (i->name && impl::strequal(name_, i->name))
5593 {
5594 // update hint to maximize efficiency of searching for consecutive attributes
5595 hint_._attr = i->next_attribute;
5596
5597 return xml_attribute(i);
5598 }
5599
5600 // wrap around and search from the first attribute until the hint
5601 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5602 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5603 if (j->name && impl::strequal(name_, j->name))
5604 {
5605 // update hint to maximize efficiency of searching for consecutive attributes
5606 hint_._attr = j->next_attribute;
5607
5608 return xml_attribute(j);
5609 }
5610
5611 return xml_attribute();
5612 }
5613
5614 PUGI__FN xml_node xml_node::previous_sibling() const
5615 {
5616 if (!_root) return xml_node();
5617
5618 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5619 else return xml_node();
5620 }
5621
5622 PUGI__FN xml_node xml_node::parent() const
5623 {
5624 return _root ? xml_node(_root->parent) : xml_node();
5625 }
5626
5627 PUGI__FN xml_node xml_node::root() const
5628 {
5629 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5630 }
5631
5632 PUGI__FN xml_text xml_node::text() const
5633 {
5634 return xml_text(_root);
5635 }
5636
5637 PUGI__FN const char_t* xml_node::child_value() const
5638 {
5639 if (!_root) return PUGIXML_TEXT("");
5640
5641 // element nodes can have value if parse_embed_pcdata was used
5642 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5643 return _root->value;
5644
5645 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5646 if (impl::is_text_node(i) && i->value)
5647 return i->value;
5648
5649 return PUGIXML_TEXT("");
5650 }
5651
5652 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5653 {
5654 return child(name_).child_value();
5655 }
5656
5657 PUGI__FN xml_attribute xml_node::first_attribute() const
5658 {
5659 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5660 }
5661
5662 PUGI__FN xml_attribute xml_node::last_attribute() const
5663 {
5664 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5665 }
5666
5667 PUGI__FN xml_node xml_node::first_child() const
5668 {
5669 return _root ? xml_node(_root->first_child) : xml_node();
5670 }
5671
5672 PUGI__FN xml_node xml_node::last_child() const
5673 {
5674 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5675 }
5676
5677 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5678 {
5679 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5680
5681 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5682 return false;
5683
5684 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5685 }
5686
5687 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5688 {
5689 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5690
5691 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5692 return false;
5693
5694 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5695 }
5696
5697 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5698 {
5699 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5700
5701 impl::xml_allocator& alloc = impl::get_allocator(_root);
5702 if (!alloc.reserve()) return xml_attribute();
5703
5704 xml_attribute a(impl::allocate_attribute(alloc));
5705 if (!a) return xml_attribute();
5706
5707 impl::append_attribute(a._attr, _root);
5708
5709 a.set_name(name_);
5710
5711 return a;
5712 }
5713
5714 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5715 {
5716 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5717
5718 impl::xml_allocator& alloc = impl::get_allocator(_root);
5719 if (!alloc.reserve()) return xml_attribute();
5720
5721 xml_attribute a(impl::allocate_attribute(alloc));
5722 if (!a) return xml_attribute();
5723
5724 impl::prepend_attribute(a._attr, _root);
5725
5726 a.set_name(name_);
5727
5728 return a;
5729 }
5730
5731 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5732 {
5733 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5734 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5735
5736 impl::xml_allocator& alloc = impl::get_allocator(_root);
5737 if (!alloc.reserve()) return xml_attribute();
5738
5739 xml_attribute a(impl::allocate_attribute(alloc));
5740 if (!a) return xml_attribute();
5741
5742 impl::insert_attribute_after(a._attr, attr._attr, _root);
5743
5744 a.set_name(name_);
5745
5746 return a;
5747 }
5748
5749 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5750 {
5751 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5752 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5753
5754 impl::xml_allocator& alloc = impl::get_allocator(_root);
5755 if (!alloc.reserve()) return xml_attribute();
5756
5757 xml_attribute a(impl::allocate_attribute(alloc));
5758 if (!a) return xml_attribute();
5759
5760 impl::insert_attribute_before(a._attr, attr._attr, _root);
5761
5762 a.set_name(name_);
5763
5764 return a;
5765 }
5766
5767 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5768 {
5769 if (!proto) return xml_attribute();
5770 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5771
5772 impl::xml_allocator& alloc = impl::get_allocator(_root);
5773 if (!alloc.reserve()) return xml_attribute();
5774
5775 xml_attribute a(impl::allocate_attribute(alloc));
5776 if (!a) return xml_attribute();
5777
5778 impl::append_attribute(a._attr, _root);
5779 impl::node_copy_attribute(a._attr, proto._attr);
5780
5781 return a;
5782 }
5783
5784 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5785 {
5786 if (!proto) return xml_attribute();
5787 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5788
5789 impl::xml_allocator& alloc = impl::get_allocator(_root);
5790 if (!alloc.reserve()) return xml_attribute();
5791
5792 xml_attribute a(impl::allocate_attribute(alloc));
5793 if (!a) return xml_attribute();
5794
5795 impl::prepend_attribute(a._attr, _root);
5796 impl::node_copy_attribute(a._attr, proto._attr);
5797
5798 return a;
5799 }
5800
5801 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5802 {
5803 if (!proto) return xml_attribute();
5804 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5805 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5806
5807 impl::xml_allocator& alloc = impl::get_allocator(_root);
5808 if (!alloc.reserve()) return xml_attribute();
5809
5810 xml_attribute a(impl::allocate_attribute(alloc));
5811 if (!a) return xml_attribute();
5812
5813 impl::insert_attribute_after(a._attr, attr._attr, _root);
5814 impl::node_copy_attribute(a._attr, proto._attr);
5815
5816 return a;
5817 }
5818
5819 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5820 {
5821 if (!proto) return xml_attribute();
5822 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5823 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5824
5825 impl::xml_allocator& alloc = impl::get_allocator(_root);
5826 if (!alloc.reserve()) return xml_attribute();
5827
5828 xml_attribute a(impl::allocate_attribute(alloc));
5829 if (!a) return xml_attribute();
5830
5831 impl::insert_attribute_before(a._attr, attr._attr, _root);
5832 impl::node_copy_attribute(a._attr, proto._attr);
5833
5834 return a;
5835 }
5836
5837 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5838 {
5839 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5840
5841 impl::xml_allocator& alloc = impl::get_allocator(_root);
5842 if (!alloc.reserve()) return xml_node();
5843
5844 xml_node n(impl::allocate_node(alloc, type_));
5845 if (!n) return xml_node();
5846
5847 impl::append_node(n._root, _root);
5848
5849 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5850
5851 return n;
5852 }
5853
5854 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5855 {
5856 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5857
5858 impl::xml_allocator& alloc = impl::get_allocator(_root);
5859 if (!alloc.reserve()) return xml_node();
5860
5861 xml_node n(impl::allocate_node(alloc, type_));
5862 if (!n) return xml_node();
5863
5864 impl::prepend_node(n._root, _root);
5865
5866 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5867
5868 return n;
5869 }
5870
5871 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5872 {
5873 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5874 if (!node._root || node._root->parent != _root) return xml_node();
5875
5876 impl::xml_allocator& alloc = impl::get_allocator(_root);
5877 if (!alloc.reserve()) return xml_node();
5878
5879 xml_node n(impl::allocate_node(alloc, type_));
5880 if (!n) return xml_node();
5881
5882 impl::insert_node_before(n._root, node._root);
5883
5884 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5885
5886 return n;
5887 }
5888
5889 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5890 {
5891 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5892 if (!node._root || node._root->parent != _root) return xml_node();
5893
5894 impl::xml_allocator& alloc = impl::get_allocator(_root);
5895 if (!alloc.reserve()) return xml_node();
5896
5897 xml_node n(impl::allocate_node(alloc, type_));
5898 if (!n) return xml_node();
5899
5900 impl::insert_node_after(n._root, node._root);
5901
5902 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5903
5904 return n;
5905 }
5906
5907 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5908 {
5909 xml_node result = append_child(node_element);
5910
5911 result.set_name(name_);
5912
5913 return result;
5914 }
5915
5916 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5917 {
5918 xml_node result = prepend_child(node_element);
5919
5920 result.set_name(name_);
5921
5922 return result;
5923 }
5924
5925 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5926 {
5927 xml_node result = insert_child_after(node_element, node);
5928
5929 result.set_name(name_);
5930
5931 return result;
5932 }
5933
5934 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5935 {
5936 xml_node result = insert_child_before(node_element, node);
5937
5938 result.set_name(name_);
5939
5940 return result;
5941 }
5942
5943 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5944 {
5945 xml_node_type type_ = proto.type();
5946 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5947
5948 impl::xml_allocator& alloc = impl::get_allocator(_root);
5949 if (!alloc.reserve()) return xml_node();
5950
5951 xml_node n(impl::allocate_node(alloc, type_));
5952 if (!n) return xml_node();
5953
5954 impl::append_node(n._root, _root);
5955 impl::node_copy_tree(n._root, proto._root);
5956
5957 return n;
5958 }
5959
5960 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5961 {
5962 xml_node_type type_ = proto.type();
5963 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5964
5965 impl::xml_allocator& alloc = impl::get_allocator(_root);
5966 if (!alloc.reserve()) return xml_node();
5967
5968 xml_node n(impl::allocate_node(alloc, type_));
5969 if (!n) return xml_node();
5970
5971 impl::prepend_node(n._root, _root);
5972 impl::node_copy_tree(n._root, proto._root);
5973
5974 return n;
5975 }
5976
5977 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5978 {
5979 xml_node_type type_ = proto.type();
5980 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5981 if (!node._root || node._root->parent != _root) return xml_node();
5982
5983 impl::xml_allocator& alloc = impl::get_allocator(_root);
5984 if (!alloc.reserve()) return xml_node();
5985
5986 xml_node n(impl::allocate_node(alloc, type_));
5987 if (!n) return xml_node();
5988
5989 impl::insert_node_after(n._root, node._root);
5990 impl::node_copy_tree(n._root, proto._root);
5991
5992 return n;
5993 }
5994
5995 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5996 {
5997 xml_node_type type_ = proto.type();
5998 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5999 if (!node._root || node._root->parent != _root) return xml_node();
6000
6001 impl::xml_allocator& alloc = impl::get_allocator(_root);
6002 if (!alloc.reserve()) return xml_node();
6003
6004 xml_node n(impl::allocate_node(alloc, type_));
6005 if (!n) return xml_node();
6006
6007 impl::insert_node_before(n._root, node._root);
6008 impl::node_copy_tree(n._root, proto._root);
6009
6010 return n;
6011 }
6012
6013 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
6014 {
6015 if (!impl::allow_move(*this, moved)) return xml_node();
6016
6017 impl::xml_allocator& alloc = impl::get_allocator(_root);
6018 if (!alloc.reserve()) return xml_node();
6019
6020 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6021 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6022
6023 impl::remove_node(moved._root);
6024 impl::append_node(moved._root, _root);
6025
6026 return moved;
6027 }
6028
6029 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
6030 {
6031 if (!impl::allow_move(*this, moved)) return xml_node();
6032
6033 impl::xml_allocator& alloc = impl::get_allocator(_root);
6034 if (!alloc.reserve()) return xml_node();
6035
6036 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6037 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6038
6039 impl::remove_node(moved._root);
6040 impl::prepend_node(moved._root, _root);
6041
6042 return moved;
6043 }
6044
6045 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6046 {
6047 if (!impl::allow_move(*this, moved)) return xml_node();
6048 if (!node._root || node._root->parent != _root) return xml_node();
6049 if (moved._root == node._root) return xml_node();
6050
6051 impl::xml_allocator& alloc = impl::get_allocator(_root);
6052 if (!alloc.reserve()) return xml_node();
6053
6054 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6055 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6056
6057 impl::remove_node(moved._root);
6058 impl::insert_node_after(moved._root, node._root);
6059
6060 return moved;
6061 }
6062
6063 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6064 {
6065 if (!impl::allow_move(*this, moved)) return xml_node();
6066 if (!node._root || node._root->parent != _root) return xml_node();
6067 if (moved._root == node._root) return xml_node();
6068
6069 impl::xml_allocator& alloc = impl::get_allocator(_root);
6070 if (!alloc.reserve()) return xml_node();
6071
6072 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6073 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6074
6075 impl::remove_node(moved._root);
6076 impl::insert_node_before(moved._root, node._root);
6077
6078 return moved;
6079 }
6080
6081 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6082 {
6083 return remove_attribute(attribute(name_));
6084 }
6085
6086 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6087 {
6088 if (!_root || !a._attr) return false;
6089 if (!impl::is_attribute_of(a._attr, _root)) return false;
6090
6091 impl::xml_allocator& alloc = impl::get_allocator(_root);
6092 if (!alloc.reserve()) return false;
6093
6094 impl::remove_attribute(a._attr, _root);
6095 impl::destroy_attribute(a._attr, alloc);
6096
6097 return true;
6098 }
6099
6100 PUGI__FN bool xml_node::remove_attributes()
6101 {
6102 if (!_root) return false;
6103
6104 impl::xml_allocator& alloc = impl::get_allocator(_root);
6105 if (!alloc.reserve()) return false;
6106
6107 for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6108 {
6109 xml_attribute_struct* next = attr->next_attribute;
6110
6111 impl::destroy_attribute(attr, alloc);
6112
6113 attr = next;
6114 }
6115
6116 _root->first_attribute = 0;
6117
6118 return true;
6119 }
6120
6121 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6122 {
6123 return remove_child(child(name_));
6124 }
6125
6126 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6127 {
6128 if (!_root || !n._root || n._root->parent != _root) return false;
6129
6130 impl::xml_allocator& alloc = impl::get_allocator(_root);
6131 if (!alloc.reserve()) return false;
6132
6133 impl::remove_node(n._root);
6134 impl::destroy_node(n._root, alloc);
6135
6136 return true;
6137 }
6138
6139 PUGI__FN bool xml_node::remove_children()
6140 {
6141 if (!_root) return false;
6142
6143 impl::xml_allocator& alloc = impl::get_allocator(_root);
6144 if (!alloc.reserve()) return false;
6145
6146 for (xml_node_struct* cur = _root->first_child; cur; )
6147 {
6148 xml_node_struct* next = cur->next_sibling;
6149
6150 impl::destroy_node(cur, alloc);
6151
6152 cur = next;
6153 }
6154
6155 _root->first_child = 0;
6156
6157 return true;
6158 }
6159
6160 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6161 {
6162 // append_buffer is only valid for elements/documents
6163 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6164
6165 // get document node
6166 impl::xml_document_struct* doc = &impl::get_document(_root);
6167
6168 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6169 doc->header |= impl::xml_memory_page_contents_shared_mask;
6170
6171 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6172 impl::xml_memory_page* page = 0;
6173 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6174 (void)page;
6175
6176 if (!extra) return impl::make_parse_result(status_out_of_memory);
6177
6178 #ifdef PUGIXML_COMPACT
6179 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6180 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6181 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6182 #endif
6183
6184 // add extra buffer to the list
6185 extra->buffer = 0;
6186 extra->next = doc->extra_buffers;
6187 doc->extra_buffers = extra;
6188
6189 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6190 impl::name_null_sentry sentry(_root);
6191
6192 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6193 }
6194
6195 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6196 {
6197 if (!_root) return xml_node();
6198
6199 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6200 if (i->name && impl::strequal(name_, i->name))
6201 {
6202 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6203 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6204 return xml_node(i);
6205 }
6206
6207 return xml_node();
6208 }
6209
6210 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6211 {
6212 if (!_root) return xml_node();
6213
6214 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6215 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6216 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6217 return xml_node(i);
6218
6219 return xml_node();
6220 }
6221
6222#ifndef PUGIXML_NO_STL
6223 PUGI__FN string_t xml_node::path(char_t delimiter) const
6224 {
6225 if (!_root) return string_t();
6226
6227 size_t offset = 0;
6228
6229 for (xml_node_struct* i = _root; i; i = i->parent)
6230 {
6231 offset += (i != _root);
6232 offset += i->name ? impl::strlength(i->name) : 0;
6233 }
6234
6235 string_t result;
6236 result.resize(offset);
6237
6238 for (xml_node_struct* j = _root; j; j = j->parent)
6239 {
6240 if (j != _root)
6241 result[--offset] = delimiter;
6242
6243 if (j->name)
6244 {
6245 size_t length = impl::strlength(j->name);
6246
6247 offset -= length;
6248 memcpy(&result[offset], j->name, length * sizeof(char_t));
6249 }
6250 }
6251
6252 assert(offset == 0);
6253
6254 return result;
6255 }
6256#endif
6257
6258 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6259 {
6260 xml_node context = path_[0] == delimiter ? root() : *this;
6261
6262 if (!context._root) return xml_node();
6263
6264 const char_t* path_segment = path_;
6265
6266 while (*path_segment == delimiter) ++path_segment;
6267
6268 const char_t* path_segment_end = path_segment;
6269
6270 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6271
6272 if (path_segment == path_segment_end) return context;
6273
6274 const char_t* next_segment = path_segment_end;
6275
6276 while (*next_segment == delimiter) ++next_segment;
6277
6278 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6279 return context.first_element_by_path(next_segment, delimiter);
6280 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6281 return context.parent().first_element_by_path(next_segment, delimiter);
6282 else
6283 {
6284 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
6285 {
6286 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6287 {
6288 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6289
6290 if (subsearch) return subsearch;
6291 }
6292 }
6293
6294 return xml_node();
6295 }
6296 }
6297
6298 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6299 {
6300 walker._depth = -1;
6301
6302 xml_node arg_begin(_root);
6303 if (!walker.begin(arg_begin)) return false;
6304
6305 xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6306
6307 if (cur)
6308 {
6309 ++walker._depth;
6310
6311 do
6312 {
6313 xml_node arg_for_each(cur);
6314 if (!walker.for_each(arg_for_each))
6315 return false;
6316
6317 if (cur->first_child)
6318 {
6319 ++walker._depth;
6320 cur = cur->first_child;
6321 }
6322 else if (cur->next_sibling)
6323 cur = cur->next_sibling;
6324 else
6325 {
6326 while (!cur->next_sibling && cur != _root && cur->parent)
6327 {
6328 --walker._depth;
6329 cur = cur->parent;
6330 }
6331
6332 if (cur != _root)
6333 cur = cur->next_sibling;
6334 }
6335 }
6336 while (cur && cur != _root);
6337 }
6338
6339 assert(walker._depth == -1);
6340
6341 xml_node arg_end(_root);
6342 return walker.end(arg_end);
6343 }
6344
6345 PUGI__FN size_t xml_node::hash_value() const
6346 {
6347 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6348 }
6349
6350 PUGI__FN xml_node_struct* xml_node::internal_object() const
6351 {
6352 return _root;
6353 }
6354
6355 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6356 {
6357 if (!_root) return;
6358
6359 impl::xml_buffered_writer buffered_writer(writer, encoding);
6360
6361 impl::node_output(buffered_writer, _root, indent, flags, depth);
6362
6363 buffered_writer.flush();
6364 }
6365
6366#ifndef PUGIXML_NO_STL
6367 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6368 {
6369 xml_writer_stream writer(stream);
6370
6371 print(writer, indent, flags, encoding, depth);
6372 }
6373
6374 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6375 {
6376 xml_writer_stream writer(stream);
6377
6378 print(writer, indent, flags, encoding_wchar, depth);
6379 }
6380#endif
6381
6382 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6383 {
6384 if (!_root) return -1;
6385
6386 impl::xml_document_struct& doc = impl::get_document(_root);
6387
6388 // we can determine the offset reliably only if there is exactly once parse buffer
6389 if (!doc.buffer || doc.extra_buffers) return -1;
6390
6391 switch (type())
6392 {
6393 case node_document:
6394 return 0;
6395
6396 case node_element:
6397 case node_declaration:
6398 case node_pi:
6399 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6400
6401 case node_pcdata:
6402 case node_cdata:
6403 case node_comment:
6404 case node_doctype:
6405 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6406
6407 default:
6408 assert(false && "Invalid node type"); // unreachable
6409 return -1;
6410 }
6411 }
6412
6413#ifdef __BORLANDC__
6414 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6415 {
6416 return (bool)lhs && rhs;
6417 }
6418
6419 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6420 {
6421 return (bool)lhs || rhs;
6422 }
6423#endif
6424
6425 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6426 {
6427 }
6428
6429 PUGI__FN xml_node_struct* xml_text::_data() const
6430 {
6431 if (!_root || impl::is_text_node(_root)) return _root;
6432
6433 // element nodes can have value if parse_embed_pcdata was used
6434 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6435 return _root;
6436
6437 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6438 if (impl::is_text_node(node))
6439 return node;
6440
6441 return 0;
6442 }
6443
6444 PUGI__FN xml_node_struct* xml_text::_data_new()
6445 {
6446 xml_node_struct* d = _data();
6447 if (d) return d;
6448
6449 return xml_node(_root).append_child(node_pcdata).internal_object();
6450 }
6451
6452 PUGI__FN xml_text::xml_text(): _root(0)
6453 {
6454 }
6455
6456 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6457 {
6458 }
6459
6460 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6461 {
6462 return _data() ? unspecified_bool_xml_text : 0;
6463 }
6464
6465 PUGI__FN bool xml_text::operator!() const
6466 {
6467 return !_data();
6468 }
6469
6470 PUGI__FN bool xml_text::empty() const
6471 {
6472 return _data() == 0;
6473 }
6474
6475 PUGI__FN const char_t* xml_text::get() const
6476 {
6477 xml_node_struct* d = _data();
6478
6479 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6480 }
6481
6482 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6483 {
6484 xml_node_struct* d = _data();
6485
6486 return (d && d->value) ? d->value + 0 : def;
6487 }
6488
6489 PUGI__FN int xml_text::as_int(int def) const
6490 {
6491 xml_node_struct* d = _data();
6492
6493 return (d && d->value) ? impl::get_value_int(d->value) : def;
6494 }
6495
6496 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6497 {
6498 xml_node_struct* d = _data();
6499
6500 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6501 }
6502
6503 PUGI__FN double xml_text::as_double(double def) const
6504 {
6505 xml_node_struct* d = _data();
6506
6507 return (d && d->value) ? impl::get_value_double(d->value) : def;
6508 }
6509
6510 PUGI__FN float xml_text::as_float(float def) const
6511 {
6512 xml_node_struct* d = _data();
6513
6514 return (d && d->value) ? impl::get_value_float(d->value) : def;
6515 }
6516
6517 PUGI__FN bool xml_text::as_bool(bool def) const
6518 {
6519 xml_node_struct* d = _data();
6520
6521 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6522 }
6523
6524#ifdef PUGIXML_HAS_LONG_LONG
6525 PUGI__FN long long xml_text::as_llong(long long def) const
6526 {
6527 xml_node_struct* d = _data();
6528
6529 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6530 }
6531
6532 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6533 {
6534 xml_node_struct* d = _data();
6535
6536 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6537 }
6538#endif
6539
6540 PUGI__FN bool xml_text::set(const char_t* rhs)
6541 {
6542 xml_node_struct* dn = _data_new();
6543
6544 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6545 }
6546
6547 PUGI__FN bool xml_text::set(int rhs)
6548 {
6549 xml_node_struct* dn = _data_new();
6550
6551 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6552 }
6553
6554 PUGI__FN bool xml_text::set(unsigned int rhs)
6555 {
6556 xml_node_struct* dn = _data_new();
6557
6558 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6559 }
6560
6561 PUGI__FN bool xml_text::set(long rhs)
6562 {
6563 xml_node_struct* dn = _data_new();
6564
6565 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6566 }
6567
6568 PUGI__FN bool xml_text::set(unsigned long rhs)
6569 {
6570 xml_node_struct* dn = _data_new();
6571
6572 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6573 }
6574
6575 PUGI__FN bool xml_text::set(float rhs)
6576 {
6577 xml_node_struct* dn = _data_new();
6578
6579 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6580 }
6581
6582 PUGI__FN bool xml_text::set(float rhs, int precision)
6583 {
6584 xml_node_struct* dn = _data_new();
6585
6586 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6587 }
6588
6589 PUGI__FN bool xml_text::set(double rhs)
6590 {
6591 xml_node_struct* dn = _data_new();
6592
6593 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6594 }
6595
6596 PUGI__FN bool xml_text::set(double rhs, int precision)
6597 {
6598 xml_node_struct* dn = _data_new();
6599
6600 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6601 }
6602
6603 PUGI__FN bool xml_text::set(bool rhs)
6604 {
6605 xml_node_struct* dn = _data_new();
6606
6607 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6608 }
6609
6610#ifdef PUGIXML_HAS_LONG_LONG
6611 PUGI__FN bool xml_text::set(long long rhs)
6612 {
6613 xml_node_struct* dn = _data_new();
6614
6615 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6616 }
6617
6618 PUGI__FN bool xml_text::set(unsigned long long rhs)
6619 {
6620 xml_node_struct* dn = _data_new();
6621
6622 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6623 }
6624#endif
6625
6626 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6627 {
6628 set(rhs);
6629 return *this;
6630 }
6631
6632 PUGI__FN xml_text& xml_text::operator=(int rhs)
6633 {
6634 set(rhs);
6635 return *this;
6636 }
6637
6638 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6639 {
6640 set(rhs);
6641 return *this;
6642 }
6643
6644 PUGI__FN xml_text& xml_text::operator=(long rhs)
6645 {
6646 set(rhs);
6647 return *this;
6648 }
6649
6650 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6651 {
6652 set(rhs);
6653 return *this;
6654 }
6655
6656 PUGI__FN xml_text& xml_text::operator=(double rhs)
6657 {
6658 set(rhs);
6659 return *this;
6660 }
6661
6662 PUGI__FN xml_text& xml_text::operator=(float rhs)
6663 {
6664 set(rhs);
6665 return *this;
6666 }
6667
6668 PUGI__FN xml_text& xml_text::operator=(bool rhs)
6669 {
6670 set(rhs);
6671 return *this;
6672 }
6673
6674#ifdef PUGIXML_HAS_LONG_LONG
6675 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6676 {
6677 set(rhs);
6678 return *this;
6679 }
6680
6681 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6682 {
6683 set(rhs);
6684 return *this;
6685 }
6686#endif
6687
6688 PUGI__FN xml_node xml_text::data() const
6689 {
6690 return xml_node(_data());
6691 }
6692
6693#ifdef __BORLANDC__
6694 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6695 {
6696 return (bool)lhs && rhs;
6697 }
6698
6699 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6700 {
6701 return (bool)lhs || rhs;
6702 }
6703#endif
6704
6705 PUGI__FN xml_node_iterator::xml_node_iterator()
6706 {
6707 }
6708
6709 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6710 {
6711 }
6712
6713 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6714 {
6715 }
6716
6717 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6718 {
6719 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6720 }
6721
6722 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6723 {
6724 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6725 }
6726
6727 PUGI__FN xml_node& xml_node_iterator::operator*() const
6728 {
6729 assert(_wrap._root);
6730 return _wrap;
6731 }
6732
6733 PUGI__FN xml_node* xml_node_iterator::operator->() const
6734 {
6735 assert(_wrap._root);
6736 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6737 }
6738
6739 PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
6740 {
6741 assert(_wrap._root);
6742 _wrap._root = _wrap._root->next_sibling;
6743 return *this;
6744 }
6745
6746 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6747 {
6748 xml_node_iterator temp = *this;
6749 ++*this;
6750 return temp;
6751 }
6752
6753 PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
6754 {
6755 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6756 return *this;
6757 }
6758
6759 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6760 {
6761 xml_node_iterator temp = *this;
6762 --*this;
6763 return temp;
6764 }
6765
6766 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6767 {
6768 }
6769
6770 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6771 {
6772 }
6773
6774 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6775 {
6776 }
6777
6778 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6779 {
6780 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6781 }
6782
6783 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6784 {
6785 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6786 }
6787
6788 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6789 {
6790 assert(_wrap._attr);
6791 return _wrap;
6792 }
6793
6794 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6795 {
6796 assert(_wrap._attr);
6797 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6798 }
6799
6800 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
6801 {
6802 assert(_wrap._attr);
6803 _wrap._attr = _wrap._attr->next_attribute;
6804 return *this;
6805 }
6806
6807 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6808 {
6809 xml_attribute_iterator temp = *this;
6810 ++*this;
6811 return temp;
6812 }
6813
6814 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
6815 {
6816 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6817 return *this;
6818 }
6819
6820 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6821 {
6822 xml_attribute_iterator temp = *this;
6823 --*this;
6824 return temp;
6825 }
6826
6827 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6828 {
6829 }
6830
6831 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6832 {
6833 }
6834
6835 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6836 {
6837 }
6838
6839 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6840 {
6841 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6842 }
6843
6844 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6845 {
6846 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6847 }
6848
6849 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6850 {
6851 assert(_wrap._root);
6852 return _wrap;
6853 }
6854
6855 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6856 {
6857 assert(_wrap._root);
6858 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6859 }
6860
6861 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
6862 {
6863 assert(_wrap._root);
6864 _wrap = _wrap.next_sibling(_name);
6865 return *this;
6866 }
6867
6868 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6869 {
6870 xml_named_node_iterator temp = *this;
6871 ++*this;
6872 return temp;
6873 }
6874
6875 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
6876 {
6877 if (_wrap._root)
6878 _wrap = _wrap.previous_sibling(_name);
6879 else
6880 {
6881 _wrap = _parent.last_child();
6882
6883 if (!impl::strequal(_wrap.name(), _name))
6884 _wrap = _wrap.previous_sibling(_name);
6885 }
6886
6887 return *this;
6888 }
6889
6890 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6891 {
6892 xml_named_node_iterator temp = *this;
6893 --*this;
6894 return temp;
6895 }
6896
6897 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6898 {
6899 }
6900
6901 PUGI__FN xml_parse_result::operator bool() const
6902 {
6903 return status == status_ok;
6904 }
6905
6906 PUGI__FN const char* xml_parse_result::description() const
6907 {
6908 switch (status)
6909 {
6910 case status_ok: return "No error";
6911
6912 case status_file_not_found: return "File was not found";
6913 case status_io_error: return "Error reading from file/stream";
6914 case status_out_of_memory: return "Could not allocate memory";
6915 case status_internal_error: return "Internal error occurred";
6916
6917 case status_unrecognized_tag: return "Could not determine tag type";
6918
6919 case status_bad_pi: return "Error parsing document declaration/processing instruction";
6920 case status_bad_comment: return "Error parsing comment";
6921 case status_bad_cdata: return "Error parsing CDATA section";
6922 case status_bad_doctype: return "Error parsing document type declaration";
6923 case status_bad_pcdata: return "Error parsing PCDATA section";
6924 case status_bad_start_element: return "Error parsing start element tag";
6925 case status_bad_attribute: return "Error parsing element attribute";
6926 case status_bad_end_element: return "Error parsing end element tag";
6927 case status_end_element_mismatch: return "Start-end tags mismatch";
6928
6929 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6930
6931 case status_no_document_element: return "No document element found";
6932
6933 default: return "Unknown error";
6934 }
6935 }
6936
6937 PUGI__FN xml_document::xml_document(): _buffer(0)
6938 {
6939 _create();
6940 }
6941
6942 PUGI__FN xml_document::~xml_document()
6943 {
6944 _destroy();
6945 }
6946
6947#ifdef PUGIXML_HAS_MOVE
6948 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
6949 {
6950 _create();
6951 _move(rhs);
6952 }
6953
6954 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6955 {
6956 if (this == &rhs) return *this;
6957
6958 _destroy();
6959 _create();
6960 _move(rhs);
6961
6962 return *this;
6963 }
6964#endif
6965
6966 PUGI__FN void xml_document::reset()
6967 {
6968 _destroy();
6969 _create();
6970 }
6971
6972 PUGI__FN void xml_document::reset(const xml_document& proto)
6973 {
6974 reset();
6975
6976 impl::node_copy_tree(_root, proto._root);
6977 }
6978
6979 PUGI__FN void xml_document::_create()
6980 {
6981 assert(!_root);
6982
6983 #ifdef PUGIXML_COMPACT
6984 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
6985 const size_t page_offset = sizeof(void*);
6986 #else
6987 const size_t page_offset = 0;
6988 #endif
6989
6990 // initialize sentinel page
6991 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6992
6993 // prepare page structure
6994 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6995 assert(page);
6996
6997 page->busy_size = impl::xml_memory_page_size;
6998
6999 // setup first page marker
7000 #ifdef PUGIXML_COMPACT
7001 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
7002 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
7003 *page->compact_page_marker = sizeof(impl::xml_memory_page);
7004 #endif
7005
7006 // allocate new root
7007 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
7008 _root->prev_sibling_c = _root;
7009
7010 // setup sentinel page
7011 page->allocator = static_cast<impl::xml_document_struct*>(_root);
7012
7013 // setup hash table pointer in allocator
7014 #ifdef PUGIXML_COMPACT
7015 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
7016 #endif
7017
7018 // verify the document allocation
7019 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
7020 }
7021
7022 PUGI__FN void xml_document::_destroy()
7023 {
7024 assert(_root);
7025
7026 // destroy static storage
7027 if (_buffer)
7028 {
7029 impl::xml_memory::deallocate(_buffer);
7030 _buffer = 0;
7031 }
7032
7033 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7034 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7035 {
7036 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7037 }
7038
7039 // destroy dynamic storage, leave sentinel page (it's in static memory)
7040 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
7041 assert(root_page && !root_page->prev);
7042 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
7043
7044 for (impl::xml_memory_page* page = root_page->next; page; )
7045 {
7046 impl::xml_memory_page* next = page->next;
7047
7048 impl::xml_allocator::deallocate_page(page);
7049
7050 page = next;
7051 }
7052
7053 #ifdef PUGIXML_COMPACT
7054 // destroy hash table
7055 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7056 #endif
7057
7058 _root = 0;
7059 }
7060
7061#ifdef PUGIXML_HAS_MOVE
7062 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7063 {
7064 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
7065 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
7066
7067 // save first child pointer for later; this needs hash access
7068 xml_node_struct* other_first_child = other->first_child;
7069
7070 #ifdef PUGIXML_COMPACT
7071 // reserve space for the hash table up front; this is the only operation that can fail
7072 // if it does, we have no choice but to throw (if we have exceptions)
7073 if (other_first_child)
7074 {
7075 size_t other_children = 0;
7076 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7077 other_children++;
7078
7079 // in compact mode, each pointer assignment could result in a hash table request
7080 // during move, we have to relocate document first_child and parents of all children
7081 // normally there's just one child and its parent has a pointerless encoding but
7082 // we assume the worst here
7083 if (!other->_hash->reserve(other_children + 1))
7084 {
7085 #ifdef PUGIXML_NO_EXCEPTIONS
7086 return;
7087 #else
7088 throw std::bad_alloc();
7089 #endif
7090 }
7091 }
7092 #endif
7093
7094 // move allocation state
7095 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
7096 if (other->_root != PUGI__GETPAGE(other))
7097 {
7098 doc->_root = other->_root;
7099 doc->_busy_size = other->_busy_size;
7100 }
7101
7102 // move buffer state
7103 doc->buffer = other->buffer;
7104 doc->extra_buffers = other->extra_buffers;
7105 _buffer = rhs._buffer;
7106
7107 #ifdef PUGIXML_COMPACT
7108 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7109 doc->hash = other->hash;
7110 doc->_hash = &doc->hash;
7111
7112 // make sure we don't access other hash up until the end when we reinitialize other document
7113 other->_hash = 0;
7114 #endif
7115
7116 // move page structure
7117 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7118 assert(doc_page && !doc_page->prev && !doc_page->next);
7119
7120 impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7121 assert(other_page && !other_page->prev);
7122
7123 // relink pages since root page is embedded into xml_document
7124 if (impl::xml_memory_page* page = other_page->next)
7125 {
7126 assert(page->prev == other_page);
7127
7128 page->prev = doc_page;
7129
7130 doc_page->next = page;
7131 other_page->next = 0;
7132 }
7133
7134 // make sure pages point to the correct document state
7135 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7136 {
7137 assert(page->allocator == other);
7138
7139 page->allocator = doc;
7140
7141 #ifdef PUGIXML_COMPACT
7142 // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7143 if (page->compact_shared_parent == other)
7144 page->compact_shared_parent = doc;
7145 #endif
7146 }
7147
7148 // move tree structure
7149 assert(!doc->first_child);
7150
7151 doc->first_child = other_first_child;
7152
7153 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7154 {
7155 #ifdef PUGIXML_COMPACT
7156 // most children will have migrated when we reassigned compact_shared_parent
7157 assert(node->parent == other || node->parent == doc);
7158
7159 node->parent = doc;
7160 #else
7161 assert(node->parent == other);
7162 node->parent = doc;
7163 #endif
7164 }
7165
7166 // reset other document
7167 new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7168 rhs._buffer = 0;
7169 }
7170#endif
7171
7172#ifndef PUGIXML_NO_STL
7173 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7174 {
7175 reset();
7176
7177 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7178 }
7179
7180 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7181 {
7182 reset();
7183
7184 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7185 }
7186#endif
7187
7188 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7189 {
7190 // Force native encoding (skip autodetection)
7191 #ifdef PUGIXML_WCHAR_MODE
7192 xml_encoding encoding = encoding_wchar;
7193 #else
7194 xml_encoding encoding = encoding_utf8;
7195 #endif
7196
7197 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7198 }
7199
7200 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7201 {
7202 return load_string(contents, options);
7203 }
7204
7205 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7206 {
7207 reset();
7208
7209 using impl::auto_deleter; // MSVC7 workaround
7210 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7211
7212 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7213 }
7214
7215 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7216 {
7217 reset();
7218
7219 using impl::auto_deleter; // MSVC7 workaround
7220 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7221
7222 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7223 }
7224
7225 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7226 {
7227 reset();
7228
7229 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7230 }
7231
7232 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7233 {
7234 reset();
7235
7236 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7237 }
7238
7239 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7240 {
7241 reset();
7242
7243 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7244 }
7245
7246 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7247 {
7248 impl::xml_buffered_writer buffered_writer(writer, encoding);
7249
7250 if ((flags & format_write_bom) && encoding != encoding_latin1)
7251 {
7252 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7253 #ifdef PUGIXML_WCHAR_MODE
7254 unsigned int bom = 0xfeff;
7255 buffered_writer.write(static_cast<wchar_t>(bom));
7256 #else
7257 buffered_writer.write('\xef', '\xbb', '\xbf');
7258 #endif
7259 }
7260
7261 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7262 {
7263 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7264 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7265 buffered_writer.write('?', '>');
7266 if (!(flags & format_raw)) buffered_writer.write('\n');
7267 }
7268
7269 impl::node_output(buffered_writer, _root, indent, flags, 0);
7270
7271 buffered_writer.flush();
7272 }
7273
7274#ifndef PUGIXML_NO_STL
7275 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7276 {
7277 xml_writer_stream writer(stream);
7278
7279 save(writer, indent, flags, encoding);
7280 }
7281
7282 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7283 {
7284 xml_writer_stream writer(stream);
7285
7286 save(writer, indent, flags, encoding_wchar);
7287 }
7288#endif
7289
7290 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7291 {
7292 using impl::auto_deleter; // MSVC7 workaround
7293 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7294
7295 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7296 }
7297
7298 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7299 {
7300 using impl::auto_deleter; // MSVC7 workaround
7301 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7302
7303 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7304 }
7305
7306 PUGI__FN xml_node xml_document::document_element() const
7307 {
7308 assert(_root);
7309
7310 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7311 if (PUGI__NODETYPE(i) == node_element)
7312 return xml_node(i);
7313
7314 return xml_node();
7315 }
7316
7317#ifndef PUGIXML_NO_STL
7318 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7319 {
7320 assert(str);
7321
7322 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7323 }
7324
7325 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7326 {
7327 return impl::as_utf8_impl(str.c_str(), str.size());
7328 }
7329
7330 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7331 {
7332 assert(str);
7333
7334 return impl::as_wide_impl(str, strlen(str));
7335 }
7336
7337 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7338 {
7339 return impl::as_wide_impl(str.c_str(), str.size());
7340 }
7341#endif
7342
7343 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7344 {
7345 impl::xml_memory::allocate = allocate;
7346 impl::xml_memory::deallocate = deallocate;
7347 }
7348
7349 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7350 {
7351 return impl::xml_memory::allocate;
7352 }
7353
7354 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7355 {
7356 return impl::xml_memory::deallocate;
7357 }
7358}
7359
7360#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7361namespace std
7362{
7363 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
7364 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7365 {
7366 return std::bidirectional_iterator_tag();
7367 }
7368
7369 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7370 {
7371 return std::bidirectional_iterator_tag();
7372 }
7373
7374 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7375 {
7376 return std::bidirectional_iterator_tag();
7377 }
7378}
7379#endif
7380
7381#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7382namespace std
7383{
7384 // Workarounds for (non-standard) iterator category detection
7385 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7386 {
7387 return std::bidirectional_iterator_tag();
7388 }
7389
7390 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7391 {
7392 return std::bidirectional_iterator_tag();
7393 }
7394
7395 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7396 {
7397 return std::bidirectional_iterator_tag();
7398 }
7399}
7400#endif
7401
7402#ifndef PUGIXML_NO_XPATH
7403// STL replacements
7404PUGI__NS_BEGIN
// Function object: compares two values of the same type with operator==.
struct equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool same = (lhs == rhs);

        return same;
    }
};
7412
// Function object: compares two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool different = (lhs != rhs);

        return different;
    }
};
7420
// Function object: orders two values of the same type with operator<.
struct less
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool before = (lhs < rhs);

        return before;
    }
};
7428
// Function object: orders two values of the same type with operator<=.
struct less_equal
{
    template <typename T> bool operator()(const T& lhs, const T& rhs) const
    {
        const bool not_after = (lhs <= rhs);

        return not_after;
    }
};
7436
// Exchanges the values of lhs and rhs using one copy construction and two copy assignments.
template <typename T> inline void swap(T& lhs, T& rhs)
{
    T held = lhs;

    lhs = rhs;
    rhs = held;
}
7443
7444 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
7445 {
7446 I result = begin;
7447
7448 for (I it = begin + 1; it != end; ++it)
7449 if (pred(*it, *result))
7450 result = it;
7451
7452 return result;
7453 }
7454
7455 template <typename I> PUGI__FN void reverse(I begin, I end)
7456 {
7457 while (end - begin > 1)
7458 swap(*begin++, *--end);
7459 }
7460
7461 template <typename I> PUGI__FN I unique(I begin, I end)
7462 {
7463 // fast skip head
7464 while (end - begin > 1 && *begin != *(begin + 1))
7465 begin++;
7466
7467 if (begin == end)
7468 return begin;
7469
7470 // last written element
7471 I write = begin++;
7472
7473 // merge unique elements
7474 while (begin != end)
7475 {
7476 if (*begin != *write)
7477 *++write = *begin++;
7478 else
7479 begin++;
7480 }
7481
7482 // past-the-end (write points to live element)
7483 return write + 1;
7484 }
7485
7486 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
7487 {
7488 if (begin == end)
7489 return;
7490
7491 for (T* it = begin + 1; it != end; ++it)
7492 {
7493 T val = *it;
7494 T* hole = it;
7495
7496 // move hole backwards
7497 while (hole > begin && pred(val, *(hole - 1)))
7498 {
7499 *hole = *(hole - 1);
7500 hole--;
7501 }
7502
7503 // fill hole with element
7504 *hole = val;
7505 }
7506 }
7507
// Returns the iterator (one of first, middle, last) that refers to the median of the three
// referenced values under pred. Only the local iterator copies are exchanged; the referenced
// elements are not modified.
template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
{
    // order first/middle
    if (pred(*middle, *first))
    {
        swap(middle, first);
    }

    // order middle/last
    if (pred(*last, *middle))
    {
        swap(last, middle);
    }

    // the previous exchange may have broken the first/middle order
    if (pred(*middle, *first))
    {
        swap(middle, first);
    }

    return middle;
}
7519
7520 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7521 {
7522 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7523 T* eq = begin;
7524 T* lt = begin;
7525 T* gt = end;
7526
7527 while (lt < gt)
7528 {
7529 if (pred(*lt, pivot))
7530 lt++;
7531 else if (*lt == pivot)
7532 swap(*eq++, *lt++);
7533 else
7534 swap(*lt, *--gt);
7535 }
7536
7537 // we now have just 4 groups: = < >; move equal elements to the middle
7538 T* eqbeg = gt;
7539
7540 for (T* it = begin; it != eq; ++it)
7541 swap(*it, *--eqbeg);
7542
7543 *out_eqbeg = eqbeg;
7544 *out_eqend = gt;
7545 }
7546
7547 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
7548 {
7549 // sort large chunks
7550 while (end - begin > 16)
7551 {
7552 // find median element
7553 I middle = begin + (end - begin) / 2;
7554 I median = median3(begin, middle, end - 1, pred);
7555
7556 // partition in three chunks (< = >)
7557 I eqbeg, eqend;
7558 partition3(begin, end, *median, pred, &eqbeg, &eqend);
7559
7560 // loop on larger half
7561 if (eqbeg - begin > end - eqend)
7562 {
7563 sort(eqend, end, pred);
7564 end = eqbeg;
7565 }
7566 else
7567 {
7568 sort(begin, eqbeg, pred);
7569 begin = eqend;
7570 }
7571 }
7572
7573 // insertion sort small chunk
7574 insertion_sort(begin, end, pred);
7575 }
7576
7577 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
7578 {
7579 assert(key);
7580
7581 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
7582
7583 // MurmurHash3 32-bit finalizer
7584 h ^= h >> 16;
7585 h *= 0x85ebca6bu;
7586 h ^= h >> 13;
7587 h *= 0xc2b2ae35u;
7588 h ^= h >> 16;
7589
7590 size_t hashmod = size - 1;
7591 size_t bucket = h & hashmod;
7592
7593 for (size_t probe = 0; probe <= hashmod; ++probe)
7594 {
7595 if (table[bucket] == 0)
7596 {
7597 table[bucket] = key;
7598 return true;
7599 }
7600
7601 if (table[bucket] == key)
7602 return false;
7603
7604 // hash collision, quadratic probing
7605 bucket = (bucket + probe + 1) & hashmod;
7606 }
7607
7608 assert(false && "Hash table is full"); // unreachable
7609 return false;
7610 }
7611PUGI__NS_END
7612
7613// Allocator used for AST and evaluation stacks
7614PUGI__NS_BEGIN
// Size in bytes of the data area of one XPath memory block.
// PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides the default of 4096 when it is defined.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif
7622
7623 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7624
7625 struct xpath_memory_block
7626 {
7627 xpath_memory_block* next;
7628 size_t capacity;
7629
7630 union
7631 {
7632 char data[xpath_memory_page_size];
7633 double alignment;
7634 };
7635 };
7636
7637 struct xpath_allocator
7638 {
7639 xpath_memory_block* _root;
7640 size_t _root_size;
7641 bool* _error;
7642
7643 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7644 {
7645 }
7646
7647 void* allocate(size_t size)
7648 {
7649 // round size up to block alignment boundary
7650 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7651
7652 if (_root_size + size <= _root->capacity)
7653 {
7654 void* buf = &_root->data[0] + _root_size;
7655 _root_size += size;
7656 return buf;
7657 }
7658 else
7659 {
7660 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7661 size_t block_capacity_base = sizeof(_root->data);
7662 size_t block_capacity_req = size + block_capacity_base / 4;
7663 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7664
7665 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7666
7667 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7668 if (!block)
7669 {
7670 if (_error) *_error = true;
7671 return 0;
7672 }
7673
7674 block->next = _root;
7675 block->capacity = block_capacity;
7676
7677 _root = block;
7678 _root_size = size;
7679
7680 return block->data;
7681 }
7682 }
7683
7684 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7685 {
7686 // round size up to block alignment boundary
7687 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7688 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7689
7690 // we can only reallocate the last object
7691 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7692
7693 // try to reallocate the object inplace
7694 if (ptr && _root_size - old_size + new_size <= _root->capacity)
7695 {
7696 _root_size = _root_size - old_size + new_size;
7697 return ptr;
7698 }
7699
7700 // allocate a new block
7701 void* result = allocate(new_size);
7702 if (!result) return 0;
7703
7704 // we have a new block
7705 if (ptr)
7706 {
7707 // copy old data (we only support growing)
7708 assert(new_size >= old_size);
7709 memcpy(result, ptr, old_size);
7710
7711 // free the previous page if it had no other objects
7712 assert(_root->data == result);
7713 assert(_root->next);
7714
7715 if (_root->next->data == ptr)
7716 {
7717 // deallocate the whole page, unless it was the first one
7718 xpath_memory_block* next = _root->next->next;
7719
7720 if (next)
7721 {
7722 xml_memory::deallocate(_root->next);
7723 _root->next = next;
7724 }
7725 }
7726 }
7727
7728 return result;
7729 }
7730
7731 void revert(const xpath_allocator& state)
7732 {
7733 // free all new pages
7734 xpath_memory_block* cur = _root;
7735
7736 while (cur != state._root)
7737 {
7738 xpath_memory_block* next = cur->next;
7739
7740 xml_memory::deallocate(cur);
7741
7742 cur = next;
7743 }
7744
7745 // restore state
7746 _root = state._root;
7747 _root_size = state._root_size;
7748 }
7749
7750 void release()
7751 {
7752 xpath_memory_block* cur = _root;
7753 assert(cur);
7754
7755 while (cur->next)
7756 {
7757 xpath_memory_block* next = cur->next;
7758
7759 xml_memory::deallocate(cur);
7760
7761 cur = next;
7762 }
7763 }
7764 };
7765
7766 struct xpath_allocator_capture
7767 {
7768 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7769 {
7770 }
7771
7772 ~xpath_allocator_capture()
7773 {
7774 _target->revert(_state);
7775 }
7776
7777 xpath_allocator* _target;
7778 xpath_allocator _state;
7779 };
7780
7781 struct xpath_stack
7782 {
7783 xpath_allocator* result;
7784 xpath_allocator* temp;
7785 };
7786
7787 struct xpath_stack_data
7788 {
7789 xpath_memory_block blocks[2];
7790 xpath_allocator result;
7791 xpath_allocator temp;
7792 xpath_stack stack;
7793 bool oom;
7794
7795 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7796 {
7797 blocks[0].next = blocks[1].next = 0;
7798 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7799
7800 stack.result = &result;
7801 stack.temp = &temp;
7802 }
7803
7804 ~xpath_stack_data()
7805 {
7806 result.release();
7807 temp.release();
7808 }
7809 };
7810PUGI__NS_END
7811
7812// String class
7813PUGI__NS_BEGIN
7814 class xpath_string
7815 {
7816 const char_t* _buffer;
7817 bool _uses_heap;
7818 size_t _length_heap;
7819
7820 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7821 {
7822 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7823 if (!result) return 0;
7824
7825 memcpy(result, string, length * sizeof(char_t));
7826 result[length] = 0;
7827
7828 return result;
7829 }
7830
7831 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7832 {
7833 }
7834
7835 public:
7836 static xpath_string from_const(const char_t* str)
7837 {
7838 return xpath_string(str, false, 0);
7839 }
7840
7841 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7842 {
7843 assert(begin <= end && *end == 0);
7844
7845 return xpath_string(begin, true, static_cast<size_t>(end - begin));
7846 }
7847
7848 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7849 {
7850 assert(begin <= end);
7851
7852 if (begin == end)
7853 return xpath_string();
7854
7855 size_t length = static_cast<size_t>(end - begin);
7856 const char_t* data = duplicate_string(begin, length, alloc);
7857
7858 return data ? xpath_string(data, true, length) : xpath_string();
7859 }
7860
7861 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7862 {
7863 }
7864
7865 void append(const xpath_string& o, xpath_allocator* alloc)
7866 {
7867 // skip empty sources
7868 if (!*o._buffer) return;
7869
7870 // fast append for constant empty target and constant source
7871 if (!*_buffer && !_uses_heap && !o._uses_heap)
7872 {
7873 _buffer = o._buffer;
7874 }
7875 else
7876 {
7877 // need to make heap copy
7878 size_t target_length = length();
7879 size_t source_length = o.length();
7880 size_t result_length = target_length + source_length;
7881
7882 // allocate new buffer
7883 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7884 if (!result) return;
7885
7886 // append first string to the new buffer in case there was no reallocation
7887 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7888
7889 // append second string to the new buffer
7890 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7891 result[result_length] = 0;
7892
7893 // finalize
7894 _buffer = result;
7895 _uses_heap = true;
7896 _length_heap = result_length;
7897 }
7898 }
7899
7900 const char_t* c_str() const
7901 {
7902 return _buffer;
7903 }
7904
7905 size_t length() const
7906 {
7907 return _uses_heap ? _length_heap : strlength(_buffer);
7908 }
7909
7910 char_t* data(xpath_allocator* alloc)
7911 {
7912 // make private heap copy
7913 if (!_uses_heap)
7914 {
7915 size_t length_ = strlength(_buffer);
7916 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7917
7918 if (!data_) return 0;
7919
7920 _buffer = data_;
7921 _uses_heap = true;
7922 _length_heap = length_;
7923 }
7924
7925 return const_cast<char_t*>(_buffer);
7926 }
7927
7928 bool empty() const
7929 {
7930 return *_buffer == 0;
7931 }
7932
7933 bool operator==(const xpath_string& o) const
7934 {
7935 return strequal(_buffer, o._buffer);
7936 }
7937
7938 bool operator!=(const xpath_string& o) const
7939 {
7940 return !strequal(_buffer, o._buffer);
7941 }
7942
7943 bool uses_heap() const
7944 {
7945 return _uses_heap;
7946 }
7947 };
7948PUGI__NS_END
7949
7950PUGI__NS_BEGIN
7951 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7952 {
7953 while (*pattern && *string == *pattern)
7954 {
7955 string++;
7956 pattern++;
7957 }
7958
7959 return *pattern == 0;
7960 }
7961
7962 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7963 {
7964 #ifdef PUGIXML_WCHAR_MODE
7965 return wcschr(s, c);
7966 #else
7967 return strchr(s, c);
7968 #endif
7969 }
7970
7971 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7972 {
7973 #ifdef PUGIXML_WCHAR_MODE
7974 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7975 return (*p == 0) ? s : wcsstr(s, p);
7976 #else
7977 return strstr(s, p);
7978 #endif
7979 }
7980
7981 // Converts symbol to lower case, if it is an ASCII one
7982 PUGI__FN char_t tolower_ascii(char_t ch)
7983 {
7984 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7985 }
7986
7987 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7988 {
7989 if (na.attribute())
7990 return xpath_string::from_const(na.attribute().value());
7991 else
7992 {
7993 xml_node n = na.node();
7994
7995 switch (n.type())
7996 {
7997 case node_pcdata:
7998 case node_cdata:
7999 case node_comment:
8000 case node_pi:
8001 return xpath_string::from_const(n.value());
8002
8003 case node_document:
8004 case node_element:
8005 {
8006 xpath_string result;
8007
8008 // element nodes can have value if parse_embed_pcdata was used
8009 if (n.value()[0])
8010 result.append(xpath_string::from_const(n.value()), alloc);
8011
8012 xml_node cur = n.first_child();
8013
8014 while (cur && cur != n)
8015 {
8016 if (cur.type() == node_pcdata || cur.type() == node_cdata)
8017 result.append(xpath_string::from_const(cur.value()), alloc);
8018
8019 if (cur.first_child())
8020 cur = cur.first_child();
8021 else if (cur.next_sibling())
8022 cur = cur.next_sibling();
8023 else
8024 {
8025 while (!cur.next_sibling() && cur != n)
8026 cur = cur.parent();
8027
8028 if (cur != n) cur = cur.next_sibling();
8029 }
8030 }
8031
8032 return result;
8033 }
8034
8035 default:
8036 return xpath_string();
8037 }
8038 }
8039 }
8040
8041 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8042 {
8043 assert(ln->parent == rn->parent);
8044
8045 // there is no common ancestor (the shared parent is null), nodes are from different documents
8046 if (!ln->parent) return ln < rn;
8047
8048 // determine sibling order
8049 xml_node_struct* ls = ln;
8050 xml_node_struct* rs = rn;
8051
8052 while (ls && rs)
8053 {
8054 if (ls == rn) return true;
8055 if (rs == ln) return false;
8056
8057 ls = ls->next_sibling;
8058 rs = rs->next_sibling;
8059 }
8060
8061 // if rn sibling chain ended ln must be before rn
8062 return !rs;
8063 }
8064
8065 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8066 {
8067 // find common ancestor at the same depth, if any
8068 xml_node_struct* lp = ln;
8069 xml_node_struct* rp = rn;
8070
8071 while (lp && rp && lp->parent != rp->parent)
8072 {
8073 lp = lp->parent;
8074 rp = rp->parent;
8075 }
8076
8077 // parents are the same!
8078 if (lp && rp) return node_is_before_sibling(lp, rp);
8079
8080 // nodes are at different depths, need to normalize heights
8081 bool left_higher = !lp;
8082
8083 while (lp)
8084 {
8085 lp = lp->parent;
8086 ln = ln->parent;
8087 }
8088
8089 while (rp)
8090 {
8091 rp = rp->parent;
8092 rn = rn->parent;
8093 }
8094
8095 // one node is the ancestor of the other
8096 if (ln == rn) return left_higher;
8097
8098 // find common ancestor... again
8099 while (ln->parent != rn->parent)
8100 {
8101 ln = ln->parent;
8102 rn = rn->parent;
8103 }
8104
8105 return node_is_before_sibling(ln, rn);
8106 }
8107
8108 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8109 {
8110 while (node && node != parent) node = node->parent;
8111
8112 return parent && node == parent;
8113 }
8114
8115 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
8116 {
8117 xml_node_struct* node = xnode.node().internal_object();
8118
8119 if (node)
8120 {
8121 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8122 {
8123 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8124 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8125 }
8126
8127 return 0;
8128 }
8129
8130 xml_attribute_struct* attr = xnode.attribute().internal_object();
8131
8132 if (attr)
8133 {
8134 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8135 {
8136 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8137 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8138 }
8139
8140 return 0;
8141 }
8142
8143 return 0;
8144 }
8145
8146 struct document_order_comparator
8147 {
8148 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8149 {
8150 // optimized document order based check
8151 const void* lo = document_buffer_order(lhs);
8152 const void* ro = document_buffer_order(rhs);
8153
8154 if (lo && ro) return lo < ro;
8155
8156 // slow comparison
8157 xml_node ln = lhs.node(), rn = rhs.node();
8158
8159 // compare attributes
8160 if (lhs.attribute() && rhs.attribute())
8161 {
8162 // shared parent
8163 if (lhs.parent() == rhs.parent())
8164 {
8165 // determine sibling order
8166 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8167 if (a == rhs.attribute())
8168 return true;
8169
8170 return false;
8171 }
8172
8173 // compare attribute parents
8174 ln = lhs.parent();
8175 rn = rhs.parent();
8176 }
8177 else if (lhs.attribute())
8178 {
8179 // attributes go after the parent element
8180 if (lhs.parent() == rhs.node()) return false;
8181
8182 ln = lhs.parent();
8183 }
8184 else if (rhs.attribute())
8185 {
8186 // attributes go after the parent element
8187 if (rhs.parent() == lhs.node()) return true;
8188
8189 rn = rhs.parent();
8190 }
8191
8192 if (ln == rn) return false;
8193
8194 if (!ln || !rn) return ln < rn;
8195
8196 return node_is_before(ln.internal_object(), rn.internal_object());
8197 }
8198 };
8199
8200 PUGI__FN double gen_nan()
8201 {
8202 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8203 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8204 typedef uint32_t UI; // BCC5 workaround
8205 union { float f; UI i; } u;
8206 u.i = 0x7fc00000;
8207 return double(u.f);
8208 #else
8209 // fallback
8210 const volatile double zero = 0.0;
8211 return zero / zero;
8212 #endif
8213 }
8214
8215 PUGI__FN bool is_nan(double value)
8216 {
8217 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8218 return !!_isnan(value);
8219 #elif defined(fpclassify) && defined(FP_NAN)
8220 return fpclassify(value) == FP_NAN;
8221 #else
8222 // fallback
8223 const volatile double v = value;
8224 return v != v;
8225 #endif
8226 }
8227
8228 PUGI__FN const char_t* convert_number_to_string_special(double value)
8229 {
8230 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8231 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8232 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8233 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8234 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8235 switch (fpclassify(value))
8236 {
8237 case FP_NAN:
8238 return PUGIXML_TEXT("NaN");
8239
8240 case FP_INFINITE:
8241 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8242
8243 case FP_ZERO:
8244 return PUGIXML_TEXT("0");
8245
8246 default:
8247 return 0;
8248 }
8249 #else
8250 // fallback
8251 const volatile double v = value;
8252
8253 if (v == 0) return PUGIXML_TEXT("0");
8254 if (v != v) return PUGIXML_TEXT("NaN");
8255 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8256 return 0;
8257 #endif
8258 }
8259
8260 PUGI__FN bool convert_number_to_boolean(double value)
8261 {
8262 return (value != 0 && !is_nan(value));
8263 }
8264
8265 PUGI__FN void truncate_zeros(char* begin, char* end)
8266 {
8267 while (begin != end && end[-1] == '0') end--;
8268
8269 *end = 0;
8270 }
8271
8272 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8273#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
8274 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8275 {
8276 // get base values
8277 int sign, exponent;
8278 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8279
8280 // truncate redundant zeros
8281 truncate_zeros(buffer, buffer + strlen(buffer));
8282
8283 // fill results
8284 *out_mantissa = buffer;
8285 *out_exponent = exponent;
8286 }
8287#else
8288 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8289 {
8290 // get a scientific notation value with IEEE DBL_DIG decimals
8291 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8292
8293 // get the exponent (possibly negative)
8294 char* exponent_string = strchr(buffer, 'e');
8295 assert(exponent_string);
8296
8297 int exponent = atoi(exponent_string + 1);
8298
8299 // extract mantissa string: skip sign
8300 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8301 assert(mantissa[0] != '0' && mantissa[1] == '.');
8302
8303 // divide mantissa by 10 to eliminate integer part
8304 mantissa[1] = mantissa[0];
8305 mantissa++;
8306 exponent++;
8307
8308 // remove extra mantissa digits and zero-terminate mantissa
8309 truncate_zeros(mantissa, exponent_string);
8310
8311 // fill results
8312 *out_mantissa = mantissa;
8313 *out_exponent = exponent;
8314 }
8315#endif
8316
8317 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8318 {
8319 // try special number conversion
8320 const char_t* special = convert_number_to_string_special(value);
8321 if (special) return xpath_string::from_const(special);
8322
8323 // get mantissa + exponent form
8324 char mantissa_buffer[32];
8325
8326 char* mantissa;
8327 int exponent;
8328 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8329
8330 // allocate a buffer of suitable length for the number
8331 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8332 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8333 if (!result) return xpath_string();
8334
8335 // make the number!
8336 char_t* s = result;
8337
8338 // sign
8339 if (value < 0) *s++ = '-';
8340
8341 // integer part
8342 if (exponent <= 0)
8343 {
8344 *s++ = '0';
8345 }
8346 else
8347 {
8348 while (exponent > 0)
8349 {
8350 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8351 *s++ = *mantissa ? *mantissa++ : '0';
8352 exponent--;
8353 }
8354 }
8355
8356 // fractional part
8357 if (*mantissa)
8358 {
8359 // decimal point
8360 *s++ = '.';
8361
8362 // extra zeroes from negative exponent
8363 while (exponent < 0)
8364 {
8365 *s++ = '0';
8366 exponent++;
8367 }
8368
8369 // extra mantissa digits
8370 while (*mantissa)
8371 {
8372 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8373 *s++ = *mantissa++;
8374 }
8375 }
8376
8377 // zero-terminate
8378 assert(s < result + result_size);
8379 *s = 0;
8380
8381 return xpath_string::from_heap_preallocated(result, s);
8382 }
8383
8384 PUGI__FN bool check_string_to_number_format(const char_t* string)
8385 {
8386 // parse leading whitespace
8387 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8388
8389 // parse sign
8390 if (*string == '-') ++string;
8391
8392 if (!*string) return false;
8393
8394 // if there is no integer part, there should be a decimal part with at least one digit
8395 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8396
8397 // parse integer part
8398 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8399
8400 // parse decimal part
8401 if (*string == '.')
8402 {
8403 ++string;
8404
8405 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8406 }
8407
8408 // parse trailing whitespace
8409 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8410
8411 return *string == 0;
8412 }
8413
8414 PUGI__FN double convert_string_to_number(const char_t* string)
8415 {
8416 // check string format
8417 if (!check_string_to_number_format(string)) return gen_nan();
8418
8419 // parse string
8420 #ifdef PUGIXML_WCHAR_MODE
8421 return wcstod(string, 0);
8422 #else
8423 return strtod(string, 0);
8424 #endif
8425 }
8426
8427 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8428 {
8429 size_t length = static_cast<size_t>(end - begin);
8430 char_t* scratch = buffer;
8431
8432 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8433 {
8434 // need to make dummy on-heap copy
8435 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8436 if (!scratch) return false;
8437 }
8438
8439 // copy string to zero-terminated buffer and perform conversion
8440 memcpy(scratch, begin, length * sizeof(char_t));
8441 scratch[length] = 0;
8442
8443 *out_result = convert_string_to_number(scratch);
8444
8445 // free dummy buffer
8446 if (scratch != buffer) xml_memory::deallocate(scratch);
8447
8448 return true;
8449 }
8450
8451 PUGI__FN double round_nearest(double value)
8452 {
8453 return floor(value + 0.5);
8454 }
8455
8456 PUGI__FN double round_nearest_nzero(double value)
8457 {
8458 // same as round_nearest, but returns -0 for [-0.5, -0]
8459 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8460 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8461 }
8462
8463 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8464 {
8465 return node.attribute() ? node.attribute().name() : node.node().name();
8466 }
8467
8468 PUGI__FN const char_t* local_name(const xpath_node& node)
8469 {
8470 const char_t* name = qualified_name(node);
8471 const char_t* p = find_char(name, ':');
8472
8473 return p ? p + 1 : name;
8474 }
8475
8476 struct namespace_uri_predicate
8477 {
8478 const char_t* prefix;
8479 size_t prefix_length;
8480
8481 namespace_uri_predicate(const char_t* name)
8482 {
8483 const char_t* pos = find_char(name, ':');
8484
8485 prefix = pos ? name : 0;
8486 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8487 }
8488
8489 bool operator()(xml_attribute a) const
8490 {
8491 const char_t* name = a.name();
8492
8493 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8494
8495 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8496 }
8497 };
8498
8499 PUGI__FN const char_t* namespace_uri(xml_node node)
8500 {
8501 namespace_uri_predicate pred = node.name();
8502
8503 xml_node p = node;
8504
8505 while (p)
8506 {
8507 xml_attribute a = p.find_attribute(pred);
8508
8509 if (a) return a.value();
8510
8511 p = p.parent();
8512 }
8513
8514 return PUGIXML_TEXT("");
8515 }
8516
8517 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8518 {
8519 namespace_uri_predicate pred = attr.name();
8520
8521 // Default namespace does not apply to attributes
8522 if (!pred.prefix) return PUGIXML_TEXT("");
8523
8524 xml_node p = parent;
8525
8526 while (p)
8527 {
8528 xml_attribute a = p.find_attribute(pred);
8529
8530 if (a) return a.value();
8531
8532 p = p.parent();
8533 }
8534
8535 return PUGIXML_TEXT("");
8536 }
8537
8538 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8539 {
8540 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8541 }
8542
8543 PUGI__FN char_t* normalize_space(char_t* buffer)
8544 {
8545 char_t* write = buffer;
8546
8547 for (char_t* it = buffer; *it; )
8548 {
8549 char_t ch = *it++;
8550
8551 if (PUGI__IS_CHARTYPE(ch, ct_space))
8552 {
8553 // replace whitespace sequence with single space
8554 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8555
8556 // avoid leading spaces
8557 if (write != buffer) *write++ = ' ';
8558 }
8559 else *write++ = ch;
8560 }
8561
8562 // remove trailing space
8563 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8564
8565 // zero-terminate
8566 *write = 0;
8567
8568 return write;
8569 }
8570
8571 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8572 {
8573 char_t* write = buffer;
8574
8575 while (*buffer)
8576 {
8577 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8578
8579 const char_t* pos = find_char(from, ch);
8580
8581 if (!pos)
8582 *write++ = ch; // do not process
8583 else if (static_cast<size_t>(pos - from) < to_length)
8584 *write++ = to[pos - from]; // replace
8585 }
8586
8587 // zero-terminate
8588 *write = 0;
8589
8590 return write;
8591 }
8592
8593 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8594 {
8595 unsigned char table[128] = {0};
8596
8597 while (*from)
8598 {
8599 unsigned int fc = static_cast<unsigned int>(*from);
8600 unsigned int tc = static_cast<unsigned int>(*to);
8601
8602 if (fc >= 128 || tc >= 128)
8603 return 0;
8604
8605 // code=128 means "skip character"
8606 if (!table[fc])
8607 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8608
8609 from++;
8610 if (tc) to++;
8611 }
8612
8613 for (int i = 0; i < 128; ++i)
8614 if (!table[i])
8615 table[i] = static_cast<unsigned char>(i);
8616
8617 void* result = alloc->allocate(sizeof(table));
8618 if (!result) return 0;
8619
8620 memcpy(result, table, sizeof(table));
8621
8622 return static_cast<unsigned char*>(result);
8623 }
8624
8625 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8626 {
8627 char_t* write = buffer;
8628
8629 while (*buffer)
8630 {
8631 char_t ch = *buffer++;
8632 unsigned int index = static_cast<unsigned int>(ch);
8633
8634 if (index < 128)
8635 {
8636 unsigned char code = table[index];
8637
8638 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8639 // this code skips these characters without extra branches
8640 *write = static_cast<char_t>(code);
8641 write += 1 - (code >> 7);
8642 }
8643 else
8644 {
8645 *write++ = ch;
8646 }
8647 }
8648
8649 // zero-terminate
8650 *write = 0;
8651
8652 return write;
8653 }
8654
8655 inline bool is_xpath_attribute(const char_t* name)
8656 {
8657 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8658 }
8659
8660 struct xpath_variable_boolean: xpath_variable
8661 {
8662 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8663 {
8664 }
8665
8666 bool value;
8667 char_t name[1];
8668 };
8669
8670 struct xpath_variable_number: xpath_variable
8671 {
8672 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8673 {
8674 }
8675
8676 double value;
8677 char_t name[1];
8678 };
8679
8680 struct xpath_variable_string: xpath_variable
8681 {
8682 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8683 {
8684 }
8685
8686 ~xpath_variable_string()
8687 {
8688 if (value) xml_memory::deallocate(value);
8689 }
8690
8691 char_t* value;
8692 char_t name[1];
8693 };
8694
8695 struct xpath_variable_node_set: xpath_variable
8696 {
8697 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8698 {
8699 }
8700
8701 xpath_node_set value;
8702 char_t name[1];
8703 };
8704
8705 static const xpath_node_set dummy_node_set;
8706
8707 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8708 {
8709 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8710 unsigned int result = 0;
8711
8712 while (*str)
8713 {
8714 result += static_cast<unsigned int>(*str++);
8715 result += result << 10;
8716 result ^= result >> 6;
8717 }
8718
8719 result += result << 3;
8720 result ^= result >> 11;
8721 result += result << 15;
8722
8723 return result;
8724 }
8725
8726 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8727 {
8728 size_t length = strlength(name);
8729 if (length == 0) return 0; // empty variable names are invalid
8730
8731 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8732 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8733 if (!memory) return 0;
8734
8735 T* result = new (memory) T();
8736
8737 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8738
8739 return result;
8740 }
8741
8742 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8743 {
8744 switch (type)
8745 {
8746 case xpath_type_node_set:
8747 return new_xpath_variable<xpath_variable_node_set>(name);
8748
8749 case xpath_type_number:
8750 return new_xpath_variable<xpath_variable_number>(name);
8751
8752 case xpath_type_string:
8753 return new_xpath_variable<xpath_variable_string>(name);
8754
8755 case xpath_type_boolean:
8756 return new_xpath_variable<xpath_variable_boolean>(name);
8757
8758 default:
8759 return 0;
8760 }
8761 }
8762
8763 template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8764 {
8765 var->~T();
8766 xml_memory::deallocate(var);
8767 }
8768
8769 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8770 {
8771 switch (type)
8772 {
8773 case xpath_type_node_set:
8774 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8775 break;
8776
8777 case xpath_type_number:
8778 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8779 break;
8780
8781 case xpath_type_string:
8782 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8783 break;
8784
8785 case xpath_type_boolean:
8786 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8787 break;
8788
8789 default:
8790 assert(false && "Invalid variable type"); // unreachable
8791 }
8792 }
8793
8794 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8795 {
8796 switch (rhs->type())
8797 {
8798 case xpath_type_node_set:
8799 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8800
8801 case xpath_type_number:
8802 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8803
8804 case xpath_type_string:
8805 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8806
8807 case xpath_type_boolean:
8808 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8809
8810 default:
8811 assert(false && "Invalid variable type"); // unreachable
8812 return false;
8813 }
8814 }
8815
8816 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8817 {
8818 size_t length = static_cast<size_t>(end - begin);
8819 char_t* scratch = buffer;
8820
8821 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8822 {
8823 // need to make dummy on-heap copy
8824 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8825 if (!scratch) return false;
8826 }
8827
8828 // copy string to zero-terminated buffer and perform lookup
8829 memcpy(scratch, begin, length * sizeof(char_t));
8830 scratch[length] = 0;
8831
8832 *out_result = set->get(scratch);
8833
8834 // free dummy buffer
8835 if (scratch != buffer) xml_memory::deallocate(scratch);
8836
8837 return true;
8838 }
8839PUGI__NS_END
8840
8841// Internal node set class
8842PUGI__NS_BEGIN
8843 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8844 {
8845 if (end - begin < 2)
8846 return xpath_node_set::type_sorted;
8847
8848 document_order_comparator cmp;
8849
8850 bool first = cmp(begin[0], begin[1]);
8851
8852 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8853 if (cmp(it[0], it[1]) != first)
8854 return xpath_node_set::type_unsorted;
8855
8856 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8857 }
8858
8859 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8860 {
8861 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8862
8863 if (type == xpath_node_set::type_unsorted)
8864 {
8865 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8866
8867 if (sorted == xpath_node_set::type_unsorted)
8868 {
8869 sort(begin, end, document_order_comparator());
8870
8871 type = xpath_node_set::type_sorted;
8872 }
8873 else
8874 type = sorted;
8875 }
8876
8877 if (type != order) reverse(begin, end);
8878
8879 return order;
8880 }
8881
8882 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8883 {
8884 if (begin == end) return xpath_node();
8885
8886 switch (type)
8887 {
8888 case xpath_node_set::type_sorted:
8889 return *begin;
8890
8891 case xpath_node_set::type_sorted_reverse:
8892 return *(end - 1);
8893
8894 case xpath_node_set::type_unsorted:
8895 return *min_element(begin, end, document_order_comparator());
8896
8897 default:
8898 assert(false && "Invalid node set type"); // unreachable
8899 return xpath_node();
8900 }
8901 }
8902
8903 class xpath_node_set_raw
8904 {
8905 xpath_node_set::type_t _type;
8906
8907 xpath_node* _begin;
8908 xpath_node* _end;
8909 xpath_node* _eos;
8910
8911 public:
8912 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8913 {
8914 }
8915
8916 xpath_node* begin() const
8917 {
8918 return _begin;
8919 }
8920
8921 xpath_node* end() const
8922 {
8923 return _end;
8924 }
8925
8926 bool empty() const
8927 {
8928 return _begin == _end;
8929 }
8930
8931 size_t size() const
8932 {
8933 return static_cast<size_t>(_end - _begin);
8934 }
8935
8936 xpath_node first() const
8937 {
8938 return xpath_first(_begin, _end, _type);
8939 }
8940
8941 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8942
8943 void push_back(const xpath_node& node, xpath_allocator* alloc)
8944 {
8945 if (_end != _eos)
8946 *_end++ = node;
8947 else
8948 push_back_grow(node, alloc);
8949 }
8950
8951 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8952 {
8953 if (begin_ == end_) return;
8954
8955 size_t size_ = static_cast<size_t>(_end - _begin);
8956 size_t capacity = static_cast<size_t>(_eos - _begin);
8957 size_t count = static_cast<size_t>(end_ - begin_);
8958
8959 if (size_ + count > capacity)
8960 {
8961 // reallocate the old array or allocate a new one
8962 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8963 if (!data) return;
8964
8965 // finalize
8966 _begin = data;
8967 _end = data + size_;
8968 _eos = data + size_ + count;
8969 }
8970
8971 memcpy(_end, begin_, count * sizeof(xpath_node));
8972 _end += count;
8973 }
8974
8975 void sort_do()
8976 {
8977 _type = xpath_sort(_begin, _end, _type, false);
8978 }
8979
8980 void truncate(xpath_node* pos)
8981 {
8982 assert(_begin <= pos && pos <= _end);
8983
8984 _end = pos;
8985 }
8986
8987 void remove_duplicates(xpath_allocator* alloc)
8988 {
8989 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
8990 {
8991 xpath_allocator_capture cr(alloc);
8992
8993 size_t size_ = static_cast<size_t>(_end - _begin);
8994
8995 size_t hash_size = 1;
8996 while (hash_size < size_ + size_ / 2) hash_size *= 2;
8997
8998 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
8999 if (!hash_data) return;
9000
9001 memset(hash_data, 0, hash_size * sizeof(const void**));
9002
9003 xpath_node* write = _begin;
9004
9005 for (xpath_node* it = _begin; it != _end; ++it)
9006 {
9007 const void* attr = it->attribute().internal_object();
9008 const void* node = it->node().internal_object();
9009 const void* key = attr ? attr : node;
9010
9011 if (key && hash_insert(hash_data, hash_size, key))
9012 {
9013 *write++ = *it;
9014 }
9015 }
9016
9017 _end = write;
9018 }
9019 else
9020 {
9021 _end = unique(_begin, _end);
9022 }
9023 }
9024
9025 xpath_node_set::type_t type() const
9026 {
9027 return _type;
9028 }
9029
9030 void set_type(xpath_node_set::type_t value)
9031 {
9032 _type = value;
9033 }
9034 };
9035
9036 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9037 {
9038 size_t capacity = static_cast<size_t>(_eos - _begin);
9039
9040 // get new capacity (1.5x rule)
9041 size_t new_capacity = capacity + capacity / 2 + 1;
9042
9043 // reallocate the old array or allocate a new one
9044 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9045 if (!data) return;
9046
9047 // finalize
9048 _begin = data;
9049 _end = data + capacity;
9050 _eos = data + new_capacity;
9051
9052 // push
9053 *_end++ = node;
9054 }
9055PUGI__NS_END
9056
9057PUGI__NS_BEGIN
9058 struct xpath_context
9059 {
9060 xpath_node n;
9061 size_t position, size;
9062
9063 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
9064 {
9065 }
9066 };
9067
// Token kinds produced by xpath_lexer::next().
enum lexeme_t
{
    lex_none = 0, // no valid lexeme at the current position
    lex_equal, // =
    lex_not_equal, // !=
    lex_less, // <
    lex_greater, // >
    lex_less_or_equal, // <=
    lex_greater_or_equal, // >=
    lex_plus, // +
    lex_minus, // -
    lex_multiply, // *
    lex_union, // |
    lex_var_ref, // $name or $prefix:name
    lex_open_brace, // (
    lex_close_brace, // )
    lex_quoted_string, // '...' or "..." (contents exclude the quotes)
    lex_number, // digits with optional fraction, or a fraction starting with '.'
    lex_slash, // /
    lex_double_slash, // //
    lex_open_square_brace, // [
    lex_close_square_brace, // ]
    lex_string, // name, prefix:name or prefix:*
    lex_comma, // ,
    lex_axis_attribute, // @
    lex_dot, // .
    lex_double_dot, // ..
    lex_double_colon, // ::
    lex_eof // end of the query string
};
9098
9099 struct xpath_lexer_string
9100 {
9101 const char_t* begin;
9102 const char_t* end;
9103
9104 xpath_lexer_string(): begin(0), end(0)
9105 {
9106 }
9107
9108 bool operator==(const char_t* other) const
9109 {
9110 size_t length = static_cast<size_t>(end - begin);
9111
9112 return strequalrange(other, begin, length);
9113 }
9114 };
9115
// Tokenizer for XPath query strings.
// Holds one current lexeme at a time; next() advances to the following one.
// The query must be zero-terminated and is not copied: lexeme contents point
// into the query string.
class xpath_lexer
{
    const char_t* _cur; // first character after the current lexeme
    const char_t* _cur_lexeme_pos; // start of the current lexeme (after skipped whitespace)
    xpath_lexer_string _cur_lexeme_contents; // text of the current lexeme; only set for lexemes that carry text

    lexeme_t _cur_lexeme; // kind of the current lexeme

public:
    // Starts lexing `query` and immediately reads the first lexeme.
    explicit xpath_lexer(const char_t* query): _cur(query)
    {
        next();
    }

    // Returns the position right after the current lexeme.
    const char_t* state() const
    {
        return _cur;
    }

    // Reads the next lexeme starting at the current position.
    // When no lexeme can be recognized the kind becomes lex_none; in that case
    // the position is left at the offending character, except that a '$' not
    // followed by a name and an unterminated quoted string have already been
    // consumed.
    void next()
    {
        const char_t* cur = _cur;

        // skip leading whitespace
        while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

        // save lexeme position for error reporting
        _cur_lexeme_pos = cur;

        switch (*cur)
        {
        case 0:
            // end of query; the terminator is not consumed
            _cur_lexeme = lex_eof;
            break;

        case '>':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_greater_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_greater;
            }
            break;

        case '<':
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_less_or_equal;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_less;
            }
            break;

        case '!':
            // '!' is only valid as part of '!='
            if (*(cur+1) == '=')
            {
                cur += 2;
                _cur_lexeme = lex_not_equal;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
            break;

        case '=':
            cur += 1;
            _cur_lexeme = lex_equal;

            break;

        case '+':
            cur += 1;
            _cur_lexeme = lex_plus;

            break;

        case '-':
            cur += 1;
            _cur_lexeme = lex_minus;

            break;

        case '*':
            cur += 1;
            _cur_lexeme = lex_multiply;

            break;

        case '|':
            cur += 1;
            _cur_lexeme = lex_union;

            break;

        case '$':
            // variable reference: '$' followed by a name or prefix:name
            cur += 1;

            if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                // contents exclude the leading '$'
                _cur_lexeme_contents.begin = cur;

                while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
                {
                    cur++; // :

                    while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_var_ref;
            }
            else
            {
                _cur_lexeme = lex_none;
            }

            break;

        case '(':
            cur += 1;
            _cur_lexeme = lex_open_brace;

            break;

        case ')':
            cur += 1;
            _cur_lexeme = lex_close_brace;

            break;

        case '[':
            cur += 1;
            _cur_lexeme = lex_open_square_brace;

            break;

        case ']':
            cur += 1;
            _cur_lexeme = lex_close_square_brace;

            break;

        case ',':
            cur += 1;
            _cur_lexeme = lex_comma;

            break;

        case '/':
            if (*(cur+1) == '/')
            {
                cur += 2;
                _cur_lexeme = lex_double_slash;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_slash;
            }
            break;

        case '.':
            // '..', a number that starts with its fraction ('.5'), or a single '.'
            if (*(cur+1) == '.')
            {
                cur += 2;
                _cur_lexeme = lex_double_dot;
            }
            else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
            {
                _cur_lexeme_contents.begin = cur; // .

                ++cur;

                while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else
            {
                cur += 1;
                _cur_lexeme = lex_dot;
            }
            break;

        case '@':
            cur += 1;
            _cur_lexeme = lex_axis_attribute;

            break;

        case '"':
        case '\'':
        {
            // quoted string: runs to the matching quote character, no escapes
            char_t terminator = *cur;

            ++cur;

            // contents exclude both quotes
            _cur_lexeme_contents.begin = cur;
            while (*cur && *cur != terminator) cur++;
            _cur_lexeme_contents.end = cur;

            // reaching the end of the query means the string was never closed
            if (!*cur)
                _cur_lexeme = lex_none;
            else
            {
                cur += 1;
                _cur_lexeme = lex_quoted_string;
            }

            break;
        }

        case ':':
            // ':' is only valid as part of '::'
            if (*(cur+1) == ':')
            {
                cur += 2;
                _cur_lexeme = lex_double_colon;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
            break;

        default:
            if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
            {
                // number: integer part, optionally followed by '.' and more digits
                _cur_lexeme_contents.begin = cur;

                while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

                if (*cur == '.')
                {
                    cur++;

                    while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_number;
            }
            else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
            {
                // name, optionally extended with ':*' or ':name'
                _cur_lexeme_contents.begin = cur;

                while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

                if (cur[0] == ':')
                {
                    if (cur[1] == '*') // namespace test ncname:*
                    {
                        cur += 2; // :*
                    }
                    else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
                    {
                        cur++; // :

                        while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
                    }
                }

                _cur_lexeme_contents.end = cur;

                _cur_lexeme = lex_string;
            }
            else
            {
                _cur_lexeme = lex_none;
            }
        }

        _cur = cur;
    }

    // Returns the kind of the current lexeme.
    lexeme_t current() const
    {
        return _cur_lexeme;
    }

    // Returns the start of the current lexeme within the query string.
    const char_t* current_pos() const
    {
        return _cur_lexeme_pos;
    }

    // Returns the text of the current lexeme; only valid for lexeme kinds that
    // carry text (variable reference, number, name, quoted string).
    const xpath_lexer_string& contents() const
    {
        assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

        return _cur_lexeme_contents;
    }
};
9421
// Node kinds of the XPath expression tree.
// "left", "right" and "third" in the comments name the operand sub-expressions.
enum ast_type_t
{
    ast_unknown,
    ast_op_or, // left or right
    ast_op_and, // left and right
    ast_op_equal, // left = right
    ast_op_not_equal, // left != right
    ast_op_less, // left < right
    ast_op_greater, // left > right
    ast_op_less_or_equal, // left <= right
    ast_op_greater_or_equal, // left >= right
    ast_op_add, // left + right
    ast_op_subtract, // left - right
    ast_op_multiply, // left * right
    ast_op_divide, // left div right
    ast_op_mod, // left mod right
    ast_op_negate, // -left (unary minus)
    ast_op_union, // left | right
    ast_predicate, // apply predicate to set; next points to next predicate
    ast_filter, // select * from left where right
    ast_string_constant, // string constant
    ast_number_constant, // number constant
    ast_variable, // variable
    ast_func_last, // last()
    ast_func_position, // position()
    ast_func_count, // count(left)
    ast_func_id, // id(left)
    ast_func_local_name_0, // local-name()
    ast_func_local_name_1, // local-name(left)
    ast_func_namespace_uri_0, // namespace-uri()
    ast_func_namespace_uri_1, // namespace-uri(left)
    ast_func_name_0, // name()
    ast_func_name_1, // name(left)
    ast_func_string_0, // string()
    ast_func_string_1, // string(left)
    ast_func_concat, // concat(left, right, siblings)
    ast_func_starts_with, // starts-with(left, right)
    ast_func_contains, // contains(left, right)
    ast_func_substring_before, // substring-before(left, right)
    ast_func_substring_after, // substring-after(left, right)
    ast_func_substring_2, // substring(left, right)
    ast_func_substring_3, // substring(left, right, third)
    ast_func_string_length_0, // string-length()
    ast_func_string_length_1, // string-length(left)
    ast_func_normalize_space_0, // normalize-space()
    ast_func_normalize_space_1, // normalize-space(left)
    ast_func_translate, // translate(left, right, third)
    ast_func_boolean, // boolean(left)
    ast_func_not, // not(left)
    ast_func_true, // true()
    ast_func_false, // false()
    ast_func_lang, // lang(left)
    ast_func_number_0, // number()
    ast_func_number_1, // number(left)
    ast_func_sum, // sum(left)
    ast_func_floor, // floor(left)
    ast_func_ceiling, // ceiling(left)
    ast_func_round, // round(left)
    ast_step, // process set left with step
    ast_step_root, // select root node

    // optimized forms of the node kinds above
    ast_opt_translate_table, // translate(left, right, third) where right/third are constants
    ast_opt_compare_attribute // @name = 'string'
};
9486
// Axes of a location step.
// "reverse" marks the axes whose results step_do treats as reverse sorted;
// "attr" marks the axes that step_fill also evaluates for an attribute context.
enum axis_t
{
    axis_ancestor, // reverse, attr
    axis_ancestor_or_self, // reverse, attr
    axis_attribute,
    axis_child,
    axis_descendant,
    axis_descendant_or_self, // attr
    axis_following, // attr
    axis_following_sibling,
    axis_namespace, // not handled by step_fill (falls into its "Unimplemented axis" assertion)
    axis_parent, // attr
    axis_preceding, // reverse, attr
    axis_preceding_sibling, // reverse
    axis_self // attr
};
9503
// Node tests of a location step, as applied by step_push.
// For attributes only the name, type_node, all and all_in_namespace tests can
// match, and the attribute must additionally pass is_xpath_attribute.
enum nodetest_t
{
    nodetest_none, // no test; never expected by step_push for nodes
    nodetest_name, // element (or attribute) whose name equals the stored name
    nodetest_type_node, // any node
    nodetest_type_comment, // comment nodes
    nodetest_type_pi, // processing instruction nodes
    nodetest_type_text, // pcdata and cdata nodes
    nodetest_pi, // processing instruction whose name equals the stored name
    nodetest_all, // any element (or attribute)
    nodetest_all_in_namespace // element (or attribute) whose name starts with the stored prefix
};
9516
// Predicate classification stored in the _test field of predicate/filter nodes.
// apply_predicate evaluates the two constant kinds once for the whole set
// (apply_predicate_number_const); every other kind is evaluated per node.
enum predicate_t
{
    predicate_default, // no special handling
    predicate_posinv, // NOTE(review): presumably "position invariant"; its use is outside this part of the file - confirm
    predicate_constant, // numeric predicate evaluated once for the whole set
    predicate_constant_one // handled like predicate_constant here; presumably the constant 1 - confirm where it is assigned
};
9524
// How much of a node set the caller needs; eval_once uses this to decide
// whether evaluation may stop after the first matching node.
enum nodeset_eval_t
{
    nodeset_eval_all, // never stop early
    nodeset_eval_any, // always stop after the first match
    nodeset_eval_first // stop after the first match only when the set type is sorted
};
9531
// Tag type that carries an axis as a template argument, so that functions
// templated on the tag (step_fill, step_do) can read the axis from T::axis.
template <axis_t N> struct axis_to_type
{
    static const axis_t axis;
};

// Out-of-class definition of the static member; its value is the template argument.
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9538
9539 class xpath_ast_node
9540 {
private:
    // node type (compared against ast_type_t values) and result type;
    // both are stored as char
    char _type;
    char _rettype;

    // for ast_step
    char _axis;

    // for ast_step/ast_predicate/ast_filter: compared against nodetest_t values
    // when pushing step results and against predicate_t values when applying predicates
    char _test;

    // tree node structure; _next links the predicates applied in sequence by apply_predicates
    xpath_ast_node* _left;
    xpath_ast_node* _right;
    xpath_ast_node* _next;

    // payload; which member is meaningful depends on the node type
    union
    {
        // value for ast_string_constant
        const char_t* string;
        // value for ast_number_constant
        double number;
        // variable for ast_variable
        xpath_variable* variable;
        // node test for ast_step (node name/namespace/node type/pi target)
        const char_t* nodetest;
        // table for ast_opt_translate_table
        const unsigned char* table;
    } _data;

    // declared private; no definition is visible here (presumably to prevent copying)
    xpath_ast_node(const xpath_ast_node&);
    xpath_ast_node& operator=(const xpath_ast_node&);
9573
9574 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9575 {
9576 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9577
9578 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9579 {
9580 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9581 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9582 else if (lt == xpath_type_number || rt == xpath_type_number)
9583 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9584 else if (lt == xpath_type_string || rt == xpath_type_string)
9585 {
9586 xpath_allocator_capture cr(stack.result);
9587
9588 xpath_string ls = lhs->eval_string(c, stack);
9589 xpath_string rs = rhs->eval_string(c, stack);
9590
9591 return comp(ls, rs);
9592 }
9593 }
9594 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9595 {
9596 xpath_allocator_capture cr(stack.result);
9597
9598 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9599 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9600
9601 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9602 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9603 {
9604 xpath_allocator_capture cri(stack.result);
9605
9606 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9607 return true;
9608 }
9609
9610 return false;
9611 }
9612 else
9613 {
9614 if (lt == xpath_type_node_set)
9615 {
9616 swap(lhs, rhs);
9617 swap(lt, rt);
9618 }
9619
9620 if (lt == xpath_type_boolean)
9621 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9622 else if (lt == xpath_type_number)
9623 {
9624 xpath_allocator_capture cr(stack.result);
9625
9626 double l = lhs->eval_number(c, stack);
9627 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9628
9629 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9630 {
9631 xpath_allocator_capture cri(stack.result);
9632
9633 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9634 return true;
9635 }
9636
9637 return false;
9638 }
9639 else if (lt == xpath_type_string)
9640 {
9641 xpath_allocator_capture cr(stack.result);
9642
9643 xpath_string l = lhs->eval_string(c, stack);
9644 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9645
9646 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9647 {
9648 xpath_allocator_capture cri(stack.result);
9649
9650 if (comp(l, string_value(*ri, stack.result)))
9651 return true;
9652 }
9653
9654 return false;
9655 }
9656 }
9657
9658 assert(false && "Wrong types"); // unreachable
9659 return false;
9660 }
9661
9662 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9663 {
9664 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9665 }
9666
9667 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9668 {
9669 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9670
9671 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9672 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9673 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9674 {
9675 xpath_allocator_capture cr(stack.result);
9676
9677 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9678 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9679
9680 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9681 {
9682 xpath_allocator_capture cri(stack.result);
9683
9684 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9685
9686 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9687 {
9688 xpath_allocator_capture crii(stack.result);
9689
9690 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9691 return true;
9692 }
9693 }
9694
9695 return false;
9696 }
9697 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9698 {
9699 xpath_allocator_capture cr(stack.result);
9700
9701 double l = lhs->eval_number(c, stack);
9702 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9703
9704 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9705 {
9706 xpath_allocator_capture cri(stack.result);
9707
9708 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9709 return true;
9710 }
9711
9712 return false;
9713 }
9714 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9715 {
9716 xpath_allocator_capture cr(stack.result);
9717
9718 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9719 double r = rhs->eval_number(c, stack);
9720
9721 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9722 {
9723 xpath_allocator_capture cri(stack.result);
9724
9725 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9726 return true;
9727 }
9728
9729 return false;
9730 }
9731 else
9732 {
9733 assert(false && "Wrong types"); // unreachable
9734 return false;
9735 }
9736 }
9737
9738 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9739 {
9740 assert(ns.size() >= first);
9741 assert(expr->rettype() != xpath_type_number);
9742
9743 size_t i = 1;
9744 size_t size = ns.size() - first;
9745
9746 xpath_node* last = ns.begin() + first;
9747
9748 // remove_if... or well, sort of
9749 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9750 {
9751 xpath_context c(*it, i, size);
9752
9753 if (expr->eval_boolean(c, stack))
9754 {
9755 *last++ = *it;
9756
9757 if (once) break;
9758 }
9759 }
9760
9761 ns.truncate(last);
9762 }
9763
9764 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9765 {
9766 assert(ns.size() >= first);
9767 assert(expr->rettype() == xpath_type_number);
9768
9769 size_t i = 1;
9770 size_t size = ns.size() - first;
9771
9772 xpath_node* last = ns.begin() + first;
9773
9774 // remove_if... or well, sort of
9775 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9776 {
9777 xpath_context c(*it, i, size);
9778
9779 if (expr->eval_number(c, stack) == static_cast<double>(i))
9780 {
9781 *last++ = *it;
9782
9783 if (once) break;
9784 }
9785 }
9786
9787 ns.truncate(last);
9788 }
9789
9790 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9791 {
9792 assert(ns.size() >= first);
9793 assert(expr->rettype() == xpath_type_number);
9794
9795 size_t size = ns.size() - first;
9796
9797 xpath_node* last = ns.begin() + first;
9798
9799 xpath_context c(xpath_node(), 1, size);
9800
9801 double er = expr->eval_number(c, stack);
9802
9803 if (er >= 1.0 && er <= static_cast<double>(size))
9804 {
9805 size_t eri = static_cast<size_t>(er);
9806
9807 if (er == static_cast<double>(eri))
9808 {
9809 xpath_node r = last[eri - 1];
9810
9811 *last++ = r;
9812 }
9813 }
9814
9815 ns.truncate(last);
9816 }
9817
9818 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9819 {
9820 if (ns.size() == first) return;
9821
9822 assert(_type == ast_filter || _type == ast_predicate);
9823
9824 if (_test == predicate_constant || _test == predicate_constant_one)
9825 apply_predicate_number_const(ns, first, _right, stack);
9826 else if (_right->rettype() == xpath_type_number)
9827 apply_predicate_number(ns, first, _right, stack, once);
9828 else
9829 apply_predicate_boolean(ns, first, _right, stack, once);
9830 }
9831
9832 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9833 {
9834 if (ns.size() == first) return;
9835
9836 bool last_once = eval_once(ns.type(), eval);
9837
9838 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9839 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9840 }
9841
9842 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9843 {
9844 assert(a);
9845
9846 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9847
9848 switch (_test)
9849 {
9850 case nodetest_name:
9851 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9852 {
9853 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9854 return true;
9855 }
9856 break;
9857
9858 case nodetest_type_node:
9859 case nodetest_all:
9860 if (is_xpath_attribute(name))
9861 {
9862 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9863 return true;
9864 }
9865 break;
9866
9867 case nodetest_all_in_namespace:
9868 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9869 {
9870 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9871 return true;
9872 }
9873 break;
9874
9875 default:
9876 ;
9877 }
9878
9879 return false;
9880 }
9881
9882 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9883 {
9884 assert(n);
9885
9886 xml_node_type type = PUGI__NODETYPE(n);
9887
9888 switch (_test)
9889 {
9890 case nodetest_name:
9891 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9892 {
9893 ns.push_back(xml_node(n), alloc);
9894 return true;
9895 }
9896 break;
9897
9898 case nodetest_type_node:
9899 ns.push_back(xml_node(n), alloc);
9900 return true;
9901
9902 case nodetest_type_comment:
9903 if (type == node_comment)
9904 {
9905 ns.push_back(xml_node(n), alloc);
9906 return true;
9907 }
9908 break;
9909
9910 case nodetest_type_text:
9911 if (type == node_pcdata || type == node_cdata)
9912 {
9913 ns.push_back(xml_node(n), alloc);
9914 return true;
9915 }
9916 break;
9917
9918 case nodetest_type_pi:
9919 if (type == node_pi)
9920 {
9921 ns.push_back(xml_node(n), alloc);
9922 return true;
9923 }
9924 break;
9925
9926 case nodetest_pi:
9927 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9928 {
9929 ns.push_back(xml_node(n), alloc);
9930 return true;
9931 }
9932 break;
9933
9934 case nodetest_all:
9935 if (type == node_element)
9936 {
9937 ns.push_back(xml_node(n), alloc);
9938 return true;
9939 }
9940 break;
9941
9942 case nodetest_all_in_namespace:
9943 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9944 {
9945 ns.push_back(xml_node(n), alloc);
9946 return true;
9947 }
9948 break;
9949
9950 default:
9951 assert(false && "Unknown axis"); // unreachable
9952 }
9953
9954 return false;
9955 }
9956
// Collects into `ns` the nodes reachable from node `n` along the axis encoded
// in the tag type T, in the order the traversal visits them; each candidate is
// filtered by step_push. With `once`, collection stops after the first node
// that step_push accepts.
// The tests are written as `step_push(...) & once`: step_push is always
// called, and the function returns only when it accepted a node and `once` is set.
template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
{
    const axis_t axis = T::axis;

    switch (axis)
    {
    case axis_attribute:
    {
        // attributes of n, in list order
        for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
            if (step_push(ns, a, n, alloc) & once)
                return;

        break;
    }

    case axis_child:
    {
        // direct children of n, in list order
        for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
            if (step_push(ns, c, alloc) & once)
                return;

        break;
    }

    case axis_descendant:
    case axis_descendant_or_self:
    {
        if (axis == axis_descendant_or_self)
            if (step_push(ns, n, alloc) & once)
                return;

        // depth-first walk of the subtree below n, parents before children
        xml_node_struct* cur = n->first_child;

        while (cur)
        {
            if (step_push(ns, cur, alloc) & once)
                return;

            if (cur->first_child)
                cur = cur->first_child;
            else
            {
                // climb until a node with a next sibling is found; reaching n ends the walk
                while (!cur->next_sibling)
                {
                    cur = cur->parent;

                    if (cur == n) return;
                }

                cur = cur->next_sibling;
            }
        }

        break;
    }

    case axis_following_sibling:
    {
        for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
            if (step_push(ns, c, alloc) & once)
                return;

        break;
    }

    case axis_preceding_sibling:
    {
        // walks backwards through prev_sibling_c; the loop ends at a node without next_sibling
        // NOTE(review): this relies on prev_sibling_c of the first sibling wrapping around to the last sibling - confirm against the node structure
        for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
            if (step_push(ns, c, alloc) & once)
                return;

        break;
    }

    case axis_following:
    {
        xml_node_struct* cur = n;

        // exit from this node so that we don't include descendants
        while (!cur->next_sibling)
        {
            cur = cur->parent;

            if (!cur) return;
        }

        cur = cur->next_sibling;

        // depth-first walk over everything after n, parents before children
        while (cur)
        {
            if (step_push(ns, cur, alloc) & once)
                return;

            if (cur->first_child)
                cur = cur->first_child;
            else
            {
                while (!cur->next_sibling)
                {
                    cur = cur->parent;

                    if (!cur) return;
                }

                cur = cur->next_sibling;
            }
        }

        break;
    }

    case axis_preceding:
    {
        xml_node_struct* cur = n;

        // exit from this node so that we don't include descendants
        while (!cur->prev_sibling_c->next_sibling)
        {
            cur = cur->parent;

            if (!cur) return;
        }

        cur = cur->prev_sibling_c;

        // backwards walk: descend through first_child->prev_sibling_c before pushing,
        // so children are pushed before their parents
        while (cur)
        {
            if (cur->first_child)
                cur = cur->first_child->prev_sibling_c;
            else
            {
                // leaf node, can't be ancestor
                if (step_push(ns, cur, alloc) & once)
                    return;

                while (!cur->prev_sibling_c->next_sibling)
                {
                    cur = cur->parent;

                    if (!cur) return;

                    // nodes for which node_is_ancestor(cur, n) holds are skipped
                    if (!node_is_ancestor(cur, n))
                        if (step_push(ns, cur, alloc) & once)
                            return;
                }

                cur = cur->prev_sibling_c;
            }
        }

        break;
    }

    case axis_ancestor:
    case axis_ancestor_or_self:
    {
        if (axis == axis_ancestor_or_self)
            if (step_push(ns, n, alloc) & once)
                return;

        // parent chain, nearest ancestor first
        xml_node_struct* cur = n->parent;

        while (cur)
        {
            if (step_push(ns, cur, alloc) & once)
                return;

            cur = cur->parent;
        }

        break;
    }

    case axis_self:
    {
        step_push(ns, n, alloc);

        break;
    }

    case axis_parent:
    {
        if (n->parent)
            step_push(ns, n->parent, alloc);

        break;
    }

    default:
        assert(false && "Unimplemented axis"); // unreachable
    }
}
10149
// Collects into `ns` the nodes reachable from attribute `a` (owned by node `p`)
// along the axis encoded in the tag type T; each candidate is filtered by
// step_push. With `once`, collection stops after the first accepted node.
// Only the axes listed in the switch are handled; any other axis trips the
// assertion.
template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
{
    const axis_t axis = T::axis;

    switch (axis)
    {
    case axis_ancestor:
    case axis_ancestor_or_self:
    {
        if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
            if (step_push(ns, a, p, alloc) & once)
                return;

        // the owning node and its parent chain, nearest first
        xml_node_struct* cur = p;

        while (cur)
        {
            if (step_push(ns, cur, alloc) & once)
                return;

            cur = cur->parent;
        }

        break;
    }

    case axis_descendant_or_self:
    case axis_self:
    {
        // only the attribute itself can be produced here
        if (_test == nodetest_type_node) // reject attributes based on principal node type test
            step_push(ns, a, p, alloc);

        break;
    }

    case axis_following:
    {
        // depth-first walk that starts below the owning node: advance first, then push,
        // so p itself is never pushed
        xml_node_struct* cur = p;

        while (cur)
        {
            if (cur->first_child)
                cur = cur->first_child;
            else
            {
                while (!cur->next_sibling)
                {
                    cur = cur->parent;

                    if (!cur) return;
                }

                cur = cur->next_sibling;
            }

            if (step_push(ns, cur, alloc) & once)
                return;
        }

        break;
    }

    case axis_parent:
    {
        step_push(ns, p, alloc);

        break;
    }

    case axis_preceding:
    {
        // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
        step_fill(ns, p, alloc, once, v);
        break;
    }

    default:
        assert(false && "Unimplemented axis"); // unreachable
    }
}
10230
10231 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10232 {
10233 const axis_t axis = T::axis;
10234 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10235
10236 if (xn.node())
10237 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10238 else if (axis_has_attributes && xn.attribute() && xn.parent())
10239 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10240 }
10241
10242 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10243 {
10244 const axis_t axis = T::axis;
10245 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10246 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10247
10248 bool once =
10249 (axis == axis_attribute && _test == nodetest_name) ||
10250 (!_right && eval_once(axis_type, eval)) ||
10251 // coverity[mixed_enums]
10252 (_right && !_right->_next && _right->_test == predicate_constant_one);
10253
10254 xpath_node_set_raw ns;
10255 ns.set_type(axis_type);
10256
10257 if (_left)
10258 {
10259 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10260
10261 // self axis preserves the original order
10262 if (axis == axis_self) ns.set_type(s.type());
10263
10264 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10265 {
10266 size_t size = ns.size();
10267
10268 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10269 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10270
10271 step_fill(ns, *it, stack.result, once, v);
10272 if (_right) apply_predicates(ns, size, stack, eval);
10273 }
10274 }
10275 else
10276 {
10277 step_fill(ns, c.n, stack.result, once, v);
10278 if (_right) apply_predicates(ns, 0, stack, eval);
10279 }
10280
10281 // child, attribute and self axes always generate unique set of nodes
10282 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10283 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10284 ns.remove_duplicates(stack.temp);
10285
10286 return ns;
10287 }
10288
10289 public:
// Constructs a string-constant node (type must be ast_string_constant).
// The value pointer is stored as-is in _data.string; the string is not copied here.
10290 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10291 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10292 {
10293 assert(type == ast_string_constant);
10294 _data.string = value;
10295 }
10296
// Constructs a number-constant node (type must be ast_number_constant) holding value in _data.number.
10297 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10298 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10299 {
10300 assert(type == ast_number_constant);
10301 _data.number = value;
10302 }
10303
// Constructs a variable-reference node (type must be ast_variable).
// The variable pointer is stored in _data.variable and dereferenced later during evaluation.
10304 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10305 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10306 {
10307 assert(type == ast_variable);
10308 _data.variable = value;
10309 }
10310
// Constructs a generic node of the given type and return type with optional left/right children.
// _axis, _test and _next start out as 0; _data is left untouched.
10311 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10312 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10313 {
10314 }
10315
// Constructs a location-step node (type must be ast_step); the return type is always a node set.
// axis and test are narrowed to char for storage; contents is stored by pointer in _data.nodetest.
10316 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10317 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10318 {
10319 assert(type == ast_step);
10320 _data.nodetest = contents;
10321 }
10322
// Constructs a filter or predicate node (type must be ast_filter or ast_predicate).
// Note that _test is reused here to hold a predicate_t classification rather than a nodetest_t.
10323 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10324 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10325 {
10326 assert(type == ast_filter || type == ast_predicate);
10327 }
10328
10329 void set_next(xpath_ast_node* value)
10330 {
10331 _next = value;
10332 }
10333
10334 void set_right(xpath_ast_node* value)
10335 {
10336 _right = value;
10337 }
10338
10339 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10340 {
10341 switch (_type)
10342 {
10343 case ast_op_or:
10344 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10345
10346 case ast_op_and:
10347 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10348
10349 case ast_op_equal:
10350 return compare_eq(_left, _right, c, stack, equal_to());
10351
10352 case ast_op_not_equal:
10353 return compare_eq(_left, _right, c, stack, not_equal_to());
10354
10355 case ast_op_less:
10356 return compare_rel(_left, _right, c, stack, less());
10357
10358 case ast_op_greater:
10359 return compare_rel(_right, _left, c, stack, less());
10360
10361 case ast_op_less_or_equal:
10362 return compare_rel(_left, _right, c, stack, less_equal());
10363
10364 case ast_op_greater_or_equal:
10365 return compare_rel(_right, _left, c, stack, less_equal());
10366
10367 case ast_func_starts_with:
10368 {
10369 xpath_allocator_capture cr(stack.result);
10370
10371 xpath_string lr = _left->eval_string(c, stack);
10372 xpath_string rr = _right->eval_string(c, stack);
10373
10374 return starts_with(lr.c_str(), rr.c_str());
10375 }
10376
10377 case ast_func_contains:
10378 {
10379 xpath_allocator_capture cr(stack.result);
10380
10381 xpath_string lr = _left->eval_string(c, stack);
10382 xpath_string rr = _right->eval_string(c, stack);
10383
10384 return find_substring(lr.c_str(), rr.c_str()) != 0;
10385 }
10386
10387 case ast_func_boolean:
10388 return _left->eval_boolean(c, stack);
10389
10390 case ast_func_not:
10391 return !_left->eval_boolean(c, stack);
10392
10393 case ast_func_true:
10394 return true;
10395
10396 case ast_func_false:
10397 return false;
10398
10399 case ast_func_lang:
10400 {
10401 if (c.n.attribute()) return false;
10402
10403 xpath_allocator_capture cr(stack.result);
10404
10405 xpath_string lang = _left->eval_string(c, stack);
10406
10407 for (xml_node n = c.n.node(); n; n = n.parent())
10408 {
10409 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10410
10411 if (a)
10412 {
10413 const char_t* value = a.value();
10414
10415 // strnicmp / strncasecmp is not portable
10416 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10417 {
10418 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10419 ++value;
10420 }
10421
10422 return *value == 0 || *value == '-';
10423 }
10424 }
10425
10426 return false;
10427 }
10428
10429 case ast_opt_compare_attribute:
10430 {
10431 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10432
10433 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10434
10435 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10436 }
10437
10438 case ast_variable:
10439 {
10440 assert(_rettype == _data.variable->type());
10441
10442 if (_rettype == xpath_type_boolean)
10443 return _data.variable->get_boolean();
10444
10445 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10446 break;
10447 }
10448
10449 default:
10450 ;
10451 }
10452
10453 // none of the ast types that return the value directly matched, we need to perform type conversion
10454 switch (_rettype)
10455 {
10456 case xpath_type_number:
10457 return convert_number_to_boolean(eval_number(c, stack));
10458
10459 case xpath_type_string:
10460 {
10461 xpath_allocator_capture cr(stack.result);
10462
10463 return !eval_string(c, stack).empty();
10464 }
10465
10466 case xpath_type_node_set:
10467 {
10468 xpath_allocator_capture cr(stack.result);
10469
10470 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10471 }
10472
10473 default:
10474 assert(false && "Wrong expression for return type boolean"); // unreachable
10475 return false;
10476 }
10477 }
10478
10479 double eval_number(const xpath_context& c, const xpath_stack& stack)
10480 {
10481 switch (_type)
10482 {
10483 case ast_op_add:
10484 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10485
10486 case ast_op_subtract:
10487 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10488
10489 case ast_op_multiply:
10490 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10491
10492 case ast_op_divide:
10493 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10494
10495 case ast_op_mod:
10496 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10497
10498 case ast_op_negate:
10499 return -_left->eval_number(c, stack);
10500
10501 case ast_number_constant:
10502 return _data.number;
10503
10504 case ast_func_last:
10505 return static_cast<double>(c.size);
10506
10507 case ast_func_position:
10508 return static_cast<double>(c.position);
10509
10510 case ast_func_count:
10511 {
10512 xpath_allocator_capture cr(stack.result);
10513
10514 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10515 }
10516
10517 case ast_func_string_length_0:
10518 {
10519 xpath_allocator_capture cr(stack.result);
10520
10521 return static_cast<double>(string_value(c.n, stack.result).length());
10522 }
10523
10524 case ast_func_string_length_1:
10525 {
10526 xpath_allocator_capture cr(stack.result);
10527
10528 return static_cast<double>(_left->eval_string(c, stack).length());
10529 }
10530
10531 case ast_func_number_0:
10532 {
10533 xpath_allocator_capture cr(stack.result);
10534
10535 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10536 }
10537
10538 case ast_func_number_1:
10539 return _left->eval_number(c, stack);
10540
10541 case ast_func_sum:
10542 {
10543 xpath_allocator_capture cr(stack.result);
10544
10545 double r = 0;
10546
10547 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10548
10549 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10550 {
10551 xpath_allocator_capture cri(stack.result);
10552
10553 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10554 }
10555
10556 return r;
10557 }
10558
10559 case ast_func_floor:
10560 {
10561 double r = _left->eval_number(c, stack);
10562
10563 return r == r ? floor(r) : r;
10564 }
10565
10566 case ast_func_ceiling:
10567 {
10568 double r = _left->eval_number(c, stack);
10569
10570 return r == r ? ceil(r) : r;
10571 }
10572
10573 case ast_func_round:
10574 return round_nearest_nzero(_left->eval_number(c, stack));
10575
10576 case ast_variable:
10577 {
10578 assert(_rettype == _data.variable->type());
10579
10580 if (_rettype == xpath_type_number)
10581 return _data.variable->get_number();
10582
10583 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10584 break;
10585 }
10586
10587 default:
10588 ;
10589 }
10590
10591 // none of the ast types that return the value directly matched, we need to perform type conversion
10592 switch (_rettype)
10593 {
10594 case xpath_type_boolean:
10595 return eval_boolean(c, stack) ? 1 : 0;
10596
10597 case xpath_type_string:
10598 {
10599 xpath_allocator_capture cr(stack.result);
10600
10601 return convert_string_to_number(eval_string(c, stack).c_str());
10602 }
10603
10604 case xpath_type_node_set:
10605 {
10606 xpath_allocator_capture cr(stack.result);
10607
10608 return convert_string_to_number(eval_string(c, stack).c_str());
10609 }
10610
10611 default:
10612 assert(false && "Wrong expression for return type number"); // unreachable
10613 return 0;
10614 }
10615 }
10616
10617 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10618 {
10619 assert(_type == ast_func_concat);
10620
10621 xpath_allocator_capture ct(stack.temp);
10622
10623 // count the string number
10624 size_t count = 1;
10625 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10626
10627 // allocate a buffer for temporary string objects
10628 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10629 if (!buffer) return xpath_string();
10630
10631 // evaluate all strings to temporary stack
10632 xpath_stack swapped_stack = {stack.temp, stack.result};
10633
10634 buffer[0] = _left->eval_string(c, swapped_stack);
10635
10636 size_t pos = 1;
10637 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10638 assert(pos == count);
10639
10640 // get total length
10641 size_t length = 0;
10642 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10643
10644 // create final string
10645 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10646 if (!result) return xpath_string();
10647
10648 char_t* ri = result;
10649
10650 for (size_t j = 0; j < count; ++j)
10651 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10652 *ri++ = *bi;
10653
10654 *ri = 0;
10655
10656 return xpath_string::from_heap_preallocated(result, ri);
10657 }
10658
// Evaluates this node as a string.
// Node types that yield a string directly are handled by the first switch; every other node is
// evaluated according to _rettype and converted to a string in the second switch.
// Several cases build swapped_stack = {stack.temp, stack.result}: sub-expressions evaluated with it
// place their results in stack.temp, while the value returned to the caller is created in stack.result.
// NOTE(review): xpath_allocator_capture presumably rolls the captured allocator back on scope exit - confirm against its definition.
10659 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10660 {
10661 switch (_type)
10662 {
10663 case ast_string_constant:
10664 return xpath_string::from_const(_data.string);
10665
10666 case ast_func_local_name_0:
10667 {
10668 xpath_node na = c.n;
10669
10670 return xpath_string::from_const(local_name(na));
10671 }
10672
10673 case ast_func_local_name_1:
10674 {
10675 xpath_allocator_capture cr(stack.result);
10676
// only the first node of the argument set is needed, hence nodeset_eval_first
10677 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10678 xpath_node na = ns.first();
10679
10680 return xpath_string::from_const(local_name(na));
10681 }
10682
10683 case ast_func_name_0:
10684 {
10685 xpath_node na = c.n;
10686
10687 return xpath_string::from_const(qualified_name(na));
10688 }
10689
10690 case ast_func_name_1:
10691 {
10692 xpath_allocator_capture cr(stack.result);
10693
10694 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10695 xpath_node na = ns.first();
10696
10697 return xpath_string::from_const(qualified_name(na));
10698 }
10699
10700 case ast_func_namespace_uri_0:
10701 {
10702 xpath_node na = c.n;
10703
10704 return xpath_string::from_const(namespace_uri(na));
10705 }
10706
10707 case ast_func_namespace_uri_1:
10708 {
10709 xpath_allocator_capture cr(stack.result);
10710
10711 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10712 xpath_node na = ns.first();
10713
10714 return xpath_string::from_const(namespace_uri(na));
10715 }
10716
10717 case ast_func_string_0:
10718 return string_value(c.n, stack.result);
10719
10720 case ast_func_string_1:
10721 return _left->eval_string(c, stack);
10722
10723 case ast_func_concat:
10724 return eval_string_concat(c, stack);
10725
10726 case ast_func_substring_before:
10727 {
10728 xpath_allocator_capture cr(stack.temp);
10729
10730 xpath_stack swapped_stack = {stack.temp, stack.result};
10731
10732 xpath_string s = _left->eval_string(c, swapped_stack);
10733 xpath_string p = _right->eval_string(c, swapped_stack);
10734
10735 const char_t* pos = find_substring(s.c_str(), p.c_str());
10736
// the prefix [s, pos) is copied into stack.result; no match yields an empty string
10737 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10738 }
10739
10740 case ast_func_substring_after:
10741 {
10742 xpath_allocator_capture cr(stack.temp);
10743
10744 xpath_stack swapped_stack = {stack.temp, stack.result};
10745
10746 xpath_string s = _left->eval_string(c, swapped_stack);
10747 xpath_string p = _right->eval_string(c, swapped_stack);
10748
10749 const char_t* pos = find_substring(s.c_str(), p.c_str());
10750 if (!pos) return xpath_string();
10751
10752 const char_t* rbegin = pos + p.length();
10753 const char_t* rend = s.c_str() + s.length();
10754
// when s does not use heap storage its tail is returned by pointer without copying; otherwise the tail is copied into stack.result
10755 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10756 }
10757
10758 case ast_func_substring_2:
10759 {
10760 xpath_allocator_capture cr(stack.temp);
10761
10762 xpath_stack swapped_stack = {stack.temp, stack.result};
10763
10764 xpath_string s = _left->eval_string(c, swapped_stack);
10765 size_t s_length = s.length();
10766
10767 double first = round_nearest(_right->eval_number(c, stack));
10768
10769 if (is_nan(first)) return xpath_string(); // NaN
10770 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10771
// positions are 1-based; anything below 1 is clamped to the first character
10772 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10773 assert(1 <= pos && pos <= s_length + 1);
10774
10775 const char_t* rbegin = s.c_str() + (pos - 1);
10776 const char_t* rend = s.c_str() + s.length();
10777
10778 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10779 }
10780
10781 case ast_func_substring_3:
10782 {
10783 xpath_allocator_capture cr(stack.temp);
10784
10785 xpath_stack swapped_stack = {stack.temp, stack.result};
10786
10787 xpath_string s = _left->eval_string(c, swapped_stack);
10788 size_t s_length = s.length();
10789
// first is the rounded 1-based start, last is one past the final selected position (start + rounded length)
10790 double first = round_nearest(_right->eval_number(c, stack));
10791 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10792
10793 if (is_nan(first) || is_nan(last)) return xpath_string();
10794 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10795 else if (first >= last) return xpath_string();
10796 else if (last < 1) return xpath_string();
10797
// clamp the range to [1, s_length + 1] before converting to size_t
10798 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10799 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10800
10801 assert(1 <= pos && pos <= end && end <= s_length + 1);
10802 const char_t* rbegin = s.c_str() + (pos - 1);
10803 const char_t* rend = s.c_str() + (end - 1);
10804
// the no-copy path is only usable when the range runs to the end of a non-heap string, because from_const takes no end pointer
10805 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10806 }
10807
10808 case ast_func_normalize_space_0:
10809 {
10810 xpath_string s = string_value(c.n, stack.result);
10811
// NOTE(review): s.data() presumably returns a writable buffer (copying if needed) and null on allocation failure - confirm
10812 char_t* begin = s.data(stack.result);
10813 if (!begin) return xpath_string();
10814
10815 char_t* end = normalize_space(begin);
10816
10817 return xpath_string::from_heap_preallocated(begin, end);
10818 }
10819
10820 case ast_func_normalize_space_1:
10821 {
10822 xpath_string s = _left->eval_string(c, stack);
10823
10824 char_t* begin = s.data(stack.result);
10825 if (!begin) return xpath_string();
10826
10827 char_t* end = normalize_space(begin);
10828
10829 return xpath_string::from_heap_preallocated(begin, end);
10830 }
10831
10832 case ast_func_translate:
10833 {
10834 xpath_allocator_capture cr(stack.temp);
10835
10836 xpath_stack swapped_stack = {stack.temp, stack.result};
10837
// the subject string is evaluated with the regular stack because it becomes the returned buffer; from/to are evaluated with the swapped stack
10838 xpath_string s = _left->eval_string(c, stack);
10839 xpath_string from = _right->eval_string(c, swapped_stack);
10840 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10841
10842 char_t* begin = s.data(stack.result);
10843 if (!begin) return xpath_string();
10844
10845 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10846
10847 return xpath_string::from_heap_preallocated(begin, end);
10848 }
10849
10850 case ast_opt_translate_table:
10851 {
// optimized translate(): optimize_self replaced the constant from/to arguments with a table stored in _data.table
10852 xpath_string s = _left->eval_string(c, stack);
10853
10854 char_t* begin = s.data(stack.result);
10855 if (!begin) return xpath_string();
10856
10857 char_t* end = translate_table(begin, _data.table);
10858
10859 return xpath_string::from_heap_preallocated(begin, end);
10860 }
10861
10862 case ast_variable:
10863 {
10864 assert(_rettype == _data.variable->type());
10865
10866 if (_rettype == xpath_type_string)
10867 return xpath_string::from_const(_data.variable->get_string());
10868
10869 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10870 break;
10871 }
10872
10873 default:
10874 ;
10875 }
10876
10877 // none of the ast types that return the value directly matched, we need to perform type conversion
10878 switch (_rettype)
10879 {
10880 case xpath_type_boolean:
10881 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10882
10883 case xpath_type_number:
10884 return convert_number_to_string(eval_number(c, stack), stack.result);
10885
10886 case xpath_type_node_set:
10887 {
10888 xpath_allocator_capture cr(stack.temp);
10889
10890 xpath_stack swapped_stack = {stack.temp, stack.result};
10891
// a node set converts to the string value of its first node, or to an empty string when the set is empty
10892 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10893 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10894 }
10895
10896 default:
10897 assert(false && "Wrong expression for return type string"); // unreachable
10898 return xpath_string();
10899 }
10900 }
10901
10902 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10903 {
10904 switch (_type)
10905 {
10906 case ast_op_union:
10907 {
10908 xpath_allocator_capture cr(stack.temp);
10909
10910 xpath_stack swapped_stack = {stack.temp, stack.result};
10911
10912 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
10913 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
10914
10915 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10916 ls.set_type(xpath_node_set::type_unsorted);
10917
10918 ls.append(rs.begin(), rs.end(), stack.result);
10919 ls.remove_duplicates(stack.temp);
10920
10921 return ls;
10922 }
10923
10924 case ast_filter:
10925 {
10926 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10927
10928 // either expression is a number or it contains position() call; sort by document order
10929 if (_test != predicate_posinv) set.sort_do();
10930
10931 bool once = eval_once(set.type(), eval);
10932
10933 apply_predicate(set, 0, stack, once);
10934
10935 return set;
10936 }
10937
10938 case ast_func_id:
10939 return xpath_node_set_raw();
10940
10941 case ast_step:
10942 {
10943 switch (_axis)
10944 {
10945 case axis_ancestor:
10946 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10947
10948 case axis_ancestor_or_self:
10949 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10950
10951 case axis_attribute:
10952 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10953
10954 case axis_child:
10955 return step_do(c, stack, eval, axis_to_type<axis_child>());
10956
10957 case axis_descendant:
10958 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10959
10960 case axis_descendant_or_self:
10961 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10962
10963 case axis_following:
10964 return step_do(c, stack, eval, axis_to_type<axis_following>());
10965
10966 case axis_following_sibling:
10967 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10968
10969 case axis_namespace:
10970 // namespaced axis is not supported
10971 return xpath_node_set_raw();
10972
10973 case axis_parent:
10974 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10975
10976 case axis_preceding:
10977 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10978
10979 case axis_preceding_sibling:
10980 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10981
10982 case axis_self:
10983 return step_do(c, stack, eval, axis_to_type<axis_self>());
10984
10985 default:
10986 assert(false && "Unknown axis"); // unreachable
10987 return xpath_node_set_raw();
10988 }
10989 }
10990
10991 case ast_step_root:
10992 {
10993 assert(!_right); // root step can't have any predicates
10994
10995 xpath_node_set_raw ns;
10996
10997 ns.set_type(xpath_node_set::type_sorted);
10998
10999 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
11000 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
11001
11002 return ns;
11003 }
11004
11005 case ast_variable:
11006 {
11007 assert(_rettype == _data.variable->type());
11008
11009 if (_rettype == xpath_type_node_set)
11010 {
11011 const xpath_node_set& s = _data.variable->get_node_set();
11012
11013 xpath_node_set_raw ns;
11014
11015 ns.set_type(s.type());
11016 ns.append(s.begin(), s.end(), stack.result);
11017
11018 return ns;
11019 }
11020
11021 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11022 break;
11023 }
11024
11025 default:
11026 ;
11027 }
11028
11029 // none of the ast types that return the value directly matched, but conversions to node set are invalid
11030 assert(false && "Wrong expression for return type node set"); // unreachable
11031 return xpath_node_set_raw();
11032 }
11033
11034 void optimize(xpath_allocator* alloc)
11035 {
11036 if (_left)
11037 _left->optimize(alloc);
11038
11039 if (_right)
11040 _right->optimize(alloc);
11041
11042 if (_next)
11043 _next->optimize(alloc);
11044
11045 // coverity[var_deref_model]
11046 optimize_self(alloc);
11047 }
11048
// Applies local rewrites to this node only (children are handled by optimize before this is called):
//  1. [position()=expr] with numeric expr is replaced by [expr];
//  2. filter/predicate nodes are classified into _test (constant one / constant / position-invariant);
//  3. descendant-or-self::node() followed by a child/self/descendant(-or-self) step is collapsed into one step;
//  4. translate() with constant from/to strings is switched to a precomputed table;
//  5. @attr = 'value' / @attr = $string_var is switched to the dedicated attribute comparison node type.
// The order of the rewrites matters: step 1 must run before step 2 so that [position()=1] is classified as constant one.
// alloc is only used to allocate the translate table.
11049 void optimize_self(xpath_allocator* alloc)
11050 {
11051 // Rewrite [position()=expr] with [expr]
11052 // Note that this step has to go before classification to recognize [position()=1]
11053 if ((_type == ast_filter || _type == ast_predicate) &&
11054 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11055 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
11056 {
11057 _right = _right->_right;
11058 }
11059
11060 // Classify filter/predicate ops to perform various optimizations during evaluation
11061 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11062 {
11063 assert(_test == predicate_default);
11064
// if none of the branches below match, _test keeps its predicate_default value
11065 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11066 _test = predicate_constant_one;
11067 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11068 _test = predicate_constant;
11069 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11070 _test = predicate_posinv;
11071 }
11072
11073 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
11074 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11075 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11076 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11077 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11078 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
11079 is_posinv_step())
11080 {
// child/descendant collapse to descendant; self/descendant-or-self collapse to descendant-or-self
11081 if (_axis == axis_child || _axis == axis_descendant)
11082 _axis = axis_descendant;
11083 else
11084 _axis = axis_descendant_or_self;
11085
// drop the intermediate descendant-or-self::node() step by linking directly to its own left node
11086 _left = _left->_left;
11087 }
11088
11089 // Use optimized lookup table implementation for translate() with constant arguments
11090 if (_type == ast_func_translate &&
11091 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11092 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11093 {
11094 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
11095
// when no table is returned the node is left as a regular translate() call
11096 if (table)
11097 {
11098 _type = ast_opt_translate_table;
11099 _data.table = table;
11100 }
11101 }
11102
11103 // Use optimized path for @attr = 'value' or @attr = $value
11104 if (_type == ast_op_equal &&
11105 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11106 // coverity[mixed_enums]
11107 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11108 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11109 {
11110 _type = ast_opt_compare_attribute;
11111 }
11112 }
11113
11114 bool is_posinv_expr() const
11115 {
11116 switch (_type)
11117 {
11118 case ast_func_position:
11119 case ast_func_last:
11120 return false;
11121
11122 case ast_string_constant:
11123 case ast_number_constant:
11124 case ast_variable:
11125 return true;
11126
11127 case ast_step:
11128 case ast_step_root:
11129 return true;
11130
11131 case ast_predicate:
11132 case ast_filter:
11133 return true;
11134
11135 default:
11136 if (_left && !_left->is_posinv_expr()) return false;
11137
11138 for (xpath_ast_node* n = _right; n; n = n->_next)
11139 if (!n->is_posinv_expr()) return false;
11140
11141 return true;
11142 }
11143 }
11144
11145 bool is_posinv_step() const
11146 {
11147 assert(_type == ast_step);
11148
11149 for (xpath_ast_node* n = _right; n; n = n->_next)
11150 {
11151 assert(n->_type == ast_predicate);
11152
11153 if (n->_test != predicate_posinv)
11154 return false;
11155 }
11156
11157 return true;
11158 }
11159
11160 xpath_value_type rettype() const
11161 {
11162 return static_cast<xpath_value_type>(_rettype);
11163 }
11164 };
11165
// Depth limit for XPath ASTs: defaults to 1024 and can be overridden at build time by defining PUGIXML_XPATH_DEPTH_LIMIT.
// NOTE(review): presumably compared against xpath_parser::_depth to trigger error_rec() - the check is outside this excerpt.
11166 static const size_t xpath_ast_depth_limit =
11167 #ifdef PUGIXML_XPATH_DEPTH_LIMIT
11168 PUGIXML_XPATH_DEPTH_LIMIT
11169 #else
11170 1024
11171 #endif
11172 ;
11173
11174 struct xpath_parser
11175 {
11176 xpath_allocator* _alloc;
11177 xpath_lexer _lexer;
11178
11179 const char_t* _query;
11180 xpath_variable_set* _variables;
11181
11182 xpath_parse_result* _result;
11183
11184 char_t _scratch[32];
11185
11186 size_t _depth;
11187
11188 xpath_ast_node* error(const char* message)
11189 {
11190 _result->error = message;
11191 _result->offset = _lexer.current_pos() - _query;
11192
11193 return 0;
11194 }
11195
11196 xpath_ast_node* error_oom()
11197 {
11198 assert(_alloc->_error);
11199 *_alloc->_error = true;
11200
11201 return 0;
11202 }
11203
11204 xpath_ast_node* error_rec()
11205 {
11206 return error("Exceeded maximum allowed query depth");
11207 }
11208
11209 void* alloc_node()
11210 {
11211 return _alloc->allocate(sizeof(xpath_ast_node));
11212 }
11213
11214 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11215 {
11216 void* memory = alloc_node();
11217 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11218 }
11219
11220 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11221 {
11222 void* memory = alloc_node();
11223 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11224 }
11225
11226 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11227 {
11228 void* memory = alloc_node();
11229 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11230 }
11231
11232 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11233 {
11234 void* memory = alloc_node();
11235 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11236 }
11237
11238 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11239 {
11240 void* memory = alloc_node();
11241 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11242 }
11243
11244 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11245 {
11246 void* memory = alloc_node();
11247 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11248 }
11249
11250 const char_t* alloc_string(const xpath_lexer_string& value)
11251 {
11252 if (!value.begin)
11253 return PUGIXML_TEXT("");
11254
11255 size_t length = static_cast<size_t>(value.end - value.begin);
11256
11257 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11258 if (!c) return 0;
11259
11260 memcpy(c, value.begin, length * sizeof(char_t));
11261 c[length] = 0;
11262
11263 return c;
11264 }
11265
11266 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11267 {
11268 switch (name.begin[0])
11269 {
11270 case 'b':
11271 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11272 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11273
11274 break;
11275
11276 case 'c':
11277 if (name == PUGIXML_TEXT("count") && argc == 1)
11278 {
11279 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11280 return alloc_node(ast_func_count, xpath_type_number, args[0]);
11281 }
11282 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11283 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11284 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11285 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11286 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11287 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11288
11289 break;
11290
11291 case 'f':
11292 if (name == PUGIXML_TEXT("false") && argc == 0)
11293 return alloc_node(ast_func_false, xpath_type_boolean);
11294 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11295 return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11296
11297 break;
11298
11299 case 'i':
11300 if (name == PUGIXML_TEXT("id") && argc == 1)
11301 return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11302
11303 break;
11304
11305 case 'l':
11306 if (name == PUGIXML_TEXT("last") && argc == 0)
11307 return alloc_node(ast_func_last, xpath_type_number);
11308 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11309 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11310 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11311 {
11312 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11313 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11314 }
11315
11316 break;
11317
11318 case 'n':
11319 if (name == PUGIXML_TEXT("name") && argc <= 1)
11320 {
11321 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11322 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11323 }
11324 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11325 {
11326 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11327 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11328 }
11329 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11330 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11331 else if (name == PUGIXML_TEXT("not") && argc == 1)
11332 return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11333 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11334 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11335
11336 break;
11337
11338 case 'p':
11339 if (name == PUGIXML_TEXT("position") && argc == 0)
11340 return alloc_node(ast_func_position, xpath_type_number);
11341
11342 break;
11343
11344 case 'r':
11345 if (name == PUGIXML_TEXT("round") && argc == 1)
11346 return alloc_node(ast_func_round, xpath_type_number, args[0]);
11347
11348 break;
11349
11350 case 's':
11351 if (name == PUGIXML_TEXT("string") && argc <= 1)
11352 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11353 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11354 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11355 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11356 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11357 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11358 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11359 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11360 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11361 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11362 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11363 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11364 {
11365 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11366 return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11367 }
11368
11369 break;
11370
11371 case 't':
11372 if (name == PUGIXML_TEXT("translate") && argc == 3)
11373 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11374 else if (name == PUGIXML_TEXT("true") && argc == 0)
11375 return alloc_node(ast_func_true, xpath_type_boolean);
11376
11377 break;
11378
11379 default:
11380 break;
11381 }
11382
11383 return error("Unrecognized function or wrong parameter count");
11384 }
11385
11386 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11387 {
11388 specified = true;
11389
11390 switch (name.begin[0])
11391 {
11392 case 'a':
11393 if (name == PUGIXML_TEXT("ancestor"))
11394 return axis_ancestor;
11395 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11396 return axis_ancestor_or_self;
11397 else if (name == PUGIXML_TEXT("attribute"))
11398 return axis_attribute;
11399
11400 break;
11401
11402 case 'c':
11403 if (name == PUGIXML_TEXT("child"))
11404 return axis_child;
11405
11406 break;
11407
11408 case 'd':
11409 if (name == PUGIXML_TEXT("descendant"))
11410 return axis_descendant;
11411 else if (name == PUGIXML_TEXT("descendant-or-self"))
11412 return axis_descendant_or_self;
11413
11414 break;
11415
11416 case 'f':
11417 if (name == PUGIXML_TEXT("following"))
11418 return axis_following;
11419 else if (name == PUGIXML_TEXT("following-sibling"))
11420 return axis_following_sibling;
11421
11422 break;
11423
11424 case 'n':
11425 if (name == PUGIXML_TEXT("namespace"))
11426 return axis_namespace;
11427
11428 break;
11429
11430 case 'p':
11431 if (name == PUGIXML_TEXT("parent"))
11432 return axis_parent;
11433 else if (name == PUGIXML_TEXT("preceding"))
11434 return axis_preceding;
11435 else if (name == PUGIXML_TEXT("preceding-sibling"))
11436 return axis_preceding_sibling;
11437
11438 break;
11439
11440 case 's':
11441 if (name == PUGIXML_TEXT("self"))
11442 return axis_self;
11443
11444 break;
11445
11446 default:
11447 break;
11448 }
11449
11450 specified = false;
11451 return axis_child;
11452 }
11453
11454 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11455 {
11456 switch (name.begin[0])
11457 {
11458 case 'c':
11459 if (name == PUGIXML_TEXT("comment"))
11460 return nodetest_type_comment;
11461
11462 break;
11463
11464 case 'n':
11465 if (name == PUGIXML_TEXT("node"))
11466 return nodetest_type_node;
11467
11468 break;
11469
11470 case 'p':
11471 if (name == PUGIXML_TEXT("processing-instruction"))
11472 return nodetest_type_pi;
11473
11474 break;
11475
11476 case 't':
11477 if (name == PUGIXML_TEXT("text"))
11478 return nodetest_type_text;
11479
11480 break;
11481
11482 default:
11483 break;
11484 }
11485
11486 return nodetest_none;
11487 }
11488
11489 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11490 xpath_ast_node* parse_primary_expression()
11491 {
11492 switch (_lexer.current())
11493 {
11494 case lex_var_ref:
11495 {
11496 xpath_lexer_string name = _lexer.contents();
11497
11498 if (!_variables)
11499 return error("Unknown variable: variable set is not provided");
11500
11501 xpath_variable* var = 0;
11502 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11503 return error_oom();
11504
11505 if (!var)
11506 return error("Unknown variable: variable set does not contain the given name");
11507
11508 _lexer.next();
11509
11510 return alloc_node(ast_variable, var->type(), var);
11511 }
11512
11513 case lex_open_brace:
11514 {
11515 _lexer.next();
11516
11517 xpath_ast_node* n = parse_expression();
11518 if (!n) return 0;
11519
11520 if (_lexer.current() != lex_close_brace)
11521 return error("Expected ')' to match an opening '('");
11522
11523 _lexer.next();
11524
11525 return n;
11526 }
11527
11528 case lex_quoted_string:
11529 {
11530 const char_t* value = alloc_string(_lexer.contents());
11531 if (!value) return 0;
11532
11533 _lexer.next();
11534
11535 return alloc_node(ast_string_constant, xpath_type_string, value);
11536 }
11537
11538 case lex_number:
11539 {
11540 double value = 0;
11541
11542 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11543 return error_oom();
11544
11545 _lexer.next();
11546
11547 return alloc_node(ast_number_constant, xpath_type_number, value);
11548 }
11549
11550 case lex_string:
11551 {
11552 xpath_ast_node* args[2] = {0};
11553 size_t argc = 0;
11554
11555 xpath_lexer_string function = _lexer.contents();
11556 _lexer.next();
11557
11558 xpath_ast_node* last_arg = 0;
11559
11560 if (_lexer.current() != lex_open_brace)
11561 return error("Unrecognized function call");
11562 _lexer.next();
11563
11564 size_t old_depth = _depth;
11565
11566 while (_lexer.current() != lex_close_brace)
11567 {
11568 if (argc > 0)
11569 {
11570 if (_lexer.current() != lex_comma)
11571 return error("No comma between function arguments");
11572 _lexer.next();
11573 }
11574
11575 if (++_depth > xpath_ast_depth_limit)
11576 return error_rec();
11577
11578 xpath_ast_node* n = parse_expression();
11579 if (!n) return 0;
11580
11581 if (argc < 2) args[argc] = n;
11582 else last_arg->set_next(n);
11583
11584 argc++;
11585 last_arg = n;
11586 }
11587
11588 _lexer.next();
11589
11590 _depth = old_depth;
11591
11592 return parse_function(function, argc, args);
11593 }
11594
11595 default:
11596 return error("Unrecognizable primary expression");
11597 }
11598 }
11599
11600 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11601 // Predicate ::= '[' PredicateExpr ']'
11602 // PredicateExpr ::= Expr
11603 xpath_ast_node* parse_filter_expression()
11604 {
11605 xpath_ast_node* n = parse_primary_expression();
11606 if (!n) return 0;
11607
11608 size_t old_depth = _depth;
11609
11610 while (_lexer.current() == lex_open_square_brace)
11611 {
11612 _lexer.next();
11613
11614 if (++_depth > xpath_ast_depth_limit)
11615 return error_rec();
11616
11617 if (n->rettype() != xpath_type_node_set)
11618 return error("Predicate has to be applied to node set");
11619
11620 xpath_ast_node* expr = parse_expression();
11621 if (!expr) return 0;
11622
11623 n = alloc_node(ast_filter, n, expr, predicate_default);
11624 if (!n) return 0;
11625
11626 if (_lexer.current() != lex_close_square_brace)
11627 return error("Expected ']' to match an opening '['");
11628
11629 _lexer.next();
11630 }
11631
11632 _depth = old_depth;
11633
11634 return n;
11635 }
11636
11637 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11638 // AxisSpecifier ::= AxisName '::' | '@'?
11639 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11640 // NameTest ::= '*' | NCName ':' '*' | QName
11641 // AbbreviatedStep ::= '.' | '..'
11642 xpath_ast_node* parse_step(xpath_ast_node* set)
11643 {
11644 if (set && set->rettype() != xpath_type_node_set)
11645 return error("Step has to be applied to node set");
11646
11647 bool axis_specified = false;
11648 axis_t axis = axis_child; // implied child axis
11649
11650 if (_lexer.current() == lex_axis_attribute)
11651 {
11652 axis = axis_attribute;
11653 axis_specified = true;
11654
11655 _lexer.next();
11656 }
11657 else if (_lexer.current() == lex_dot)
11658 {
11659 _lexer.next();
11660
11661 if (_lexer.current() == lex_open_square_brace)
11662 return error("Predicates are not allowed after an abbreviated step");
11663
11664 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11665 }
11666 else if (_lexer.current() == lex_double_dot)
11667 {
11668 _lexer.next();
11669
11670 if (_lexer.current() == lex_open_square_brace)
11671 return error("Predicates are not allowed after an abbreviated step");
11672
11673 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11674 }
11675
11676 nodetest_t nt_type = nodetest_none;
11677 xpath_lexer_string nt_name;
11678
11679 if (_lexer.current() == lex_string)
11680 {
11681 // node name test
11682 nt_name = _lexer.contents();
11683 _lexer.next();
11684
11685 // was it an axis name?
11686 if (_lexer.current() == lex_double_colon)
11687 {
11688 // parse axis name
11689 if (axis_specified)
11690 return error("Two axis specifiers in one step");
11691
11692 axis = parse_axis_name(nt_name, axis_specified);
11693
11694 if (!axis_specified)
11695 return error("Unknown axis");
11696
11697 // read actual node test
11698 _lexer.next();
11699
11700 if (_lexer.current() == lex_multiply)
11701 {
11702 nt_type = nodetest_all;
11703 nt_name = xpath_lexer_string();
11704 _lexer.next();
11705 }
11706 else if (_lexer.current() == lex_string)
11707 {
11708 nt_name = _lexer.contents();
11709 _lexer.next();
11710 }
11711 else
11712 {
11713 return error("Unrecognized node test");
11714 }
11715 }
11716
11717 if (nt_type == nodetest_none)
11718 {
11719 // node type test or processing-instruction
11720 if (_lexer.current() == lex_open_brace)
11721 {
11722 _lexer.next();
11723
11724 if (_lexer.current() == lex_close_brace)
11725 {
11726 _lexer.next();
11727
11728 nt_type = parse_node_test_type(nt_name);
11729
11730 if (nt_type == nodetest_none)
11731 return error("Unrecognized node type");
11732
11733 nt_name = xpath_lexer_string();
11734 }
11735 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11736 {
11737 if (_lexer.current() != lex_quoted_string)
11738 return error("Only literals are allowed as arguments to processing-instruction()");
11739
11740 nt_type = nodetest_pi;
11741 nt_name = _lexer.contents();
11742 _lexer.next();
11743
11744 if (_lexer.current() != lex_close_brace)
11745 return error("Unmatched brace near processing-instruction()");
11746 _lexer.next();
11747 }
11748 else
11749 {
11750 return error("Unmatched brace near node type test");
11751 }
11752 }
11753 // QName or NCName:*
11754 else
11755 {
11756 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11757 {
11758 nt_name.end--; // erase *
11759
11760 nt_type = nodetest_all_in_namespace;
11761 }
11762 else
11763 {
11764 nt_type = nodetest_name;
11765 }
11766 }
11767 }
11768 }
11769 else if (_lexer.current() == lex_multiply)
11770 {
11771 nt_type = nodetest_all;
11772 _lexer.next();
11773 }
11774 else
11775 {
11776 return error("Unrecognized node test");
11777 }
11778
11779 const char_t* nt_name_copy = alloc_string(nt_name);
11780 if (!nt_name_copy) return 0;
11781
11782 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11783 if (!n) return 0;
11784
11785 size_t old_depth = _depth;
11786
11787 xpath_ast_node* last = 0;
11788
11789 while (_lexer.current() == lex_open_square_brace)
11790 {
11791 _lexer.next();
11792
11793 if (++_depth > xpath_ast_depth_limit)
11794 return error_rec();
11795
11796 xpath_ast_node* expr = parse_expression();
11797 if (!expr) return 0;
11798
11799 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11800 if (!pred) return 0;
11801
11802 if (_lexer.current() != lex_close_square_brace)
11803 return error("Expected ']' to match an opening '['");
11804 _lexer.next();
11805
11806 if (last) last->set_next(pred);
11807 else n->set_right(pred);
11808
11809 last = pred;
11810 }
11811
11812 _depth = old_depth;
11813
11814 return n;
11815 }
11816
11817 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11818 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11819 {
11820 xpath_ast_node* n = parse_step(set);
11821 if (!n) return 0;
11822
11823 size_t old_depth = _depth;
11824
11825 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11826 {
11827 lexeme_t l = _lexer.current();
11828 _lexer.next();
11829
11830 if (l == lex_double_slash)
11831 {
11832 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11833 if (!n) return 0;
11834
11835 ++_depth;
11836 }
11837
11838 if (++_depth > xpath_ast_depth_limit)
11839 return error_rec();
11840
11841 n = parse_step(n);
11842 if (!n) return 0;
11843 }
11844
11845 _depth = old_depth;
11846
11847 return n;
11848 }
11849
11850 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11851 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11852 xpath_ast_node* parse_location_path()
11853 {
11854 if (_lexer.current() == lex_slash)
11855 {
11856 _lexer.next();
11857
11858 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11859 if (!n) return 0;
11860
11861 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11862 lexeme_t l = _lexer.current();
11863
11864 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11865 return parse_relative_location_path(n);
11866 else
11867 return n;
11868 }
11869 else if (_lexer.current() == lex_double_slash)
11870 {
11871 _lexer.next();
11872
11873 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11874 if (!n) return 0;
11875
11876 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11877 if (!n) return 0;
11878
11879 return parse_relative_location_path(n);
11880 }
11881
11882 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11883 return parse_relative_location_path(0);
11884 }
11885
11886 // PathExpr ::= LocationPath
11887 // | FilterExpr
11888 // | FilterExpr '/' RelativeLocationPath
11889 // | FilterExpr '//' RelativeLocationPath
11890 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11891 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11892 xpath_ast_node* parse_path_or_unary_expression()
11893 {
11894 // Clarification.
11895 // PathExpr begins with either LocationPath or FilterExpr.
11896 // FilterExpr begins with PrimaryExpr
11897 // PrimaryExpr begins with '$' in case of it being a variable reference,
11898 // '(' in case of it being an expression, string literal, number constant or
11899 // function call.
11900 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11901 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11902 _lexer.current() == lex_string)
11903 {
11904 if (_lexer.current() == lex_string)
11905 {
11906 // This is either a function call, or not - if not, we shall proceed with location path
11907 const char_t* state = _lexer.state();
11908
11909 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11910
11911 if (*state != '(')
11912 return parse_location_path();
11913
11914 // This looks like a function call; however this still can be a node-test. Check it.
11915 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11916 return parse_location_path();
11917 }
11918
11919 xpath_ast_node* n = parse_filter_expression();
11920 if (!n) return 0;
11921
11922 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11923 {
11924 lexeme_t l = _lexer.current();
11925 _lexer.next();
11926
11927 if (l == lex_double_slash)
11928 {
11929 if (n->rettype() != xpath_type_node_set)
11930 return error("Step has to be applied to node set");
11931
11932 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11933 if (!n) return 0;
11934 }
11935
11936 // select from location path
11937 return parse_relative_location_path(n);
11938 }
11939
11940 return n;
11941 }
11942 else if (_lexer.current() == lex_minus)
11943 {
11944 _lexer.next();
11945
11946 // precedence 7+ - only parses union expressions
11947 xpath_ast_node* n = parse_expression(7);
11948 if (!n) return 0;
11949
11950 return alloc_node(ast_op_negate, xpath_type_number, n);
11951 }
11952 else
11953 {
11954 return parse_location_path();
11955 }
11956 }
11957
11958 struct binary_op_t
11959 {
11960 ast_type_t asttype;
11961 xpath_value_type rettype;
11962 int precedence;
11963
11964 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11965 {
11966 }
11967
11968 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11969 {
11970 }
11971
11972 static binary_op_t parse(xpath_lexer& lexer)
11973 {
11974 switch (lexer.current())
11975 {
11976 case lex_string:
11977 if (lexer.contents() == PUGIXML_TEXT("or"))
11978 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11979 else if (lexer.contents() == PUGIXML_TEXT("and"))
11980 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11981 else if (lexer.contents() == PUGIXML_TEXT("div"))
11982 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11983 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11984 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11985 else
11986 return binary_op_t();
11987
11988 case lex_equal:
11989 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11990
11991 case lex_not_equal:
11992 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11993
11994 case lex_less:
11995 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11996
11997 case lex_greater:
11998 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11999
12000 case lex_less_or_equal:
12001 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
12002
12003 case lex_greater_or_equal:
12004 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
12005
12006 case lex_plus:
12007 return binary_op_t(ast_op_add, xpath_type_number, 5);
12008
12009 case lex_minus:
12010 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
12011
12012 case lex_multiply:
12013 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12014
12015 case lex_union:
12016 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12017
12018 default:
12019 return binary_op_t();
12020 }
12021 }
12022 };
12023
12024 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12025 {
12026 binary_op_t op = binary_op_t::parse(_lexer);
12027
12028 while (op.asttype != ast_unknown && op.precedence >= limit)
12029 {
12030 _lexer.next();
12031
12032 if (++_depth > xpath_ast_depth_limit)
12033 return error_rec();
12034
12035 xpath_ast_node* rhs = parse_path_or_unary_expression();
12036 if (!rhs) return 0;
12037
12038 binary_op_t nextop = binary_op_t::parse(_lexer);
12039
12040 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12041 {
12042 rhs = parse_expression_rec(rhs, nextop.precedence);
12043 if (!rhs) return 0;
12044
12045 nextop = binary_op_t::parse(_lexer);
12046 }
12047
12048 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12049 return error("Union operator has to be applied to node sets");
12050
12051 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12052 if (!lhs) return 0;
12053
12054 op = binary_op_t::parse(_lexer);
12055 }
12056
12057 return lhs;
12058 }
12059
12060 // Expr ::= OrExpr
12061 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12062 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12063 // EqualityExpr ::= RelationalExpr
12064 // | EqualityExpr '=' RelationalExpr
12065 // | EqualityExpr '!=' RelationalExpr
12066 // RelationalExpr ::= AdditiveExpr
12067 // | RelationalExpr '<' AdditiveExpr
12068 // | RelationalExpr '>' AdditiveExpr
12069 // | RelationalExpr '<=' AdditiveExpr
12070 // | RelationalExpr '>=' AdditiveExpr
12071 // AdditiveExpr ::= MultiplicativeExpr
12072 // | AdditiveExpr '+' MultiplicativeExpr
12073 // | AdditiveExpr '-' MultiplicativeExpr
12074 // MultiplicativeExpr ::= UnaryExpr
12075 // | MultiplicativeExpr '*' UnaryExpr
12076 // | MultiplicativeExpr 'div' UnaryExpr
12077 // | MultiplicativeExpr 'mod' UnaryExpr
12078 xpath_ast_node* parse_expression(int limit = 0)
12079 {
12080 size_t old_depth = _depth;
12081
12082 if (++_depth > xpath_ast_depth_limit)
12083 return error_rec();
12084
12085 xpath_ast_node* n = parse_path_or_unary_expression();
12086 if (!n) return 0;
12087
12088 n = parse_expression_rec(n, limit);
12089
12090 _depth = old_depth;
12091
12092 return n;
12093 }
12094
12095 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
12096 {
12097 }
12098
12099 xpath_ast_node* parse()
12100 {
12101 xpath_ast_node* n = parse_expression();
12102 if (!n) return 0;
12103
12104 assert(_depth == 0);
12105
12106 // check if there are unparsed tokens left
12107 if (_lexer.current() != lex_eof)
12108 return error("Incorrect query");
12109
12110 return n;
12111 }
12112
12113 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12114 {
12115 xpath_parser parser(query, variables, alloc, result);
12116
12117 return parser.parse();
12118 }
12119 };
12120
12121 struct xpath_query_impl
12122 {
12123 static xpath_query_impl* create()
12124 {
12125 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12126 if (!memory) return 0;
12127
12128 return new (memory) xpath_query_impl();
12129 }
12130
12131 static void destroy(xpath_query_impl* impl)
12132 {
12133 // free all allocated pages
12134 impl->alloc.release();
12135
12136 // free allocator memory (with the first page)
12137 xml_memory::deallocate(impl);
12138 }
12139
12140 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12141 {
12142 block.next = 0;
12143 block.capacity = sizeof(block.data);
12144 }
12145
12146 xpath_ast_node* root;
12147 xpath_allocator alloc;
12148 xpath_memory_block block;
12149 bool oom;
12150 };
12151
12152 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12153 {
12154 if (!impl) return 0;
12155
12156 if (impl->root->rettype() != xpath_type_node_set)
12157 {
12158 #ifdef PUGIXML_NO_EXCEPTIONS
12159 return 0;
12160 #else
12161 xpath_parse_result res;
12162 res.error = "Expression does not evaluate to node set";
12163
12164 throw xpath_exception(res);
12165 #endif
12166 }
12167
12168 return impl->root;
12169 }
12170PUGI__NS_END
12171
12172namespace pugi
12173{
12174#ifndef PUGIXML_NO_EXCEPTIONS
12175 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
12176 {
12177 assert(_result.error);
12178 }
12179
12180 PUGI__FN const char* xpath_exception::what() const throw()
12181 {
12182 return _result.error;
12183 }
12184
12185 PUGI__FN const xpath_parse_result& xpath_exception::result() const
12186 {
12187 return _result;
12188 }
12189#endif
12190
12191 PUGI__FN xpath_node::xpath_node()
12192 {
12193 }
12194
12195 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
12196 {
12197 }
12198
12199 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
12200 {
12201 }
12202
12203 PUGI__FN xml_node xpath_node::node() const
12204 {
12205 return _attribute ? xml_node() : _node;
12206 }
12207
12208 PUGI__FN xml_attribute xpath_node::attribute() const
12209 {
12210 return _attribute;
12211 }
12212
12213 PUGI__FN xml_node xpath_node::parent() const
12214 {
12215 return _attribute ? _node : _node.parent();
12216 }
12217
12218 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
12219 {
12220 }
12221
12222 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
12223 {
12224 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12225 }
12226
12227 PUGI__FN bool xpath_node::operator!() const
12228 {
12229 return !(_node || _attribute);
12230 }
12231
12232 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
12233 {
12234 return _node == n._node && _attribute == n._attribute;
12235 }
12236
12237 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
12238 {
12239 return _node != n._node || _attribute != n._attribute;
12240 }
12241
12242#ifdef __BORLANDC__
12243 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
12244 {
12245 return (bool)lhs && rhs;
12246 }
12247
12248 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
12249 {
12250 return (bool)lhs || rhs;
12251 }
12252#endif
12253
12254 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12255 {
12256 assert(begin_ <= end_);
12257
12258 size_t size_ = static_cast<size_t>(end_ - begin_);
12259
12260 // use internal buffer for 0 or 1 elements, heap buffer otherwise
12261 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12262
12263 if (!storage)
12264 {
12265 #ifdef PUGIXML_NO_EXCEPTIONS
12266 return;
12267 #else
12268 throw std::bad_alloc();
12269 #endif
12270 }
12271
12272 // deallocate old buffer
12273 if (_begin != _storage)
12274 impl::xml_memory::deallocate(_begin);
12275
12276 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12277 if (size_)
12278 memcpy(storage, begin_, size_ * sizeof(xpath_node));
12279
12280 _begin = storage;
12281 _end = storage + size_;
12282 _type = type_;
12283 }
12284
12285#ifdef PUGIXML_HAS_MOVE
12286 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12287 {
12288 _type = rhs._type;
12289 _storage[0] = rhs._storage[0];
12290 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12291 _end = _begin + (rhs._end - rhs._begin);
12292
12293 rhs._type = type_unsorted;
12294 rhs._begin = rhs._storage;
12295 rhs._end = rhs._storage;
12296 }
12297#endif
12298
12299 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
12300 {
12301 }
12302
12303 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
12304 {
12305 _assign(begin_, end_, type_);
12306 }
12307
12308 PUGI__FN xpath_node_set::~xpath_node_set()
12309 {
12310 if (_begin != _storage)
12311 impl::xml_memory::deallocate(_begin);
12312 }
12313
12314 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
12315 {
12316 _assign(ns._begin, ns._end, ns._type);
12317 }
12318
12319 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12320 {
12321 if (this == &ns) return *this;
12322
12323 _assign(ns._begin, ns._end, ns._type);
12324
12325 return *this;
12326 }
12327
12328#ifdef PUGIXML_HAS_MOVE
12329 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
12330 {
12331 _move(rhs);
12332 }
12333
12334 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12335 {
12336 if (this == &rhs) return *this;
12337
12338 if (_begin != _storage)
12339 impl::xml_memory::deallocate(_begin);
12340
12341 _move(rhs);
12342
12343 return *this;
12344 }
12345#endif
12346
12347 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12348 {
12349 return _type;
12350 }
12351
12352 PUGI__FN size_t xpath_node_set::size() const
12353 {
12354 return _end - _begin;
12355 }
12356
12357 PUGI__FN bool xpath_node_set::empty() const
12358 {
12359 return _begin == _end;
12360 }
12361
12362 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12363 {
12364 assert(index < size());
12365 return _begin[index];
12366 }
12367
12368 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12369 {
12370 return _begin;
12371 }
12372
12373 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12374 {
12375 return _end;
12376 }
12377
12378 PUGI__FN void xpath_node_set::sort(bool reverse)
12379 {
12380 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12381 }
12382
12383 PUGI__FN xpath_node xpath_node_set::first() const
12384 {
12385 return impl::xpath_first(_begin, _end, _type);
12386 }
12387
12388 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12389 {
12390 }
12391
12392 PUGI__FN xpath_parse_result::operator bool() const
12393 {
12394 return error == 0;
12395 }
12396
12397 PUGI__FN const char* xpath_parse_result::description() const
12398 {
12399 return error ? error : "No error";
12400 }
12401
12402 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12403 {
12404 }
12405
12406 PUGI__FN const char_t* xpath_variable::name() const
12407 {
12408 switch (_type)
12409 {
12410 case xpath_type_node_set:
12411 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12412
12413 case xpath_type_number:
12414 return static_cast<const impl::xpath_variable_number*>(this)->name;
12415
12416 case xpath_type_string:
12417 return static_cast<const impl::xpath_variable_string*>(this)->name;
12418
12419 case xpath_type_boolean:
12420 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12421
12422 default:
12423 assert(false && "Invalid variable type"); // unreachable
12424 return 0;
12425 }
12426 }
12427
12428 PUGI__FN xpath_value_type xpath_variable::type() const
12429 {
12430 return _type;
12431 }
12432
12433 PUGI__FN bool xpath_variable::get_boolean() const
12434 {
12435 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12436 }
12437
12438 PUGI__FN double xpath_variable::get_number() const
12439 {
12440 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12441 }
12442
12443 PUGI__FN const char_t* xpath_variable::get_string() const
12444 {
12445 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12446 return value ? value : PUGIXML_TEXT("");
12447 }
12448
12449 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12450 {
12451 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12452 }
12453
12454 PUGI__FN bool xpath_variable::set(bool value)
12455 {
12456 if (_type != xpath_type_boolean) return false;
12457
12458 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12459 return true;
12460 }
12461
12462 PUGI__FN bool xpath_variable::set(double value)
12463 {
12464 if (_type != xpath_type_number) return false;
12465
12466 static_cast<impl::xpath_variable_number*>(this)->value = value;
12467 return true;
12468 }
12469
12470 PUGI__FN bool xpath_variable::set(const char_t* value)
12471 {
12472 if (_type != xpath_type_string) return false;
12473
12474 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12475
12476 // duplicate string
12477 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12478
12479 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12480 if (!copy) return false;
12481
12482 memcpy(copy, value, size);
12483
12484 // replace old string
12485 if (var->value) impl::xml_memory::deallocate(var->value);
12486 var->value = copy;
12487
12488 return true;
12489 }
12490
12491 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12492 {
12493 if (_type != xpath_type_node_set) return false;
12494
12495 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12496 return true;
12497 }
12498
12499 PUGI__FN xpath_variable_set::xpath_variable_set()
12500 {
12501 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12502 _data[i] = 0;
12503 }
12504
12505 PUGI__FN xpath_variable_set::~xpath_variable_set()
12506 {
12507 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12508 _destroy(_data[i]);
12509 }
12510
12511 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12512 {
12513 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12514 _data[i] = 0;
12515
12516 _assign(rhs);
12517 }
12518
12519 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12520 {
12521 if (this == &rhs) return *this;
12522
12523 _assign(rhs);
12524
12525 return *this;
12526 }
12527
12528#ifdef PUGIXML_HAS_MOVE
12529 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12530 {
12531 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12532 {
12533 _data[i] = rhs._data[i];
12534 rhs._data[i] = 0;
12535 }
12536 }
12537
12538 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12539 {
12540 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12541 {
12542 _destroy(_data[i]);
12543
12544 _data[i] = rhs._data[i];
12545 rhs._data[i] = 0;
12546 }
12547
12548 return *this;
12549 }
12550#endif
12551
12552 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12553 {
12554 xpath_variable_set temp;
12555
12556 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12557 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12558 return;
12559
12560 _swap(temp);
12561 }
12562
12563 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12564 {
12565 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12566 {
12567 xpath_variable* chain = _data[i];
12568
12569 _data[i] = rhs._data[i];
12570 rhs._data[i] = chain;
12571 }
12572 }
12573
12574 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12575 {
12576 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12577 size_t hash = impl::hash_string(name) % hash_size;
12578
12579 // look for existing variable
12580 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12581 if (impl::strequal(var->name(), name))
12582 return var;
12583
12584 return 0;
12585 }
12586
12587 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12588 {
12589 xpath_variable* last = 0;
12590
12591 while (var)
12592 {
12593 // allocate storage for new variable
12594 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12595 if (!nvar) return false;
12596
12597 // link the variable to the result immediately to handle failures gracefully
12598 if (last)
12599 last->_next = nvar;
12600 else
12601 *out_result = nvar;
12602
12603 last = nvar;
12604
12605 // copy the value; this can fail due to out-of-memory conditions
12606 if (!impl::copy_xpath_variable(nvar, var)) return false;
12607
12608 var = var->_next;
12609 }
12610
12611 return true;
12612 }
12613
12614 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12615 {
12616 while (var)
12617 {
12618 xpath_variable* next = var->_next;
12619
12620 impl::delete_xpath_variable(var->_type, var);
12621
12622 var = next;
12623 }
12624 }
12625
12626 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12627 {
12628 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12629 size_t hash = impl::hash_string(name) % hash_size;
12630
12631 // look for existing variable
12632 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12633 if (impl::strequal(var->name(), name))
12634 return var->type() == type ? var : 0;
12635
12636 // add new variable
12637 xpath_variable* result = impl::new_xpath_variable(type, name);
12638
12639 if (result)
12640 {
12641 result->_next = _data[hash];
12642
12643 _data[hash] = result;
12644 }
12645
12646 return result;
12647 }
12648
12649 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12650 {
12651 xpath_variable* var = add(name, xpath_type_boolean);
12652 return var ? var->set(value) : false;
12653 }
12654
12655 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12656 {
12657 xpath_variable* var = add(name, xpath_type_number);
12658 return var ? var->set(value) : false;
12659 }
12660
12661 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12662 {
12663 xpath_variable* var = add(name, xpath_type_string);
12664 return var ? var->set(value) : false;
12665 }
12666
12667 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12668 {
12669 xpath_variable* var = add(name, xpath_type_node_set);
12670 return var ? var->set(value) : false;
12671 }
12672
12673 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12674 {
12675 return _find(name);
12676 }
12677
12678 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12679 {
12680 return _find(name);
12681 }
12682
12683 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12684 {
12685 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12686
12687 if (!qimpl)
12688 {
12689 #ifdef PUGIXML_NO_EXCEPTIONS
12690 _result.error = "Out of memory";
12691 #else
12692 throw std::bad_alloc();
12693 #endif
12694 }
12695 else
12696 {
12697 using impl::auto_deleter; // MSVC7 workaround
12698 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12699
12700 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12701
12702 if (qimpl->root)
12703 {
12704 qimpl->root->optimize(&qimpl->alloc);
12705
12706 _impl = impl.release();
12707 _result.error = 0;
12708 }
12709 else
12710 {
12711 #ifdef PUGIXML_NO_EXCEPTIONS
12712 if (qimpl->oom) _result.error = "Out of memory";
12713 #else
12714 if (qimpl->oom) throw std::bad_alloc();
12715 throw xpath_exception(_result);
12716 #endif
12717 }
12718 }
12719 }
12720
12721 PUGI__FN xpath_query::xpath_query(): _impl(0)
12722 {
12723 }
12724
12725 PUGI__FN xpath_query::~xpath_query()
12726 {
12727 if (_impl)
12728 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12729 }
12730
12731#ifdef PUGIXML_HAS_MOVE
12732 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12733 {
12734 _impl = rhs._impl;
12735 _result = rhs._result;
12736 rhs._impl = 0;
12737 rhs._result = xpath_parse_result();
12738 }
12739
12740 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12741 {
12742 if (this == &rhs) return *this;
12743
12744 if (_impl)
12745 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12746
12747 _impl = rhs._impl;
12748 _result = rhs._result;
12749 rhs._impl = 0;
12750 rhs._result = xpath_parse_result();
12751
12752 return *this;
12753 }
12754#endif
12755
12756 PUGI__FN xpath_value_type xpath_query::return_type() const
12757 {
12758 if (!_impl) return xpath_type_none;
12759
12760 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12761 }
12762
12763 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12764 {
12765 if (!_impl) return false;
12766
12767 impl::xpath_context c(n, 1, 1);
12768 impl::xpath_stack_data sd;
12769
12770 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12771
12772 if (sd.oom)
12773 {
12774 #ifdef PUGIXML_NO_EXCEPTIONS
12775 return false;
12776 #else
12777 throw std::bad_alloc();
12778 #endif
12779 }
12780
12781 return r;
12782 }
12783
12784 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12785 {
12786 if (!_impl) return impl::gen_nan();
12787
12788 impl::xpath_context c(n, 1, 1);
12789 impl::xpath_stack_data sd;
12790
12791 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12792
12793 if (sd.oom)
12794 {
12795 #ifdef PUGIXML_NO_EXCEPTIONS
12796 return impl::gen_nan();
12797 #else
12798 throw std::bad_alloc();
12799 #endif
12800 }
12801
12802 return r;
12803 }
12804
12805#ifndef PUGIXML_NO_STL
12806 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12807 {
12808 if (!_impl) return string_t();
12809
12810 impl::xpath_context c(n, 1, 1);
12811 impl::xpath_stack_data sd;
12812
12813 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12814
12815 if (sd.oom)
12816 {
12817 #ifdef PUGIXML_NO_EXCEPTIONS
12818 return string_t();
12819 #else
12820 throw std::bad_alloc();
12821 #endif
12822 }
12823
12824 return string_t(r.c_str(), r.length());
12825 }
12826#endif
12827
12828 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12829 {
12830 impl::xpath_context c(n, 1, 1);
12831 impl::xpath_stack_data sd;
12832
12833 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12834
12835 if (sd.oom)
12836 {
12837 #ifdef PUGIXML_NO_EXCEPTIONS
12838 r = impl::xpath_string();
12839 #else
12840 throw std::bad_alloc();
12841 #endif
12842 }
12843
12844 size_t full_size = r.length() + 1;
12845
12846 if (capacity > 0)
12847 {
12848 size_t size = (full_size < capacity) ? full_size : capacity;
12849 assert(size > 0);
12850
12851 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12852 buffer[size - 1] = 0;
12853 }
12854
12855 return full_size;
12856 }
12857
12858 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12859 {
12860 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12861 if (!root) return xpath_node_set();
12862
12863 impl::xpath_context c(n, 1, 1);
12864 impl::xpath_stack_data sd;
12865
12866 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12867
12868 if (sd.oom)
12869 {
12870 #ifdef PUGIXML_NO_EXCEPTIONS
12871 return xpath_node_set();
12872 #else
12873 throw std::bad_alloc();
12874 #endif
12875 }
12876
12877 return xpath_node_set(r.begin(), r.end(), r.type());
12878 }
12879
12880 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12881 {
12882 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12883 if (!root) return xpath_node();
12884
12885 impl::xpath_context c(n, 1, 1);
12886 impl::xpath_stack_data sd;
12887
12888 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12889
12890 if (sd.oom)
12891 {
12892 #ifdef PUGIXML_NO_EXCEPTIONS
12893 return xpath_node();
12894 #else
12895 throw std::bad_alloc();
12896 #endif
12897 }
12898
12899 return r.first();
12900 }
12901
12902 PUGI__FN const xpath_parse_result& xpath_query::result() const
12903 {
12904 return _result;
12905 }
12906
12907 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12908 {
12909 }
12910
12911 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12912 {
12913 return _impl ? unspecified_bool_xpath_query : 0;
12914 }
12915
12916 PUGI__FN bool xpath_query::operator!() const
12917 {
12918 return !_impl;
12919 }
12920
12921 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12922 {
12923 xpath_query q(query, variables);
12924 return q.evaluate_node(*this);
12925 }
12926
12927 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12928 {
12929 return query.evaluate_node(*this);
12930 }
12931
12932 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12933 {
12934 xpath_query q(query, variables);
12935 return q.evaluate_node_set(*this);
12936 }
12937
12938 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12939 {
12940 return query.evaluate_node_set(*this);
12941 }
12942
12943 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12944 {
12945 xpath_query q(query, variables);
12946 return q.evaluate_node(*this);
12947 }
12948
12949 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12950 {
12951 return query.evaluate_node(*this);
12952 }
12953}
12954
12955#endif
12956
12957#ifdef __BORLANDC__
12958# pragma option pop
12959#endif
12960
12961// Intel C++ does not properly keep warning state for function templates,
12962// so popping warning state at the end of translation unit leads to warnings in the middle.
12963#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12964# pragma warning(pop)
12965#endif
12966
12967#if defined(_MSC_VER) && defined(__c2__)
12968# pragma clang diagnostic pop
12969#endif
12970
12971// Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12972#undef PUGI__NO_INLINE
12973#undef PUGI__UNLIKELY
12974#undef PUGI__STATIC_ASSERT
12975#undef PUGI__DMC_VOLATILE
12976#undef PUGI__UNSIGNED_OVERFLOW
12977#undef PUGI__MSVC_CRT_VERSION
12978#undef PUGI__SNPRINTF
12979#undef PUGI__NS_BEGIN
12980#undef PUGI__NS_END
12981#undef PUGI__FN
12982#undef PUGI__FN_NO_INLINE
12983#undef PUGI__GETHEADER_IMPL
12984#undef PUGI__GETPAGE_IMPL
12985#undef PUGI__GETPAGE
12986#undef PUGI__NODETYPE
12987#undef PUGI__IS_CHARTYPE_IMPL
12988#undef PUGI__IS_CHARTYPE
12989#undef PUGI__IS_CHARTYPEX
12990#undef PUGI__ENDSWITH
12991#undef PUGI__SKIPWS
12992#undef PUGI__OPTSET
12993#undef PUGI__PUSHNODE
12994#undef PUGI__POPNODE
12995#undef PUGI__SCANFOR
12996#undef PUGI__SCANWHILE
12997#undef PUGI__SCANWHILE_UNROLL
12998#undef PUGI__ENDSEG
12999#undef PUGI__THROW_ERROR
13000#undef PUGI__CHECK_ERROR
13001
13002#endif
13003
STL class.