diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-08-24 17:00:31 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-08-24 17:00:31 -0400 |
| commit | 076e104b8672924c3d80cd1da2fdb5ebee1766ac (patch) | |
| tree | e33a8081c61899c5d1a471401605e55716ca3ff4 /include | |
| parent | 1cb522b36382381ef3f1494f24b0c6a98f8843a9 (diff) | |
| download | dynamic-extension-076e104b8672924c3d80cd1da2fdb5ebee1766ac.tar.gz | |
Migrated over to using psudb-common utilities/headers
Diffstat (limited to 'include')
| -rw-r--r-- | include/ds/Alias.h | 72 | ||||
| -rw-r--r-- | include/ds/BTree.h | 3924 | ||||
| -rw-r--r-- | include/ds/BitArray.h | 67 | ||||
| -rw-r--r-- | include/ds/BloomFilter.h | 75 | ||||
| -rw-r--r-- | include/ds/PriorityQueue.h | 162 | ||||
| -rw-r--r-- | include/framework/DynamicExtension.h | 4 | ||||
| -rw-r--r-- | include/framework/MutableBuffer.h | 15 | ||||
| -rw-r--r-- | include/framework/RecordInterface.h | 5 | ||||
| -rw-r--r-- | include/shard/Alex.h | 10 | ||||
| -rw-r--r-- | include/shard/MemISAM.h | 12 | ||||
| -rw-r--r-- | include/shard/PGM.h | 10 | ||||
| -rw-r--r-- | include/shard/TrieSpline.h | 10 | ||||
| -rw-r--r-- | include/shard/VPTree.h | 10 | ||||
| -rw-r--r-- | include/shard/WIRS.h | 12 | ||||
| -rw-r--r-- | include/shard/WSS.h | 12 | ||||
| -rw-r--r-- | include/util/Cursor.h | 14 | ||||
| -rw-r--r-- | include/util/base.h | 73 | ||||
| -rw-r--r-- | include/util/bf_config.h | 2 | ||||
| -rw-r--r-- | include/util/hash.h | 61 | ||||
| -rw-r--r-- | include/util/timer.h | 37 | ||||
| -rw-r--r-- | include/util/types.h | 2 |
21 files changed, 77 insertions, 4512 deletions
diff --git a/include/ds/Alias.h b/include/ds/Alias.h deleted file mode 100644 index 855dc75..0000000 --- a/include/ds/Alias.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * include/ds/Alias.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <gsl/gsl_rng.h> -#include <vector> - -namespace de { - -/* - * An implementation of Walker's Alias Structure for Weighted Set Sampling. Requires - * that the input weight vector is already normalized. - */ -class Alias { -public: - Alias(const std::vector<double>& weights) - : m_alias(weights.size()), m_cutoff(weights.size()) { - size_t n = weights.size(); - auto overfull = std::vector<size_t>(); - auto underfull = std::vector<size_t>(); - overfull.reserve(n); - underfull.reserve(n); - - // initialize the probability_table with n*p(i) as well as the overfull and - // underfull lists. - for (size_t i = 0; i < n; i++) { - m_cutoff[i] = (double) n * weights[i]; - if (m_cutoff[i] > 1) { - overfull.emplace_back(i); - } else if (m_cutoff[i] < 1) { - underfull.emplace_back(i); - } else { - m_alias[i] = i; - } - } - - while (overfull.size() > 0 && underfull.size() > 0) { - auto i = overfull.back(); overfull.pop_back(); - auto j = underfull.back(); underfull.pop_back(); - - m_alias[j] = i; - m_cutoff[i] = m_cutoff[i] + m_cutoff[j] - 1.0; - - if (m_cutoff[i] > 1.0) { - overfull.emplace_back(i); - } else if (m_cutoff[i] < 1.0) { - underfull.emplace_back(i); - } - } - } - - size_t get(const gsl_rng* rng) { - double coin1 = gsl_rng_uniform(rng); - double coin2 = gsl_rng_uniform(rng); - - size_t k = ((double) m_alias.size()) * coin1; - return coin2 < m_cutoff[k] ? k : m_alias[k]; - } - -private: - std::vector<size_t> m_alias; - std::vector<double> m_cutoff; -}; - -} diff --git a/include/ds/BTree.h b/include/ds/BTree.h deleted file mode 100644 index 4cecd99..0000000 --- a/include/ds/BTree.h +++ /dev/null @@ -1,3924 +0,0 @@ -/******************************************************************************* - * tlx/container/btree.hpp - * - * Part of tlx - http://panthema.net/tlx - * - * Copyright (C) 2008-2017 Timo Bingmann <tb@panthema.net> - * - * Range Query functionality added by Dong Xie - * - * All rights reserved. Published under the Boost Software License, Version 1.0 - ******************************************************************************/ - -#ifndef TLX_CONTAINER_BTREE_HEADER -#define TLX_CONTAINER_BTREE_HEADER - -// *** Required Headers from the STL - -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <functional> -#include <istream> -#include <memory> -#include <ostream> -#include <utility> -#include <iostream> -#include <numeric> - -#include <gsl/gsl_rng.h> - - -namespace tlx { - -//! \addtogroup tlx_container -//! \{ -//! \defgroup tlx_container_btree B+ Trees -//! B+ tree variants -//! \{ - -// *** Debugging Macros - -#ifdef TLX_BTREE_DEBUG - -#include <iostream> - -//! Print out debug information to std::cout if TLX_BTREE_DEBUG is defined. -#define TLX_BTREE_PRINT(x) \ - do { if (debug) (std::cout << x << std::endl); } while (0) - -//! Assertion only if TLX_BTREE_DEBUG is defined. This is not used in verify(). -#define TLX_BTREE_ASSERT(x) \ - do { assert(x); } while (0) - -#else - -//! Print out debug information to std::cout if TLX_BTREE_DEBUG is defined. -#define TLX_BTREE_PRINT(x) do { } while (0) - -//! Assertion only if TLX_BTREE_DEBUG is defined. This is not used in verify(). -#define TLX_BTREE_ASSERT(x) do { } while (0) - -#endif - -//! The maximum of a and b. Used in some compile-time formulas. -#define TLX_BTREE_MAX(a, b) ((a) < (b) ? (b) : (a)) - -#ifndef TLX_BTREE_FRIENDS -//! The macro TLX_BTREE_FRIENDS can be used by outside class to access the B+ -//! tree internals. This was added for wxBTreeDemo to be able to draw the -//! tree. -#define TLX_BTREE_FRIENDS friend class btree_friend -#endif - -/*! - * Generates default traits for a B+ tree used as a set or map. It estimates - * leaf and inner node sizes by assuming a cache line multiple of 256 bytes. -*/ -template <typename Key, typename Value> -struct btree_default_traits { - //! If true, the tree will self verify its invariants after each insert() or - //! erase(). The header must have been compiled with TLX_BTREE_DEBUG - //! defined. - static const bool self_verify = false; - - //! If true, the tree will print out debug information and a tree dump - //! during insert() or erase() operation. The header must have been - //! compiled with TLX_BTREE_DEBUG defined and key_type must be std::ostream - //! printable. - static const bool debug = false; - - //! Number of slots in each leaf of the tree. Estimated so that each node - //! has a size of about 256 bytes. - static const int leaf_slots = 16; - //TLX_BTREE_MAX(4, 256 / (sizeof(Value))); - - //! Number of slots in each inner node of the tree. Estimated so that each - //! node has a size of about 256 bytes. - static const int inner_slots = 16; - //TLX_BTREE_MAX(4, 256 / (sizeof(Key) + sizeof(void*))); - - //! As of stx-btree-0.9, the code does linear search in find_lower() and - //! find_upper() instead of binary_search, unless the node size is larger - //! than this threshold. See notes at - //! http://panthema.net/2013/0504-STX-B+Tree-Binary-vs-Linear-Search - static const size_t binsearch_threshold = 256; -}; - -/*! - * Basic class implementing a B+ tree data structure in memory. - * - * The base implementation of an in-memory B+ tree. It is based on the - * implementation in Cormen's Introduction into Algorithms, Jan Jannink's paper - * and other algorithm resources. Almost all STL-required function calls are - * implemented. The asymptotic time requirements of the STL are not always - * fulfilled in theory, however, in practice this B+ tree performs better than a - * red-black tree and almost always uses less memory. The insertion function - * splits the nodes on the recursion unroll. Erase is largely based on Jannink's - * ideas. - * - * This class is specialized into btree_set, btree_multiset, btree_map and - * btree_multimap using default template parameters and facade functions. - */ -template <typename Key, typename Value, - typename KeyOfValue, - typename Compare = std::less<Key>, - typename Traits = btree_default_traits<Key, Value>, - bool Duplicates = true, - typename Allocator = std::allocator<Value> > -class BTree -{ -public: - //! \name Template Parameter Types - //! \{ - - //! First template parameter: The key type of the B+ tree. This is stored in - //! inner nodes. - typedef Key key_type; - - //! Second template parameter: Composition pair of key and data types, or - //! just the key for set containers. This data type is stored in the leaves. - typedef Value value_type; - - //! Third template: key extractor class to pull key_type from value_type. - typedef KeyOfValue key_of_value; - - //! Fourth template parameter: key_type comparison function object - typedef Compare key_compare; - - //! Fifth template parameter: Traits object used to define more parameters - //! of the B+ tree - typedef Traits traits; - - //! Sixth template parameter: Allow duplicate keys in the B+ tree. Used to - //! implement multiset and multimap. - static const bool allow_duplicates = Duplicates; - - //! Seventh template parameter: STL allocator for tree nodes - typedef Allocator allocator_type; - - //! \} - - // The macro TLX_BTREE_FRIENDS can be used by outside class to access the B+ - // tree internals. This was added for wxBTreeDemo to be able to draw the - // tree. - TLX_BTREE_FRIENDS; - -public: - //! \name Constructed Types - //! \{ - - //! Typedef of our own type - typedef BTree<key_type, value_type, key_of_value, key_compare, - traits, allow_duplicates, allocator_type> Self; - - //! Size type used to count keys - typedef size_t size_type; - - //! \} - -public: - //! \name Static Constant Options and Values of the B+ Tree - //! \{ - - //! Base B+ tree parameter: The number of key/data slots in each leaf - static const unsigned short leaf_slotmax = traits::leaf_slots; - - //! Base B+ tree parameter: The number of key slots in each inner node, - //! this can differ from slots in each leaf. - static const unsigned short inner_slotmax = traits::inner_slots; - - //! Computed B+ tree parameter: The minimum number of key/data slots used - //! in a leaf. If fewer slots are used, the leaf will be merged or slots - //! shifted from it's siblings. - static const unsigned short leaf_slotmin = (leaf_slotmax / 2); - - //! Computed B+ tree parameter: The minimum number of key slots used - //! in an inner node. If fewer slots are used, the inner node will be - //! merged or slots shifted from it's siblings. - static const unsigned short inner_slotmin = (inner_slotmax / 2); - - //! Debug parameter: Enables expensive and thorough checking of the B+ tree - //! invariants after each insert/erase operation. - static const bool self_verify = traits::self_verify; - - //! Debug parameter: Prints out lots of debug information about how the - //! algorithms change the tree. Requires the header file to be compiled - //! with TLX_BTREE_DEBUG and the key type must be std::ostream printable. - static const bool debug = traits::debug; - - //! \} - -private: - //! \name Node Classes for In-Memory Nodes - //! \{ - - //! The header structure of each node in-memory. This structure is extended - //! by InnerNode or LeafNode. - struct node { - //! Level in the b-tree, if level == 0 -> leaf node - unsigned short level; - - //! Number of key slotuse use, so the number of valid children or data - //! pointers - unsigned short slotuse; - - //! Delayed initialisation of constructed node. - void initialize(const unsigned short l) { - level = l; - slotuse = 0; - } - - //! True if this is a leaf node. - bool is_leafnode() const { - return (level == 0); - } - }; - - //! Extended structure of a inner node in-memory. Contains only keys and no - //! data items. - struct InnerNode : public node { - //! Define an related allocator for the InnerNode structs. - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<InnerNode> alloc_type; - - //! Keys of children or data pointers - key_type slotkey[inner_slotmax]; // NOLINT - - //! Pointers to children - node* childid[inner_slotmax + 1]; // NOLINT - - double weight[inner_slotmax + 1]; - - //! Set variables to initial values. - void initialize(const unsigned short l) { - node::initialize(l); - weight[0] = 0.0; - } - - //! Return key in slot s - const key_type& key(size_t s) const { - return slotkey[s]; - } - - //! True if the node's slots are full. - bool is_full() const { - return (node::slotuse == inner_slotmax); - } - - //! True if few used entries, less than half full. - bool is_few() const { - return (node::slotuse <= inner_slotmin); - } - - //! True if node has too few entries. - bool is_underflow() const { - return (node::slotuse < inner_slotmin); - } - }; - - //! Extended structure of a leaf node in memory. Contains pairs of keys and - //! data items. Key and data slots are kept together in value_type. - struct LeafNode : public node { - //! Define an related allocator for the LeafNode structs. - typedef typename std::allocator_traits<Allocator>::template rebind_alloc<LeafNode> alloc_type; - - //! Double linked list pointers to traverse the leaves - LeafNode* prev_leaf; - - //! Double linked list pointers to traverse the leaves - LeafNode* next_leaf; - - //! Array of (key, data) pairs - value_type slotdata[leaf_slotmax]; // NOLINT - - double weight[leaf_slotmax]; - - //! Set variables to initial values - void initialize() { - node::initialize(0); - prev_leaf = next_leaf = nullptr; - } - - //! Return key in slot s. - const key_type& key(size_t s) const { - return key_of_value::get(slotdata[s]); - } - - //! True if the node's slots are full. - bool is_full() const { - return (node::slotuse == leaf_slotmax); - } - - //! True if few used entries, less than half full. - bool is_few() const { - return (node::slotuse <= leaf_slotmin); - } - - //! True if node has too few entries. - bool is_underflow() const { - return (node::slotuse < leaf_slotmin); - } - - //! Set the (key,data) pair in slot. Overloaded function used by - //! bulk_load(). - void set_slot(unsigned short slot, const value_type& value) { - TLX_BTREE_ASSERT(slot < node::slotuse); - slotdata[slot] = value; - } - }; - - static double calculate_weight(const node* n) { - double res = 0.0; - if (!n) return 0; - else if (n->is_leafnode()) { - for (unsigned short s = 0; s < n->slotuse; ++s) - res += static_cast<const LeafNode*>(n)->weight[s]; - } else - for (unsigned short s = 0; s <= n->slotuse; ++s) { - res += static_cast<const InnerNode*>(n)->weight[s]; - } - return res; - } - - //! \} - -public: - //! \name Iterators and Reverse Iterators - //! \{ - - class iterator; - class const_iterator; - class reverse_iterator; - class const_reverse_iterator; - - //! STL-like iterator object for B+ tree items. The iterator points to a - //! specific slot number in a leaf. - class iterator - { - public: - // *** Types - - //! The key type of the btree. Returned by key(). - typedef typename BTree::key_type key_type; - - //! The value type of the btree. Returned by operator*(). - typedef typename BTree::value_type value_type; - - //! Reference to the value_type. STL required. - typedef value_type& reference; - - //! Pointer to the value_type. STL required. - typedef value_type* pointer; - - //! STL-magic iterator category - typedef std::bidirectional_iterator_tag iterator_category; - - //! STL-magic - typedef ptrdiff_t difference_type; - - //! Our own type - typedef iterator self; - - private: - // *** Members - - //! The currently referenced leaf node of the tree - typename BTree::LeafNode* curr_leaf; - - //! Current key/data slot referenced - unsigned short curr_slot; - - //! Friendly to the const_iterator, so it may access the two data items - //! directly. - friend class const_iterator; - - //! Also friendly to the reverse_iterator, so it may access the two - //! data items directly. - friend class reverse_iterator; - - //! Also friendly to the const_reverse_iterator, so it may access the - //! two data items directly. - friend class const_reverse_iterator; - - //! Also friendly to the base btree class, because erase_iter() needs - //! to read the curr_leaf and curr_slot values directly. - friend class BTree<key_type, value_type, key_of_value, key_compare, - traits, allow_duplicates, allocator_type>; - - // The macro TLX_BTREE_FRIENDS can be used by outside class to access - // the B+ tree internals. This was added for wxBTreeDemo to be able to - // draw the tree. - TLX_BTREE_FRIENDS; - - public: - // *** Methods - - //! Default-Constructor of a mutable iterator - iterator() - : curr_leaf(nullptr), curr_slot(0) - { } - - //! Initializing-Constructor of a mutable iterator - iterator(typename BTree::LeafNode* l, unsigned short s) - : curr_leaf(l), curr_slot(s) - { } - - //! Copy-constructor from a reverse iterator - iterator(const reverse_iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Dereference the iterator. - reference operator * () const { - return curr_leaf->slotdata[curr_slot]; - } - - //! Dereference the iterator. - pointer operator -> () const { - return &curr_leaf->slotdata[curr_slot]; - } - - //! Key of the current slot. - const key_type& key() const { - return curr_leaf->key(curr_slot); - } - - //! Prefix++ advance the iterator to the next slot. - iterator& operator ++ () { - if (curr_slot + 1u < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 0; - } - else { - // this is end() - curr_slot = curr_leaf->slotuse; - } - - return *this; - } - - //! Postfix++ advance the iterator to the next slot. - iterator operator ++ (int) { - iterator tmp = *this; // copy ourselves - - if (curr_slot + 1u < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 0; - } - else { - // this is end() - curr_slot = curr_leaf->slotuse; - } - - return tmp; - } - - //! Prefix-- backstep the iterator to the last slot. - iterator& operator -- () { - if (curr_slot > 0) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse - 1; - } - else { - // this is begin() - curr_slot = 0; - } - - return *this; - } - - //! Postfix-- backstep the iterator to the last slot. - iterator operator -- (int) { - iterator tmp = *this; // copy ourselves - - if (curr_slot > 0) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse - 1; - } - else { - // this is begin() - curr_slot = 0; - } - - return tmp; - } - - //! Equality of iterators. - bool operator == (const iterator& x) const { - return (x.curr_leaf == curr_leaf) && (x.curr_slot == curr_slot); - } - - //! Inequality of iterators. - bool operator != (const iterator& x) const { - return (x.curr_leaf != curr_leaf) || (x.curr_slot != curr_slot); - } - }; - - //! STL-like read-only iterator object for B+ tree items. The iterator - //! points to a specific slot number in a leaf. - class const_iterator - { - public: - // *** Types - - //! The key type of the btree. Returned by key(). - typedef typename BTree::key_type key_type; - - //! The value type of the btree. Returned by operator*(). - typedef typename BTree::value_type value_type; - - //! Reference to the value_type. STL required. - typedef const value_type& reference; - - //! Pointer to the value_type. STL required. - typedef const value_type* pointer; - - //! STL-magic iterator category - typedef std::bidirectional_iterator_tag iterator_category; - - //! STL-magic - typedef ptrdiff_t difference_type; - - //! Our own type - typedef const_iterator self; - - private: - // *** Members - - //! The currently referenced leaf node of the tree - const typename BTree::LeafNode* curr_leaf; - - //! Current key/data slot referenced - unsigned short curr_slot; - - //! Friendly to the reverse_const_iterator, so it may access the two - //! data items directly - friend class const_reverse_iterator; - - // The macro TLX_BTREE_FRIENDS can be used by outside class to access - // the B+ tree internals. This was added for wxBTreeDemo to be able to - // draw the tree. - TLX_BTREE_FRIENDS; - - public: - // *** Methods - - //! Default-Constructor of a const iterator - const_iterator() - : curr_leaf(nullptr), curr_slot(0) - { } - - //! Initializing-Constructor of a const iterator - const_iterator(const typename BTree::LeafNode* l, unsigned short s) - : curr_leaf(l), curr_slot(s) - { } - - //! Copy-constructor from a mutable iterator - const_iterator(const iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Copy-constructor from a mutable reverse iterator - const_iterator(const reverse_iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Copy-constructor from a const reverse iterator - const_iterator(const const_reverse_iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Dereference the iterator. - reference operator * () const { - return curr_leaf->slotdata[curr_slot]; - } - - //! Dereference the iterator. - pointer operator -> () const { - return &curr_leaf->slotdata[curr_slot]; - } - - //! Key of the current slot. - const key_type& key() const { - return curr_leaf->key(curr_slot); - } - - //! Prefix++ advance the iterator to the next slot. - const_iterator& operator ++ () { - if (curr_slot + 1u < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 0; - } - else { - // this is end() - curr_slot = curr_leaf->slotuse; - } - - return *this; - } - - //! Postfix++ advance the iterator to the next slot. - const_iterator operator ++ (int) { - const_iterator tmp = *this; // copy ourselves - - if (curr_slot + 1u < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 0; - } - else { - // this is end() - curr_slot = curr_leaf->slotuse; - } - - return tmp; - } - - //! Prefix-- backstep the iterator to the last slot. - const_iterator& operator -- () { - if (curr_slot > 0) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse - 1; - } - else { - // this is begin() - curr_slot = 0; - } - - return *this; - } - - //! Postfix-- backstep the iterator to the last slot. - const_iterator operator -- (int) { - const_iterator tmp = *this; // copy ourselves - - if (curr_slot > 0) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse - 1; - } - else { - // this is begin() - curr_slot = 0; - } - - return tmp; - } - - //! Equality of iterators. - bool operator == (const const_iterator& x) const { - return (x.curr_leaf == curr_leaf) && (x.curr_slot == curr_slot); - } - - //! Inequality of iterators. - bool operator != (const const_iterator& x) const { - return (x.curr_leaf != curr_leaf) || (x.curr_slot != curr_slot); - } - }; - - //! STL-like mutable reverse iterator object for B+ tree items. The - //! iterator points to a specific slot number in a leaf. - class reverse_iterator - { - public: - // *** Types - - //! The key type of the btree. Returned by key(). - typedef typename BTree::key_type key_type; - - //! The value type of the btree. Returned by operator*(). - typedef typename BTree::value_type value_type; - - //! Reference to the value_type. STL required. - typedef value_type& reference; - - //! Pointer to the value_type. STL required. - typedef value_type* pointer; - - //! STL-magic iterator category - typedef std::bidirectional_iterator_tag iterator_category; - - //! STL-magic - typedef ptrdiff_t difference_type; - - //! Our own type - typedef reverse_iterator self; - - private: - // *** Members - - //! The currently referenced leaf node of the tree - typename BTree::LeafNode* curr_leaf; - - //! One slot past the current key/data slot referenced. - unsigned short curr_slot; - - //! Friendly to the const_iterator, so it may access the two data items - //! directly - friend class iterator; - - //! Also friendly to the const_iterator, so it may access the two data - //! items directly - friend class const_iterator; - - //! Also friendly to the const_iterator, so it may access the two data - //! items directly - friend class const_reverse_iterator; - - // The macro TLX_BTREE_FRIENDS can be used by outside class to access - // the B+ tree internals. This was added for wxBTreeDemo to be able to - // draw the tree. - TLX_BTREE_FRIENDS; - - public: - // *** Methods - - //! Default-Constructor of a reverse iterator - reverse_iterator() - : curr_leaf(nullptr), curr_slot(0) - { } - - //! Initializing-Constructor of a mutable reverse iterator - reverse_iterator(typename BTree::LeafNode* l, unsigned short s) - : curr_leaf(l), curr_slot(s) - { } - - //! Copy-constructor from a mutable iterator - reverse_iterator(const iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Dereference the iterator. - reference operator * () const { - TLX_BTREE_ASSERT(curr_slot > 0); - return curr_leaf->slotdata[curr_slot - 1]; - } - - //! Dereference the iterator. - pointer operator -> () const { - TLX_BTREE_ASSERT(curr_slot > 0); - return &curr_leaf->slotdata[curr_slot - 1]; - } - - //! Key of the current slot. - const key_type& key() const { - TLX_BTREE_ASSERT(curr_slot > 0); - return curr_leaf->key(curr_slot - 1); - } - - //! Prefix++ advance the iterator to the next slot. - reverse_iterator& operator ++ () { - if (curr_slot > 1) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse; - } - else { - // this is begin() == rend() - curr_slot = 0; - } - - return *this; - } - - //! Postfix++ advance the iterator to the next slot. - reverse_iterator operator ++ (int) { - reverse_iterator tmp = *this; // copy ourselves - - if (curr_slot > 1) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse; - } - else { - // this is begin() == rend() - curr_slot = 0; - } - - return tmp; - } - - //! Prefix-- backstep the iterator to the last slot. - reverse_iterator& operator -- () { - if (curr_slot < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 1; - } - else { - // this is end() == rbegin() - curr_slot = curr_leaf->slotuse; - } - - return *this; - } - - //! Postfix-- backstep the iterator to the last slot. - reverse_iterator operator -- (int) { - reverse_iterator tmp = *this; // copy ourselves - - if (curr_slot < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 1; - } - else { - // this is end() == rbegin() - curr_slot = curr_leaf->slotuse; - } - - return tmp; - } - - //! Equality of iterators. - bool operator == (const reverse_iterator& x) const { - return (x.curr_leaf == curr_leaf) && (x.curr_slot == curr_slot); - } - - //! Inequality of iterators. - bool operator != (const reverse_iterator& x) const { - return (x.curr_leaf != curr_leaf) || (x.curr_slot != curr_slot); - } - }; - - //! STL-like read-only reverse iterator object for B+ tree items. The - //! iterator points to a specific slot number in a leaf. - class const_reverse_iterator - { - public: - // *** Types - - //! The key type of the btree. Returned by key(). - typedef typename BTree::key_type key_type; - - //! The value type of the btree. Returned by operator*(). - typedef typename BTree::value_type value_type; - - //! Reference to the value_type. STL required. - typedef const value_type& reference; - - //! Pointer to the value_type. STL required. - typedef const value_type* pointer; - - //! STL-magic iterator category - typedef std::bidirectional_iterator_tag iterator_category; - - //! STL-magic - typedef ptrdiff_t difference_type; - - //! Our own type - typedef const_reverse_iterator self; - - private: - // *** Members - - //! The currently referenced leaf node of the tree - const typename BTree::LeafNode* curr_leaf; - - //! One slot past the current key/data slot referenced. - unsigned short curr_slot; - - //! Friendly to the const_iterator, so it may access the two data items - //! directly. - friend class reverse_iterator; - - // The macro TLX_BTREE_FRIENDS can be used by outside class to access - // the B+ tree internals. This was added for wxBTreeDemo to be able to - // draw the tree. - TLX_BTREE_FRIENDS; - - public: - // *** Methods - - //! Default-Constructor of a const reverse iterator. - const_reverse_iterator() - : curr_leaf(nullptr), curr_slot(0) - { } - - //! Initializing-Constructor of a const reverse iterator. - const_reverse_iterator( - const typename BTree::LeafNode* l, unsigned short s) - : curr_leaf(l), curr_slot(s) - { } - - //! Copy-constructor from a mutable iterator. - const_reverse_iterator(const iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Copy-constructor from a const iterator. - const_reverse_iterator(const const_iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Copy-constructor from a mutable reverse iterator. - const_reverse_iterator(const reverse_iterator& it) // NOLINT - : curr_leaf(it.curr_leaf), curr_slot(it.curr_slot) - { } - - //! Dereference the iterator. - reference operator * () const { - TLX_BTREE_ASSERT(curr_slot > 0); - return curr_leaf->slotdata[curr_slot - 1]; - } - - //! Dereference the iterator. - pointer operator -> () const { - TLX_BTREE_ASSERT(curr_slot > 0); - return &curr_leaf->slotdata[curr_slot - 1]; - } - - //! Key of the current slot. - const key_type& key() const { - TLX_BTREE_ASSERT(curr_slot > 0); - return curr_leaf->key(curr_slot - 1); - } - - //! Prefix++ advance the iterator to the previous slot. - const_reverse_iterator& operator ++ () { - if (curr_slot > 1) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse; - } - else { - // this is begin() == rend() - curr_slot = 0; - } - - return *this; - } - - //! Postfix++ advance the iterator to the previous slot. - const_reverse_iterator operator ++ (int) { - const_reverse_iterator tmp = *this; // copy ourselves - - if (curr_slot > 1) { - --curr_slot; - } - else if (curr_leaf->prev_leaf != nullptr) { - curr_leaf = curr_leaf->prev_leaf; - curr_slot = curr_leaf->slotuse; - } - else { - // this is begin() == rend() - curr_slot = 0; - } - - return tmp; - } - - //! Prefix-- backstep the iterator to the next slot. - const_reverse_iterator& operator -- () { - if (curr_slot < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 1; - } - else { - // this is end() == rbegin() - curr_slot = curr_leaf->slotuse; - } - - return *this; - } - - //! Postfix-- backstep the iterator to the next slot. - const_reverse_iterator operator -- (int) { - const_reverse_iterator tmp = *this; // copy ourselves - - if (curr_slot < curr_leaf->slotuse) { - ++curr_slot; - } - else if (curr_leaf->next_leaf != nullptr) { - curr_leaf = curr_leaf->next_leaf; - curr_slot = 1; - } - else { - // this is end() == rbegin() - curr_slot = curr_leaf->slotuse; - } - - return tmp; - } - - //! Equality of iterators. - bool operator == (const const_reverse_iterator& x) const { - return (x.curr_leaf == curr_leaf) && (x.curr_slot == curr_slot); - } - - //! Inequality of iterators. - bool operator != (const const_reverse_iterator& x) const { - return (x.curr_leaf != curr_leaf) || (x.curr_slot != curr_slot); - } - }; - - //! \} - -public: - //! \name Small Statistics Structure - //! \{ - - /*! - * A small struct containing basic statistics about the B+ tree. It can be - * fetched using get_stats(). - */ - struct tree_stats { - //! Number of items in the B+ tree - size_type size; - - //! Number of leaves in the B+ tree - size_type leaves; - - //! Number of inner nodes in the B+ tree - size_type inner_nodes; - - //! Base B+ tree parameter: The number of key/data slots in each leaf - static const unsigned short leaf_slots = Self::leaf_slotmax; - - //! Base B+ tree parameter: The number of key slots in each inner node. - static const unsigned short inner_slots = Self::inner_slotmax; - - //! Zero initialized - tree_stats() - : size(0), - leaves(0), inner_nodes(0) - { } - - //! Return the total number of nodes - size_type nodes() const { - return inner_nodes + leaves; - } - - //! Return the average fill of leaves - double avgfill_leaves() const { - return static_cast<double>(size) / (leaves * leaf_slots); - } - }; - - //! \} - -private: - //! \name Tree Object Data Members - //! \{ - - //! Pointer to the B+ tree's root node, either leaf or inner node. - node* root_; - - //! Pointer to first leaf in the double linked leaf chain. - LeafNode* head_leaf_; - - //! Pointer to last leaf in the double linked leaf chain. - LeafNode* tail_leaf_; - - //! Other small statistics about the B+ tree. - tree_stats stats_; - - //! Key comparison object. More comparison functions are generated from - //! this < relation. - key_compare key_less_; - - //! Memory allocator. - allocator_type allocator_; - - //! \} - -public: - //! \name Constructors and Destructor - //! \{ - - //! Default constructor initializing an empty B+ tree with the standard key - //! comparison function. - explicit BTree(const allocator_type& alloc = allocator_type()) - : root_(nullptr), head_leaf_(nullptr), tail_leaf_(nullptr), - allocator_(alloc) - { } - - //! Constructor initializing an empty B+ tree with a special key - //! comparison object. - explicit BTree(const key_compare& kcf, - const allocator_type& alloc = allocator_type()) - : root_(nullptr), head_leaf_(nullptr), tail_leaf_(nullptr), - key_less_(kcf), allocator_(alloc) - { } - - //! Constructor initializing a B+ tree with the range [first,last). The - //! range need not be sorted. To create a B+ tree from a sorted range, use - //! bulk_load(). - template <class InputIterator> - BTree(InputIterator first, InputIterator last, - const allocator_type& alloc = allocator_type()) - : root_(nullptr), head_leaf_(nullptr), tail_leaf_(nullptr), - allocator_(alloc) { - insert(first, last); - } - - //! Constructor initializing a B+ tree with the range [first,last) and a - //! special key comparison object. The range need not be sorted. To create - //! a B+ tree from a sorted range, use bulk_load(). - template <class InputIterator> - BTree(InputIterator first, InputIterator last, const key_compare& kcf, - const allocator_type& alloc = allocator_type()) - : root_(nullptr), head_leaf_(nullptr), tail_leaf_(nullptr), - key_less_(kcf), allocator_(alloc) { - insert(first, last); - } - - //! Frees up all used B+ tree memory pages - ~BTree() { - clear(); - } - - //! Fast swapping of two identical B+ tree objects. - void swap(BTree& from) { - std::swap(root_, from.root_); - std::swap(head_leaf_, from.head_leaf_); - std::swap(tail_leaf_, from.tail_leaf_); - std::swap(stats_, from.stats_); - std::swap(key_less_, from.key_less_); - std::swap(allocator_, from.allocator_); - } - - //! \} - -public: - //! \name Key and Value Comparison Function Objects - //! \{ - - //! Function class to compare value_type objects. Required by the STL - class value_compare - { - protected: - //! Key comparison function from the template parameter - key_compare key_comp; - - //! Constructor called from BTree::value_comp() - explicit value_compare(key_compare kc) - : key_comp(kc) - { } - - //! Friendly to the btree class so it may call the constructor - friend class BTree<key_type, value_type, key_of_value, key_compare, - traits, allow_duplicates, allocator_type>; - - public: - //! Function call "less"-operator resulting in true if x < y. - bool operator () (const value_type& x, const value_type& y) const { - return key_comp(x.first, y.first); - } - }; - - //! Constant access to the key comparison object sorting the B+ tree. - key_compare key_comp() const { - return key_less_; - } - - //! Constant access to a constructed value_type comparison object. Required - //! by the STL. - value_compare value_comp() const { - return value_compare(key_less_); - } - - //! \} - -private: - //! \name Convenient Key Comparison Functions Generated From key_less - //! \{ - - //! True if a < b ? "constructed" from key_less_() - bool key_less(const key_type& a, const key_type& b) const { - return key_less_(a, b); - } - - //! True if a <= b ? constructed from key_less() - bool key_lessequal(const key_type& a, const key_type& b) const { - return !key_less_(b, a); - } - - //! True if a > b ? constructed from key_less() - bool key_greater(const key_type& a, const key_type& b) const { - return key_less_(b, a); - } - - //! True if a >= b ? constructed from key_less() - bool key_greaterequal(const key_type& a, const key_type& b) const { - return !key_less_(a, b); - } - - //! True if a == b ? constructed from key_less(). This requires the < - //! relation to be a total order, otherwise the B+ tree cannot be sorted. - bool key_equal(const key_type& a, const key_type& b) const { - return !key_less_(a, b) && !key_less_(b, a); - } - - //! \} - -public: - //! \name Allocators - //! \{ - - //! Return the base node allocator provided during construction. - allocator_type get_allocator() const { - return allocator_; - } - - //! \} - -private: - //! \name Node Object Allocation and Deallocation Functions - //! \{ - - //! Return an allocator for LeafNode objects. - typename LeafNode::alloc_type leaf_node_allocator() { - return typename LeafNode::alloc_type(allocator_); - } - - //! Return an allocator for InnerNode objects. - typename InnerNode::alloc_type inner_node_allocator() { - return typename InnerNode::alloc_type(allocator_); - } - - //! Allocate and initialize a leaf node - LeafNode * allocate_leaf() { - LeafNode* n = new (leaf_node_allocator().allocate(1)) LeafNode(); - n->initialize(); - stats_.leaves++; - return n; - } - - //! Allocate and initialize an inner node - InnerNode * allocate_inner(unsigned short level) { - InnerNode* n = new (inner_node_allocator().allocate(1)) InnerNode(); - n->initialize(level); - stats_.inner_nodes++; - return n; - } - - //! Correctly free either inner or leaf node, destructs all contained key - //! and value objects. - void free_node(node* n) { - if (n->is_leafnode()) { - LeafNode* ln = static_cast<LeafNode*>(n); - typename LeafNode::alloc_type a(leaf_node_allocator()); - std::allocator_traits<typename LeafNode::alloc_type>::destroy(a, ln); - std::allocator_traits<typename LeafNode::alloc_type>::deallocate(a, ln, 1); - stats_.leaves--; - } - else { - InnerNode* in = static_cast<InnerNode*>(n); - typename InnerNode::alloc_type a(inner_node_allocator()); - std::allocator_traits<typename InnerNode::alloc_type>::destroy(a, in); - std::allocator_traits<typename InnerNode::alloc_type>::deallocate(a, in, 1); - stats_.inner_nodes--; - } - } - - //! \} - -public: - //! \name Fast Destruction of the B+ Tree - //! \{ - - //! Frees all key/data pairs and all nodes of the tree. - void clear() { - if (root_) - { - clear_recursive(root_); - free_node(root_); - - root_ = nullptr; - head_leaf_ = tail_leaf_ = nullptr; - - stats_ = tree_stats(); - } - - TLX_BTREE_ASSERT(stats_.size == 0); - } - -private: - //! Recursively free up nodes. - void clear_recursive(node* n) { - if (n->is_leafnode()) - { - LeafNode* leafnode = static_cast<LeafNode*>(n); - - for (unsigned short slot = 0; slot < leafnode->slotuse; ++slot) - { - // data objects are deleted by LeafNode's destructor - } - } - else - { - InnerNode* innernode = static_cast<InnerNode*>(n); - - for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot) - { - clear_recursive(innernode->childid[slot]); - free_node(innernode->childid[slot]); - } - } - } - - //! \} - -public: - //! \name STL Iterator Construction Functions - //! \{ - - //! Constructs a read/data-write iterator that points to the first slot in - //! the first leaf of the B+ tree. - iterator begin() { - return iterator(head_leaf_, 0); - } - - //! Constructs a read/data-write iterator that points to the first invalid - //! slot in the last leaf of the B+ tree. - iterator end() { - return iterator(tail_leaf_, tail_leaf_ ? tail_leaf_->slotuse : 0); - } - - //! Constructs a read-only constant iterator that points to the first slot - //! in the first leaf of the B+ tree. - const_iterator begin() const { - return const_iterator(head_leaf_, 0); - } - - //! Constructs a read-only constant iterator that points to the first - //! invalid slot in the last leaf of the B+ tree. - const_iterator end() const { - return const_iterator(tail_leaf_, tail_leaf_ ? tail_leaf_->slotuse : 0); - } - - //! Constructs a read/data-write reverse iterator that points to the first - //! invalid slot in the last leaf of the B+ tree. Uses STL magic. - reverse_iterator rbegin() { - return reverse_iterator(end()); - } - - //! Constructs a read/data-write reverse iterator that points to the first - //! slot in the first leaf of the B+ tree. Uses STL magic. - reverse_iterator rend() { - return reverse_iterator(begin()); - } - - //! Constructs a read-only reverse iterator that points to the first - //! invalid slot in the last leaf of the B+ tree. Uses STL magic. - const_reverse_iterator rbegin() const { - return const_reverse_iterator(end()); - } - - //! Constructs a read-only reverse iterator that points to the first slot - //! in the first leaf of the B+ tree. Uses STL magic. - const_reverse_iterator rend() const { - return const_reverse_iterator(begin()); - } - - //! \} - -private: - //! \name B+ Tree Node Binary Search Functions - //! \{ - - //! Searches for the first key in the node n greater or equal to key. Uses - //! binary search with an optional linear self-verification. This is a - //! template function, because the slotkey array is located at different - //! places in LeafNode and InnerNode. - template <typename node_type> - unsigned short find_lower(const node_type* n, const key_type& key) const { - if (sizeof(*n) > traits::binsearch_threshold) - { - if (n->slotuse == 0) return 0; - - unsigned short lo = 0, hi = n->slotuse; - - while (lo < hi) - { - unsigned short mid = (lo + hi) >> 1; - - if (key_lessequal(key, n->key(mid))) { - hi = mid; // key <= mid - } - else { - lo = mid + 1; // key > mid - } - } - - TLX_BTREE_PRINT("BTree::find_lower: on " << n << - " key " << key << " -> " << lo << " / " << hi); - - // verify result using simple linear search - if (self_verify) - { - unsigned short i = 0; - while (i < n->slotuse && key_less(n->key(i), key)) ++i; - - TLX_BTREE_PRINT("BTree::find_lower: testfind: " << i); - TLX_BTREE_ASSERT(i == lo); - } - - return lo; - } - else // for nodes <= binsearch_threshold do linear search. - { - unsigned short lo = 0; - while (lo < n->slotuse && key_less(n->key(lo), key)) ++lo; - return lo; - } - } - - //! Searches for the first key in the node n greater than key. Uses binary - //! search with an optional linear self-verification. This is a template - //! function, because the slotkey array is located at different places in - //! LeafNode and InnerNode. - template <typename node_type> - unsigned short find_upper(const node_type* n, const key_type& key) const { - if (sizeof(*n) > traits::binsearch_threshold) - { - if (n->slotuse == 0) return 0; - - unsigned short lo = 0, hi = n->slotuse; - - while (lo < hi) - { - unsigned short mid = (lo + hi) >> 1; - - if (key_less(key, n->key(mid))) { - hi = mid; // key < mid - } - else { - lo = mid + 1; // key >= mid - } - } - - TLX_BTREE_PRINT("BTree::find_upper: on " << n << - " key " << key << " -> " << lo << " / " << hi); - - // verify result using simple linear search - if (self_verify) - { - unsigned short i = 0; - while (i < n->slotuse && key_lessequal(n->key(i), key)) ++i; - - TLX_BTREE_PRINT("BTree::find_upper testfind: " << i); - TLX_BTREE_ASSERT(i == hi); - } - - return lo; - } - else // for nodes <= binsearch_threshold do linear search. - { - unsigned short lo = 0; - while (lo < n->slotuse && key_lessequal(n->key(lo), key)) ++lo; - return lo; - } - } - - //! \} - -public: - //! \name Access Functions to the Item Count - //! \{ - - //! Return the number of key/data pairs in the B+ tree - size_type size() const { - return stats_.size; - } - - //! Returns true if there is at least one key/data pair in the B+ tree - bool empty() const { - return (size() == size_type(0)); - } - - //! Returns the largest possible size of the B+ Tree. This is just a - //! function required by the STL standard, the B+ Tree can hold more items. - size_type max_size() const { - return size_type(-1); - } - - //! Return a const reference to the current statistics. - const struct tree_stats& get_stats() const { - return stats_; - } - - //! \} - -public: - //! \name STL Access Functions Querying the Tree by Descending to a Leaf - //! \{ - - //! Non-STL function checking whether a key is in the B+ tree. The same as - //! (find(k) != end()) or (count() != 0). - bool exists(const key_type& key) const { - const node* n = root_; - if (!n) return false; - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - return (slot < leaf->slotuse && key_equal(key, leaf->key(slot))); - } - - //! Tries to locate a key in the B+ tree and returns an iterator to the - //! key/data slot if found. If unsuccessful it returns end(). - iterator find(const key_type& key) { - node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - LeafNode* leaf = static_cast<LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - return (slot < leaf->slotuse && key_equal(key, leaf->key(slot))) - ? iterator(leaf, slot) : end(); - } - - //! Tries to locate a key in the B+ tree and returns an constant iterator to - //! the key/data slot if found. If unsuccessful it returns end(). - const_iterator find(const key_type& key) const { - const node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - return (slot < leaf->slotuse && key_equal(key, leaf->key(slot))) - ? const_iterator(leaf, slot) : end(); - } - - size_t range_count(const key_type& lower, const key_type& upper) { - return range_count_recur(root_, lower, upper); - } - - void range_sample(const key_type& lower, const key_type& upper, size_t k, std::vector<key_type>& ans, gsl_rng *rng) { - const node* n = root_; - - //Finding the LCA.... - while (!n->is_leafnode()) { - const InnerNode* inner = static_cast<const InnerNode*>(n); - auto slot = find_lower(inner, lower); - if (slot != find_upper(inner, upper)) break; - n = inner->childid[slot]; - } - - const node* lca = n; - uint64_t lca_weight_count = calculate_weight(lca); - ans.clear(); - while (ans.size() < k) { - const node* now = lca; - - bool flag = false; - while (!now->is_leafnode()) { - const InnerNode* inner = static_cast<const InnerNode*>(now); - - double sum = std::accumulate(inner->weight, inner->weight + inner->slotuse + 1, 0.0); - double pos = gsl_rng_uniform(rng) * sum; - - double prefix_sum = 0; - unsigned short s = -1; - do { - ++s; - prefix_sum += inner->weight[s]; - } while (prefix_sum < pos); - assert(s <= inner->slotuse); - - if ((s < inner->slotuse && key_less(inner->key(s), lower)) || (s > 0 && key_less(upper, inner->key(s - 1)))) { - flag = true; - break; - } - - now = inner->childid[s]; - } - - if (flag) { - continue; - } - assert(now->is_leafnode()); - const LeafNode* leaf = static_cast<const LeafNode*>(now); - double sum = std::accumulate(leaf->weight, leaf->weight + leaf->slotuse, 0.0); - double pos = gsl_rng_uniform(rng) * sum; - - if (sum <= 0) { - continue; - } - double prefix_sum = 0.0; - unsigned short s = -1; - do { - ++s; - prefix_sum += leaf->weight[s]; - } while (prefix_sum < pos); - - key_type sample = static_cast<const LeafNode*>(now)->key(s); - - if (key_lessequal(lower, sample) && key_lessequal(sample, upper)) { - assert(sample <= upper && sample >= lower); - ans.emplace_back(sample); - } - } - } - - //! Tries to locate a key in the B+ tree and returns the number of identical - //! key entries found. - size_type count(const key_type& key) const { - const node* n = root_; - if (!n) return 0; - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - size_type num = 0; - - while (leaf && slot < leaf->slotuse && key_equal(key, leaf->key(slot))) - { - ++num; - if (++slot >= leaf->slotuse) - { - leaf = leaf->next_leaf; - slot = 0; - } - } - - return num; - } - - //! Searches the B+ tree and returns an iterator to the first pair equal to - //! or greater than key, or end() if all keys are smaller. - iterator lower_bound(const key_type& key) { - node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - LeafNode* leaf = static_cast<LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - return iterator(leaf, slot); - } - - //! Searches the B+ tree and returns a constant iterator to the first pair - //! equal to or greater than key, or end() if all keys are smaller. - const_iterator lower_bound(const key_type& key) const { - const node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_lower(inner, key); - - n = inner->childid[slot]; - } - - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - return const_iterator(leaf, slot); - } - - //! Searches the B+ tree and returns an iterator to the first pair greater - //! than key, or end() if all keys are smaller or equal. - iterator upper_bound(const key_type& key) { - node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_upper(inner, key); - - n = inner->childid[slot]; - } - - LeafNode* leaf = static_cast<LeafNode*>(n); - - unsigned short slot = find_upper(leaf, key); - return iterator(leaf, slot); - } - - //! Searches the B+ tree and returns a constant iterator to the first pair - //! greater than key, or end() if all keys are smaller or equal. - const_iterator upper_bound(const key_type& key) const { - const node* n = root_; - if (!n) return end(); - - while (!n->is_leafnode()) - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - unsigned short slot = find_upper(inner, key); - - n = inner->childid[slot]; - } - - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - unsigned short slot = find_upper(leaf, key); - return const_iterator(leaf, slot); - } - - //! Searches the B+ tree and returns both lower_bound() and upper_bound(). - std::pair<iterator, iterator> equal_range(const key_type& key) { - return std::pair<iterator, iterator>( - lower_bound(key), upper_bound(key)); - } - - //! Searches the B+ tree and returns both lower_bound() and upper_bound(). - std::pair<const_iterator, const_iterator> - equal_range(const key_type& key) const { - return std::pair<const_iterator, const_iterator>( - lower_bound(key), upper_bound(key)); - } - - //! \} - -public: - //! \name B+ Tree Object Comparison Functions - //! \{ - - //! Equality relation of B+ trees of the same type. B+ trees of the same - //! size and equal elements (both key and data) are considered equal. Beware - //! of the random ordering of duplicate keys. - bool operator == (const BTree& other) const { - return (size() == other.size()) && - std::equal(begin(), end(), other.begin()); - } - - //! Inequality relation. Based on operator==. - bool operator != (const BTree& other) const { - return !(*this == other); - } - - //! Total ordering relation of B+ trees of the same type. It uses - //! std::lexicographical_compare() for the actual comparison of elements. - bool operator < (const BTree& other) const { - return std::lexicographical_compare( - begin(), end(), other.begin(), other.end()); - } - - //! Greater relation. Based on operator<. - bool operator > (const BTree& other) const { - return other < *this; - } - - //! Less-equal relation. Based on operator<. - bool operator <= (const BTree& other) const { - return !(other < *this); - } - - //! Greater-equal relation. Based on operator<. - bool operator >= (const BTree& other) const { - return !(*this < other); - } - - //! \} - -public: - //! \name Fast Copy: Assign Operator and Copy Constructors - //! \{ - - //! Assignment operator. All the key/data pairs are copied. - BTree& operator = (const BTree& other) { - if (this != &other) - { - clear(); - - key_less_ = other.key_comp(); - allocator_ = other.get_allocator(); - - if (other.size() != 0) - { - stats_.leaves = stats_.inner_nodes = 0; - if (other.root_) { - root_ = copy_recursive(other.root_); - } - stats_ = other.stats_; - } - - if (self_verify) verify(); - } - return *this; - } - - //! Copy constructor. The newly initialized B+ tree object will contain a - //! copy of all key/data pairs. - BTree(const BTree& other) - : root_(nullptr), head_leaf_(nullptr), tail_leaf_(nullptr), - stats_(other.stats_), - key_less_(other.key_comp()), - allocator_(other.get_allocator()) { - if (size() > 0) - { - stats_.leaves = stats_.inner_nodes = 0; - if (other.root_) { - root_ = copy_recursive(other.root_); - } - if (self_verify) verify(); - } - } - -private: - size_t range_count_recur(const node* n, const key_type& lower, const key_type& upper) { - if (!n) return 0; - size_t res = 0; - if (n->is_leafnode()) { - LeafNode* leaf = static_cast<LeafNode*>(n); - for (unsigned short s = 0; s < n->slotuse; ++s) - if (leaf->key(s) >= lower && leaf->key(s) <= upper) ++res; - } else { - InnerNode* inner = static_cast<InnerNode*>(n); - auto low_slot = find_lower(n, lower); - auto high_slot = find_upper(n, upper); - for (unsigned short s = low_slot; s <= high_slot; ++s) - res += range_count_recur(inner->childid[s], lower, upper); - } - return res; - } - - //! Recursively copy nodes from another B+ tree object - struct node * copy_recursive(const node* n) { - if (n->is_leafnode()) - { - const LeafNode* leaf = static_cast<const LeafNode*>(n); - LeafNode* newleaf = allocate_leaf(); - - newleaf->slotuse = leaf->slotuse; - std::copy(leaf->slotdata, leaf->slotdata + leaf->slotuse, - newleaf->slotdata); - - if (head_leaf_ == nullptr) - { - head_leaf_ = tail_leaf_ = newleaf; - newleaf->prev_leaf = newleaf->next_leaf = nullptr; - } - else - { - newleaf->prev_leaf = tail_leaf_; - tail_leaf_->next_leaf = newleaf; - tail_leaf_ = newleaf; - } - - return newleaf; - } - else - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - InnerNode* newinner = allocate_inner(inner->level); - - newinner->slotuse = inner->slotuse; - std::copy(inner->slotkey, inner->slotkey + inner->slotuse, - newinner->slotkey); - - for (unsigned short slot = 0; slot <= inner->slotuse; ++slot) - { - newinner->childid[slot] = copy_recursive(inner->childid[slot]); - } - - return newinner; - } - } - - //! \} - -public: - //! \name Public Insertion Functions - //! \{ - - //! Attempt to insert a key/data pair into the B+ tree. If the tree does not - //! allow duplicate keys, then the insert may fail if it is already present. - std::pair<iterator, bool> insert(const value_type& x, const double& weight=1.0) { - return insert_start(key_of_value::get(x), x, weight); - } - - //! Attempt to insert a key/data pair into the B+ tree. The iterator hint is - //! currently ignored by the B+ tree insertion routine. - iterator insert(iterator /* hint */, const value_type& x, const double& weight=1.0) { - return insert_start(key_of_value::get(x), x, weight).first; - } - - //! Attempt to insert the range [first,last) of value_type pairs into the B+ - //! tree. Each key/data pair is inserted individually; to bulk load the - //! tree, use a constructor with range. - template <typename InputIterator> - void insert(InputIterator first, InputIterator last) { - InputIterator iter = first; - while (iter != last) - { - //XX: bad hack... - insert(*iter, 1.0); - ++iter; - } - } - - //! \} - -private: - //! \name Private Insertion Functions - //! \{ - - //! Start the insertion descent at the current root and handle root splits. - //! Returns true if the item was inserted - std::pair<iterator, bool> - insert_start(const key_type& key, const value_type& value, const double& weight=1.0) { - - node* newchild = nullptr; - key_type newkey = key_type(); - - if (root_ == nullptr) { - root_ = head_leaf_ = tail_leaf_ = allocate_leaf(); - } - - std::pair<iterator, bool> r = - insert_descend(root_, key, value, weight, &newkey, &newchild); - - if (newchild) - { - // this only occurs if insert_descend() could not insert the key - // into the root node, this mean the root is full and a new root - // needs to be created. - InnerNode* newroot = allocate_inner(root_->level + 1); - newroot->slotkey[0] = newkey; - - newroot->childid[0] = root_; - newroot->childid[1] = newchild; - newroot->weight[0] = calculate_weight(root_); - newroot->weight[1] = calculate_weight(newchild); - - newroot->slotuse = 1; - - root_ = newroot; - } - - // increment size if the item was inserted - if (r.second) ++stats_.size; - -#ifdef TLX_BTREE_DEBUG - if (debug) print(std::cout); -#endif - - if (self_verify) { - verify(); - TLX_BTREE_ASSERT(exists(key)); - } - - return r; - } - - /*! - * Insert an item into the B+ tree. - * - * Descend down the nodes to a leaf, insert the key/data pair in a free - * slot. If the node overflows, then it must be split and the new split node - * inserted into the parent. Unroll / this splitting up to the root. - */ - std::pair<iterator, bool> insert_descend( - node* n, const key_type& key, const value_type& value, const double& weight, - key_type* splitkey, node** splitnode) { - - if (!n->is_leafnode()) - { - InnerNode* inner = static_cast<InnerNode*>(n); - - key_type newkey = key_type(); - node* newchild = nullptr; - - unsigned short slot = find_lower(inner, key); - - TLX_BTREE_PRINT( - "BTree::insert_descend into " << inner->childid[slot]); - - std::pair<iterator, bool> r = - insert_descend(inner->childid[slot], - key, value, weight, &newkey, &newchild); - - if (newchild) - { - TLX_BTREE_PRINT("BTree::insert_descend newchild" << - " with key " << newkey << - " node " << newchild << " at slot " << slot); - - if (inner->is_full()) - { - split_inner_node(inner, splitkey, splitnode, slot); - - TLX_BTREE_PRINT("BTree::insert_descend done split_inner:" << - " putslot: " << slot << - " putkey: " << newkey << - " upkey: " << *splitkey); - -#ifdef TLX_BTREE_DEBUG - if (debug) - { - print_node(std::cout, inner); - print_node(std::cout, *splitnode); - } -#endif - - // check if insert slot is in the split sibling node - TLX_BTREE_PRINT("BTree::insert_descend switch: " - << slot << " > " << inner->slotuse + 1); - - if (slot == inner->slotuse + 1 && - inner->slotuse < (*splitnode)->slotuse) - { - // special case when the insert slot matches the split - // place between the two nodes, then the insert key - // becomes the split key. - - TLX_BTREE_ASSERT(inner->slotuse + 1 < inner_slotmax); - - InnerNode* split = static_cast<InnerNode*>(*splitnode); - - // move the split key and it's datum into the left node - inner->slotkey[inner->slotuse] = *splitkey; - inner->childid[inner->slotuse + 1] = split->childid[0]; - inner->weight[inner->slotuse + 1] = calculate_weight(split->childid[0]); - inner->slotuse++; - - // set new split key and move corresponding datum into - // right node - split->childid[0] = newchild; - split->weight[0] = calculate_weight(newchild); - *splitkey = newkey; - - return r; - } - else if (slot >= inner->slotuse + 1) - { - // in case the insert slot is in the newly create split - // node, we reuse the code below. - - slot -= inner->slotuse + 1; - inner = static_cast<InnerNode*>(*splitnode); - TLX_BTREE_PRINT( - "BTree::insert_descend switching to " - "splitted node " << inner << " slot " << slot); - } - } - - // move items and put pointer to child node into correct slot - TLX_BTREE_ASSERT(slot >= 0 && slot <= inner->slotuse); - - std::copy_backward( - inner->slotkey + slot, inner->slotkey + inner->slotuse, - inner->slotkey + inner->slotuse + 1); - std::copy_backward( - inner->childid + slot, inner->childid + inner->slotuse + 1, - inner->childid + inner->slotuse + 2); - std::copy_backward( - inner->weight + slot, inner->weight + inner->slotuse + 1, - inner->weight + inner->slotuse + 2); - - inner->slotkey[slot] = newkey; - inner->childid[slot + 1] = newchild; - inner->weight[slot] = calculate_weight(inner->childid[slot]); - inner->weight[slot + 1] = calculate_weight(newchild); - inner->slotuse++; - } else if (r.second) { - inner->weight[slot] += weight; - //++inner->weight[slot]; - } - - return r; - } - else // n->is_leafnode() == true - { - LeafNode* leaf = static_cast<LeafNode*>(n); - - unsigned short slot = find_lower(leaf, key); - - if (!allow_duplicates && - slot < leaf->slotuse && key_equal(key, leaf->key(slot))) { - return std::pair<iterator, bool>(iterator(leaf, slot), false); - } - - if (leaf->is_full()) - { - split_leaf_node(leaf, splitkey, splitnode); - - // check if insert slot is in the split sibling node - if (slot >= leaf->slotuse) - { - slot -= leaf->slotuse; - leaf = static_cast<LeafNode*>(*splitnode); - } - } - - // move items and put data item into correct data slot - TLX_BTREE_ASSERT(slot >= 0 && slot <= leaf->slotuse); - - std::copy_backward( - leaf->slotdata + slot, leaf->slotdata + leaf->slotuse, - leaf->slotdata + leaf->slotuse + 1); - - std::copy_backward( - leaf->weight + slot, leaf->weight + leaf->slotuse, - leaf->weight + leaf->slotuse + 1); - - leaf->slotdata[slot] = value; - leaf->weight[slot] = weight; - leaf->slotuse++; - - if (splitnode && leaf != *splitnode && slot == leaf->slotuse - 1) - { - // special case: the node was split, and the insert is at the - // last slot of the old node. then the splitkey must be updated. - *splitkey = key; - } - - return std::pair<iterator, bool>(iterator(leaf, slot), true); - } - } - - //! Split up a leaf node into two equally-filled sibling leaves. Returns the - //! new nodes and it's insertion key in the two parameters. - void split_leaf_node(LeafNode* leaf, - key_type* out_newkey, node** out_newleaf) { - TLX_BTREE_ASSERT(leaf->is_full()); - - unsigned short mid = (leaf->slotuse >> 1); - - TLX_BTREE_PRINT("BTree::split_leaf_node on " << leaf); - - LeafNode* newleaf = allocate_leaf(); - - newleaf->slotuse = leaf->slotuse - mid; - - newleaf->next_leaf = leaf->next_leaf; - if (newleaf->next_leaf == nullptr) { - TLX_BTREE_ASSERT(leaf == tail_leaf_); - tail_leaf_ = newleaf; - } - else { - newleaf->next_leaf->prev_leaf = newleaf; - } - - std::copy(leaf->slotdata + mid, leaf->slotdata + leaf->slotuse, - newleaf->slotdata); - std::copy(leaf->weight + mid, leaf->weight + leaf->slotuse, newleaf->weight); - - leaf->slotuse = mid; - leaf->next_leaf = newleaf; - newleaf->prev_leaf = leaf; - - *out_newkey = leaf->key(leaf->slotuse - 1); - *out_newleaf = newleaf; - } - - //! Split up an inner node into two equally-filled sibling nodes. Returns - //! the new nodes and it's insertion key in the two parameters. Requires the - //! slot of the item will be inserted, so the nodes will be the same size - //! after the insert. - void split_inner_node(InnerNode* inner, key_type* out_newkey, - node** out_newinner, unsigned int addslot) { - TLX_BTREE_ASSERT(inner->is_full()); - - unsigned short mid = (inner->slotuse >> 1); - - TLX_BTREE_PRINT("BTree::split_inner: mid " << mid << - " addslot " << addslot); - - // if the split is uneven and the overflowing item will be put into the - // larger node, then the smaller split node may underflow - if (addslot <= mid && mid > inner->slotuse - (mid + 1)) - mid--; - - TLX_BTREE_PRINT("BTree::split_inner: mid " << mid << - " addslot " << addslot); - - TLX_BTREE_PRINT("BTree::split_inner_node on " << inner << - " into two nodes " << mid << " and " << - inner->slotuse - (mid + 1) << " sized"); - - InnerNode* newinner = allocate_inner(inner->level); - - newinner->slotuse = inner->slotuse - (mid + 1); - - std::copy(inner->slotkey + mid + 1, inner->slotkey + inner->slotuse, - newinner->slotkey); - std::copy(inner->childid + mid + 1, inner->childid + inner->slotuse + 1, - newinner->childid); - std::copy(inner->weight + mid + 1, inner->weight + inner->slotuse + 1, - newinner->weight); - - inner->slotuse = mid; - - *out_newkey = inner->key(mid); - *out_newinner = newinner; - } - - //! \} - -public: - //! \name Bulk Loader - Construct Tree from Sorted Sequence - //! \{ - - //! Bulk load a sorted range. Loads items into leaves and constructs a - //! B-tree above them. The tree must be empty when calling this function. - template <typename Iterator> - void bulk_load(Iterator ibegin, Iterator iend) { - TLX_BTREE_ASSERT(empty()); - - stats_.size = iend - ibegin; - - // calculate number of leaves needed, round up. - size_t num_items = iend - ibegin; - size_t num_leaves = (num_items + leaf_slotmax - 1) / leaf_slotmax; - - TLX_BTREE_PRINT("BTree::bulk_load, level 0: " << stats_.size << - " items into " << num_leaves << - " leaves with up to " << - ((iend - ibegin + num_leaves - 1) / num_leaves) << - " items per leaf."); - - Iterator it = ibegin; - for (size_t i = 0; i < num_leaves; ++i) - { - // allocate new leaf node - LeafNode* leaf = allocate_leaf(); - - // copy keys or (key,value) pairs into leaf nodes, uses template - // switch leaf->set_slot(). - leaf->slotuse = static_cast<int>(num_items / (num_leaves - i)); - for (size_t s = 0; s < leaf->slotuse; ++s, ++it) { - leaf->set_slot(s, *it); - //XX: bad hack - leaf->weight[s] = 1.0; - } - - if (tail_leaf_ != nullptr) { - tail_leaf_->next_leaf = leaf; - leaf->prev_leaf = tail_leaf_; - } - else { - head_leaf_ = leaf; - } - tail_leaf_ = leaf; - - num_items -= leaf->slotuse; - } - - TLX_BTREE_ASSERT(it == iend && num_items == 0); - - // if the btree is so small to fit into one leaf, then we're done. - if (head_leaf_ == tail_leaf_) { - root_ = head_leaf_; - return; - } - - TLX_BTREE_ASSERT(stats_.leaves == num_leaves); - - // create first level of inner nodes, pointing to the leaves. - size_t num_parents = - (num_leaves + (inner_slotmax + 1) - 1) / (inner_slotmax + 1); - - TLX_BTREE_PRINT("BTree::bulk_load, level 1: " << - num_leaves << " leaves in " << - num_parents << " inner nodes with up to " << - ((num_leaves + num_parents - 1) / num_parents) << - " leaves per inner node."); - - // save inner nodes and maxkey for next level. - typedef std::pair<InnerNode*, const key_type*> nextlevel_type; - nextlevel_type* nextlevel = new nextlevel_type[num_parents]; - - LeafNode* leaf = head_leaf_; - for (size_t i = 0; i < num_parents; ++i) - { - // allocate new inner node at level 1 - InnerNode* n = allocate_inner(1); - - n->slotuse = static_cast<int>(num_leaves / (num_parents - i)); - TLX_BTREE_ASSERT(n->slotuse > 0); - // this counts keys, but an inner node has keys+1 children. - --n->slotuse; - - // copy last key from each leaf and set child - for (unsigned short s = 0; s < n->slotuse; ++s) - { - n->slotkey[s] = leaf->key(leaf->slotuse - 1); - n->childid[s] = leaf; - n->weight[s] = calculate_weight(leaf); - leaf = leaf->next_leaf; - } - n->childid[n->slotuse] = leaf; - n->weight[n->slotuse] = calculate_weight(leaf); - - // track max key of any descendant. - nextlevel[i].first = n; - nextlevel[i].second = &leaf->key(leaf->slotuse - 1); - - leaf = leaf->next_leaf; - num_leaves -= n->slotuse + 1; - } - - TLX_BTREE_ASSERT(leaf == nullptr && num_leaves == 0); - - // recursively build inner nodes pointing to inner nodes. - for (int level = 2; num_parents != 1; ++level) - { - size_t num_children = num_parents; - num_parents = - (num_children + (inner_slotmax + 1) - 1) / (inner_slotmax + 1); - - TLX_BTREE_PRINT( - "BTree::bulk_load, level " << level << - ": " << num_children << " children in " << - num_parents << " inner nodes with up to " << - ((num_children + num_parents - 1) / num_parents) << - " children per inner node."); - - size_t inner_index = 0; - for (size_t i = 0; i < num_parents; ++i) - { - // allocate new inner node at level - InnerNode* n = allocate_inner(level); - - n->slotuse = static_cast<int>(num_children / (num_parents - i)); - TLX_BTREE_ASSERT(n->slotuse > 0); - // this counts keys, but an inner node has keys+1 children. - --n->slotuse; - - // copy children and maxkeys from nextlevel - for (unsigned short s = 0; s < n->slotuse; ++s) - { - n->slotkey[s] = *nextlevel[inner_index].second; - n->childid[s] = nextlevel[inner_index].first; - n->weight[s] = calculate_weight(nextlevel[inner_index].first); - ++inner_index; - } - n->childid[n->slotuse] = nextlevel[inner_index].first; - n->weight[n->slotuse] = calculate_weight(nextlevel[inner_index].first); - - // reuse nextlevel array for parents, because we can overwrite - // slots we've already consumed. - nextlevel[i].first = n; - nextlevel[i].second = nextlevel[inner_index].second; - - ++inner_index; - num_children -= n->slotuse + 1; - } - - TLX_BTREE_ASSERT(num_children == 0); - } - - root_ = nextlevel[0].first; - delete[] nextlevel; - - if (self_verify) verify(); - } - - //! \} - -private: - //! \name Support Class Encapsulating Deletion Results - //! \{ - - //! Result flags of recursive deletion. - enum result_flags_t { - //! Deletion successful and no fix-ups necessary. - btree_ok = 0, - - //! Deletion not successful because key was not found. - btree_not_found = 1, - - //! Deletion successful, the last key was updated so parent slotkeys - //! need updates. - btree_update_lastkey = 2, - - //! Deletion successful, children nodes were merged and the parent needs - //! to remove the empty node. - btree_fixmerge = 4, - - btree_shift = 8 - }; - - //! B+ tree recursive deletion has much information which is needs to be - //! passed upward. - struct result_t { - //! Merged result flags - result_flags_t flags; - - //! The key to be updated at the parent's slot - key_type lastkey; - - //! Constructor of a result with a specific flag, this can also be used - //! as for implicit conversion. - result_t(result_flags_t f = btree_ok) // NOLINT - : flags(f), lastkey() - { } - - //! Constructor with a lastkey value. - result_t(result_flags_t f, const key_type& k) - : flags(f), lastkey(k) - { } - - //! Test if this result object has a given flag set. - bool has(result_flags_t f) const { - return (flags & f) != 0; - } - - //! Merge two results OR-ing the result flags and overwriting lastkeys. - result_t& operator |= (const result_t& other) { - flags = result_flags_t(flags | other.flags); - - // we overwrite existing lastkeys on purpose - if (other.has(btree_update_lastkey)) - lastkey = other.lastkey; - - return *this; - } - }; - - //! \} - -public: - //! \name Public Erase Functions - //! \{ - - //! Erases one (the first) of the key/data pairs associated with the given - //! key. - bool erase_one(const key_type& key) { - TLX_BTREE_PRINT("BTree::erase_one(" << key << - ") on btree size " << size()); - - if (self_verify) verify(); - - if (!root_) return false; - - double carry_weight = 0.0; - result_t result = erase_one_descend( - key, root_, nullptr, nullptr, nullptr, nullptr, nullptr, 0, carry_weight); - - if (!result.has(btree_not_found)) - --stats_.size; - -#ifdef TLX_BTREE_DEBUG - if (debug) print(std::cout); -#endif - if (self_verify) verify(); - - return !result.has(btree_not_found); - } - - //! Erases all the key/data pairs associated with the given key. This is - //! implemented using erase_one(). - size_type erase(const key_type& key) { - size_type c = 0; - - while (erase_one(key)) - { - ++c; - if (!allow_duplicates) break; - } - - return c; - } - - //! Erase the key/data pair referenced by the iterator. - void erase(iterator iter) { - TLX_BTREE_PRINT("BTree::erase_iter(" << iter.curr_leaf << - "," << iter.curr_slot << ") on btree size " << size()); - - if (self_verify) verify(); - - if (!root_) return; - - double weight = 0.0; - result_t result = erase_iter_descend( - iter, root_, nullptr, nullptr, nullptr, nullptr, nullptr, 0, weight); - - if (!result.has(btree_not_found)) - --stats_.size; - -#ifdef TLX_BTREE_DEBUG - if (debug) print(std::cout); -#endif - if (self_verify) verify(); - } - -#ifdef BTREE_TODO - //! Erase all key/data pairs in the range [first,last). This function is - //! currently not implemented by the B+ Tree. - void erase(iterator /* first */, iterator /* last */) { - abort(); - } -#endif - - //! \} - -private: - //! \name Private Erase Functions - //! \{ - - /*! - * Erase one (the first) key/data pair in the B+ tree matching key. - * - * Descends down the tree in search of key. During the descent the parent, - * left and right siblings and their parents are computed and passed - * down. Once the key/data pair is found, it is removed from the leaf. If - * the leaf underflows 6 different cases are handled. These cases resolve - * the underflow by shifting key/data pairs from adjacent sibling nodes, - * merging two sibling nodes or trimming the tree. - */ - result_t erase_one_descend(const key_type& key, - node* curr, - node* left, node* right, - InnerNode* left_parent, InnerNode* right_parent, - InnerNode* parent, unsigned int parentslot, double& carry_weight) { - if (curr->is_leafnode()) - { - LeafNode* leaf = static_cast<LeafNode*>(curr); - LeafNode* left_leaf = static_cast<LeafNode*>(left); - LeafNode* right_leaf = static_cast<LeafNode*>(right); - - unsigned short slot = find_lower(leaf, key); - - if (slot >= leaf->slotuse || !key_equal(key, leaf->key(slot))) - { - TLX_BTREE_PRINT("Could not find key " << key << " to erase."); - - return btree_not_found; - } - - TLX_BTREE_PRINT( - "Found key in leaf " << curr << " at slot " << slot); - carry_weight = leaf->weight[slot]; - - std::copy(leaf->slotdata + slot + 1, leaf->slotdata + leaf->slotuse, - leaf->slotdata + slot); - std::copy(leaf->weight + slot + 1, leaf->weight + leaf->slotuse, - leaf->weight + slot); - - leaf->slotuse--; - - result_t myres = btree_ok; - - // if the last key of the leaf was changed, the parent is notified - // and updates the key of this leaf - if (slot == leaf->slotuse) - { - if (parent && parentslot < parent->slotuse) - { - TLX_BTREE_ASSERT(parent->childid[parentslot] == curr); - parent->slotkey[parentslot] = leaf->key(leaf->slotuse - 1); - } - else - { - if (leaf->slotuse >= 1) - { - TLX_BTREE_PRINT("Scheduling lastkeyupdate: key " << - leaf->key(leaf->slotuse - 1)); - myres |= result_t( - btree_update_lastkey, leaf->key(leaf->slotuse - 1)); - } - else - { - TLX_BTREE_ASSERT(leaf == root_); - } - } - } - - if (leaf->is_underflow() && !(leaf == root_ && leaf->slotuse >= 1)) - { - // determine what to do about the underflow - - // case : if this empty leaf is the root, then delete all nodes - // and set root to nullptr. - if (left_leaf == nullptr && right_leaf == nullptr) - { - TLX_BTREE_ASSERT(leaf == root_); - TLX_BTREE_ASSERT(leaf->slotuse == 0); - - free_node(root_); - - root_ = leaf = nullptr; - head_leaf_ = tail_leaf_ = nullptr; - - // will be decremented soon by insert_start() - TLX_BTREE_ASSERT(stats_.size == 1); - TLX_BTREE_ASSERT(stats_.leaves == 0); - TLX_BTREE_ASSERT(stats_.inner_nodes == 0); - - return btree_ok; - } - // case : if both left and right leaves would underflow in case - // of a shift, then merging is necessary. choose the more local - // merger with our parent - else if ((left_leaf == nullptr || left_leaf->is_few()) && - (right_leaf == nullptr || right_leaf->is_few())) - { - if (left_parent == parent) - myres |= merge_leaves(left_leaf, leaf, left_parent); - else - myres |= merge_leaves(leaf, right_leaf, right_parent); - } - // case : the right leaf has extra data, so balance right with - // current - else if ((left_leaf != nullptr && left_leaf->is_few()) && - (right_leaf != nullptr && !right_leaf->is_few())) - { - if (right_parent == parent) { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - else - myres |= merge_leaves(left_leaf, leaf, left_parent); - } - // case : the left leaf has extra data, so balance left with - // current - else if ((left_leaf != nullptr && !left_leaf->is_few()) && - (right_leaf != nullptr && right_leaf->is_few())) - { - if (left_parent == parent) - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - else - myres |= merge_leaves(leaf, right_leaf, right_parent); - } - // case : both the leaf and right leaves have extra data and our - // parent, choose the leaf with more data - else if (left_parent == right_parent) - { - if (left_leaf->slotuse <= right_leaf->slotuse) { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - else - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - } - else - { - if (left_parent == parent) - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - else { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - } - } - - return myres; - } - else // !curr->is_leafnode() - { - InnerNode* inner = static_cast<InnerNode*>(curr); - InnerNode* left_inner = static_cast<InnerNode*>(left); - InnerNode* right_inner = static_cast<InnerNode*>(right); - - node* myleft, * myright; - InnerNode* myleft_parent, * myright_parent; - - unsigned short slot = find_lower(inner, key); - - if (slot == 0) { - myleft = - (left == nullptr) ? nullptr : - static_cast<InnerNode*>(left)->childid[left->slotuse - 1]; - myleft_parent = left_parent; - } - else { - myleft = inner->childid[slot - 1]; - myleft_parent = inner; - } - - if (slot == inner->slotuse) { - myright = - (right == nullptr) ? nullptr : - static_cast<InnerNode*>(right)->childid[0]; - myright_parent = right_parent; - } - else { - myright = inner->childid[slot + 1]; - myright_parent = inner; - } - - TLX_BTREE_PRINT("erase_one_descend into " << inner->childid[slot]); - - result_t result = erase_one_descend( - key, - inner->childid[slot], - myleft, myright, - myleft_parent, myright_parent, - inner, slot, carry_weight); - - result_t myres = btree_ok; - - if (result.has(btree_not_found)) - { - return result; - } - - if (result.has(btree_update_lastkey)) - { - if (parent && parentslot < parent->slotuse) - { - TLX_BTREE_PRINT("Fixing lastkeyupdate: key " << - result.lastkey << " into parent " << - parent << " at parentslot " << - parentslot); - - TLX_BTREE_ASSERT(parent->childid[parentslot] == curr); - parent->slotkey[parentslot] = result.lastkey; - } - else - { - TLX_BTREE_PRINT( - "Forwarding lastkeyupdate: key " << result.lastkey); - myres |= result_t(btree_update_lastkey, result.lastkey); - } - } - - if (result.has(btree_fixmerge)) - { - // either the current node or the next is empty and should be - // removed - if (inner->childid[slot]->slotuse != 0) - slot++; - - - // this is the child slot invalidated by the merge - TLX_BTREE_ASSERT(inner->childid[slot]->slotuse == 0); - - free_node(inner->childid[slot]); - - std::copy( - inner->slotkey + slot, inner->slotkey + inner->slotuse, - inner->slotkey + slot - 1); - std::copy( - inner->childid + slot + 1, - inner->childid + inner->slotuse + 1, - inner->childid + slot); - std::copy( - inner->weight + slot + 1, - inner->weight + inner->slotuse + 1, - inner->weight + slot); - - inner->slotuse--; - - if (slot > 0) inner->weight[slot - 1] = calculate_weight(inner->childid[slot - 1]); - if (slot <= inner->slotuse) inner->weight[slot] = calculate_weight(inner->childid[slot]); - - if (inner->level == 1) - { - // fix split key for children leaves - slot--; - LeafNode* child = - static_cast<LeafNode*>(inner->childid[slot]); - inner->slotkey[slot] = child->key(child->slotuse - 1); - } - } else if (!result.has(btree_shift)) { - //--inner->weight[slot]; - inner->weight[slot] -= carry_weight; - } - - if (inner->is_underflow() && - !(inner == root_ && inner->slotuse >= 1)) - { - // case: the inner node is the root and has just one child. that - // child becomes the new root - if (left_inner == nullptr && right_inner == nullptr) - { - TLX_BTREE_ASSERT(inner == root_); - TLX_BTREE_ASSERT(inner->slotuse == 0); - - root_ = inner->childid[0]; - - inner->slotuse = 0; - free_node(inner); - - return btree_ok; - } - // case : if both left and right leaves would underflow in case - // of a shift, then merging is necessary. choose the more local - // merger with our parent - else if ((left_inner == nullptr || left_inner->is_few()) && - (right_inner == nullptr || right_inner->is_few())) - { - if (left_parent == parent) - myres |= merge_inner( - left_inner, inner, left_parent, parentslot - 1); - else - myres |= merge_inner( - inner, right_inner, right_parent, parentslot); - } - // case : the right leaf has extra data, so balance right with - // current - else if ((left_inner != nullptr && left_inner->is_few()) && - (right_inner != nullptr && !right_inner->is_few())) - { - if (right_parent == parent) - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - else - myres |= merge_inner( - left_inner, inner, left_parent, parentslot - 1); - } - // case : the left leaf has extra data, so balance left with - // current - else if ((left_inner != nullptr && !left_inner->is_few()) && - (right_inner != nullptr && right_inner->is_few())) - { - if (left_parent == parent) - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - else - myres |= merge_inner( - inner, right_inner, right_parent, parentslot); - } - // case : both the leaf and right leaves have extra data and our - // parent, choose the leaf with more data - else if (left_parent == right_parent) - { - if (left_inner->slotuse <= right_inner->slotuse) - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - else - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - } - else - { - if (left_parent == parent) - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - else - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - } - } - - return myres; - } - } - - /*! - * Erase one key/data pair referenced by an iterator in the B+ tree. - * - * Descends down the tree in search of an iterator. During the descent the - * parent, left and right siblings and their parents are computed and passed - * down. The difficulty is that the iterator contains only a pointer to a - * LeafNode, which means that this function must do a recursive depth first - * search for that leaf node in the subtree containing all pairs of the same - * key. This subtree can be very large, even the whole tree, though in - * practice it would not make sense to have so many duplicate keys. - * - * Once the referenced key/data pair is found, it is removed from the leaf - * and the same underflow cases are handled as in erase_one_descend. - */ - result_t erase_iter_descend(const iterator& iter, - node* curr, - node* left, node* right, - InnerNode* left_parent, InnerNode* right_parent, - InnerNode* parent, unsigned int parentslot, double& carry_weight) { - if (curr->is_leafnode()) - { - LeafNode* leaf = static_cast<LeafNode*>(curr); - LeafNode* left_leaf = static_cast<LeafNode*>(left); - LeafNode* right_leaf = static_cast<LeafNode*>(right); - - // if this is not the correct leaf, get next step in recursive - // search - if (leaf != iter.curr_leaf) - { - return btree_not_found; - } - - if (iter.curr_slot >= leaf->slotuse) - { - TLX_BTREE_PRINT("Could not find iterator (" << - iter.curr_leaf << "," << iter.curr_slot << - ") to erase. Invalid leaf node?"); - - return btree_not_found; - } - - unsigned short slot = iter.curr_slot; - - TLX_BTREE_PRINT("Found iterator in leaf " << - curr << " at slot " << slot); - carry_weight = leaf->weight[slot]; - - std::copy(leaf->slotdata + slot + 1, leaf->slotdata + leaf->slotuse, - leaf->slotdata + slot); - std::copy(leaf->weight + slot + 1, leaf->weight + leaf->slotuse, - leaf->weight + slot); - - leaf->slotuse--; - - result_t myres = btree_ok; - - // if the last key of the leaf was changed, the parent is notified - // and updates the key of this leaf - if (slot == leaf->slotuse) - { - if (parent && parentslot < parent->slotuse) - { - TLX_BTREE_ASSERT(parent->childid[parentslot] == curr); - parent->slotkey[parentslot] = leaf->key(leaf->slotuse - 1); - } - else - { - if (leaf->slotuse >= 1) - { - TLX_BTREE_PRINT("Scheduling lastkeyupdate: key " << - leaf->key(leaf->slotuse - 1)); - myres |= result_t( - btree_update_lastkey, leaf->key(leaf->slotuse - 1)); - } - else - { - TLX_BTREE_ASSERT(leaf == root_); - } - } - } - - if (leaf->is_underflow() && !(leaf == root_ && leaf->slotuse >= 1)) - { - // determine what to do about the underflow - - // case : if this empty leaf is the root, then delete all nodes - // and set root to nullptr. - if (left_leaf == nullptr && right_leaf == nullptr) - { - TLX_BTREE_ASSERT(leaf == root_); - TLX_BTREE_ASSERT(leaf->slotuse == 0); - - free_node(root_); - - root_ = leaf = nullptr; - head_leaf_ = tail_leaf_ = nullptr; - - // will be decremented soon by insert_start() - TLX_BTREE_ASSERT(stats_.size == 1); - TLX_BTREE_ASSERT(stats_.leaves == 0); - TLX_BTREE_ASSERT(stats_.inner_nodes == 0); - - return btree_ok; - } - // case : if both left and right leaves would underflow in case - // of a shift, then merging is necessary. choose the more local - // merger with our parent - else if ((left_leaf == nullptr || left_leaf->is_few()) && - (right_leaf == nullptr || right_leaf->is_few())) - { - if (left_parent == parent) - myres |= merge_leaves(left_leaf, leaf, left_parent); - else - myres |= merge_leaves(leaf, right_leaf, right_parent); - } - // case : the right leaf has extra data, so balance right with - // current - else if ((left_leaf != nullptr && left_leaf->is_few()) && - (right_leaf != nullptr && !right_leaf->is_few())) - { - if (right_parent == parent) { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - else { - myres |= merge_leaves(left_leaf, leaf, left_parent); - } - } - // case : the left leaf has extra data, so balance left with - // current - else if ((left_leaf != nullptr && !left_leaf->is_few()) && - (right_leaf != nullptr && right_leaf->is_few())) - { - if (left_parent == parent) { - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - } - else { - myres |= merge_leaves(leaf, right_leaf, right_parent); - } - } - // case : both the leaf and right leaves have extra data and our - // parent, choose the leaf with more data - else if (left_parent == right_parent) - { - if (left_leaf->slotuse <= right_leaf->slotuse) { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - else { - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - } - } - else - { - if (left_parent == parent) { - myres |= shift_right_leaf( - left_leaf, leaf, left_parent, parentslot - 1); - } - else { - myres |= shift_left_leaf( - leaf, right_leaf, right_parent, parentslot); - myres |= btree_shift; - } - } - } - - return myres; - } - else // !curr->is_leafnode() - { - InnerNode* inner = static_cast<InnerNode*>(curr); - InnerNode* left_inner = static_cast<InnerNode*>(left); - InnerNode* right_inner = static_cast<InnerNode*>(right); - - // find first slot below which the searched iterator might be - // located. - - result_t result; - unsigned short slot = find_lower(inner, iter.key()); - - while (slot <= inner->slotuse) - { - node* myleft, * myright; - InnerNode* myleft_parent, * myright_parent; - - if (slot == 0) { - myleft = (left == nullptr) ? nullptr - : static_cast<InnerNode*>(left)->childid[ - left->slotuse - 1]; - myleft_parent = left_parent; - } - else { - myleft = inner->childid[slot - 1]; - myleft_parent = inner; - } - - if (slot == inner->slotuse) { - myright = (right == nullptr) ? nullptr - : static_cast<InnerNode*>(right)->childid[0]; - myright_parent = right_parent; - } - else { - myright = inner->childid[slot + 1]; - myright_parent = inner; - } - - TLX_BTREE_PRINT("erase_iter_descend into " << - inner->childid[slot]); - - result = erase_iter_descend(iter, - inner->childid[slot], - myleft, myright, - myleft_parent, myright_parent, - inner, slot, carry_weight); - - if (!result.has(btree_not_found)) - break; - - // continue recursive search for leaf on next slot - - if (slot < inner->slotuse && - key_less(inner->slotkey[slot], iter.key())) - return btree_not_found; - - ++slot; - } - - if (slot > inner->slotuse) - return btree_not_found; - - result_t myres = btree_ok; - - if (result.has(btree_update_lastkey)) - { - if (parent && parentslot < parent->slotuse) - { - TLX_BTREE_PRINT("Fixing lastkeyupdate: key " << - result.lastkey << " into parent " << - parent << " at parentslot " << parentslot); - - TLX_BTREE_ASSERT(parent->childid[parentslot] == curr); - parent->slotkey[parentslot] = result.lastkey; - } - else - { - TLX_BTREE_PRINT( - "Forwarding lastkeyupdate: key " << result.lastkey); - myres |= result_t(btree_update_lastkey, result.lastkey); - } - } - - if (result.has(btree_fixmerge)) - { - // either the current node or the next is empty and should be - // removed - if (inner->childid[slot]->slotuse != 0) - slot++; - - // this is the child slot invalidated by the merge - TLX_BTREE_ASSERT(inner->childid[slot]->slotuse == 0); - - free_node(inner->childid[slot]); - - std::copy( - inner->slotkey + slot, inner->slotkey + inner->slotuse, - inner->slotkey + slot - 1); - std::copy( - inner->childid + slot + 1, - inner->childid + inner->slotuse + 1, - inner->childid + slot); - std::copy( - inner->weight + slot + 1, - inner->weight + inner->slotuse + 1, - inner->weight + slot); - - inner->slotuse--; - - if (slot > 0) inner->weight[slot - 1] = calculate_weight(inner->childid[slot - 1]); - if (slot <= inner->slotuse) inner->weight[slot] = calculate_weight(inner->childid[slot]); - - if (inner->level == 1) - { - // fix split key for children leaves - slot--; - LeafNode* child = - static_cast<LeafNode*>(inner->childid[slot]); - inner->slotkey[slot] = child->key(child->slotuse - 1); - } - } else { - //--inner->weight[slot]; - inner->weight[slot] -= carry_weight; - } - - if (inner->is_underflow() && - !(inner == root_ && inner->slotuse >= 1)) - { - // case: the inner node is the root and has just one - // child. that child becomes the new root - if (left_inner == nullptr && right_inner == nullptr) - { - TLX_BTREE_ASSERT(inner == root_); - TLX_BTREE_ASSERT(inner->slotuse == 0); - - root_ = inner->childid[0]; - - inner->slotuse = 0; - free_node(inner); - - return btree_ok; - } - // case : if both left and right leaves would underflow in case - // of a shift, then merging is necessary. choose the more local - // merger with our parent - else if ((left_inner == nullptr || left_inner->is_few()) && - (right_inner == nullptr || right_inner->is_few())) - { - if (left_parent == parent) { - myres |= merge_inner( - left_inner, inner, left_parent, parentslot - 1); - } - else { - myres |= merge_inner( - inner, right_inner, right_parent, parentslot); - } - } - // case : the right leaf has extra data, so balance right with - // current - else if ((left_inner != nullptr && left_inner->is_few()) && - (right_inner != nullptr && !right_inner->is_few())) - { - if (right_parent == parent) { - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - } - else { - myres |= merge_inner( - left_inner, inner, left_parent, parentslot - 1); - } - } - // case : the left leaf has extra data, so balance left with - // current - else if ((left_inner != nullptr && !left_inner->is_few()) && - (right_inner != nullptr && right_inner->is_few())) - { - if (left_parent == parent) { - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - } - else { - myres |= merge_inner( - inner, right_inner, right_parent, parentslot); - } - } - // case : both the leaf and right leaves have extra data and our - // parent, choose the leaf with more data - else if (left_parent == right_parent) - { - if (left_inner->slotuse <= right_inner->slotuse) { - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - } - else { - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - } - } - else - { - if (left_parent == parent) { - myres |= shift_right_inner( - left_inner, inner, left_parent, parentslot - 1); - } - else { - myres |= shift_left_inner( - inner, right_inner, right_parent, parentslot); - } - } - } - - return myres; - } - } - - //! Merge two leaf nodes. The function moves all key/data pairs from right - //! to left and sets right's slotuse to zero. The right slot is then removed - //! by the calling parent node. - result_t merge_leaves(LeafNode* left, LeafNode* right, - InnerNode* parent) { - TLX_BTREE_PRINT("Merge leaf nodes " << left << " and " << right << - " with common parent " << parent << "."); - (void)parent; - - TLX_BTREE_ASSERT(left->is_leafnode() && right->is_leafnode()); - TLX_BTREE_ASSERT(parent->level == 1); - - TLX_BTREE_ASSERT(left->slotuse + right->slotuse < leaf_slotmax); - - std::copy(right->slotdata, right->slotdata + right->slotuse, - left->slotdata + left->slotuse); - std::copy(right->weight, right->weight + right->slotuse, - left->weight + left->slotuse); - - left->slotuse += right->slotuse; - - left->next_leaf = right->next_leaf; - if (left->next_leaf) - left->next_leaf->prev_leaf = left; - else - tail_leaf_ = left; - - right->slotuse = 0; - - return btree_fixmerge; - } - - //! Merge two inner nodes. The function moves all key/childid pairs from - //! right to left and sets right's slotuse to zero. The right slot is then - //! removed by the calling parent node. - static result_t merge_inner(InnerNode* left, InnerNode* right, - InnerNode* parent, unsigned int parentslot) { - TLX_BTREE_PRINT("Merge inner nodes " << left << " and " << right << - " with common parent " << parent << "."); - - TLX_BTREE_ASSERT(left->level == right->level); - TLX_BTREE_ASSERT(parent->level == left->level + 1); - - TLX_BTREE_ASSERT(parent->childid[parentslot] == left); - - TLX_BTREE_ASSERT(left->slotuse + right->slotuse < inner_slotmax); - - if (self_verify) - { - // find the left node's slot in the parent's children - unsigned int leftslot = 0; - while (leftslot <= parent->slotuse && - parent->childid[leftslot] != left) - ++leftslot; - - TLX_BTREE_ASSERT(leftslot < parent->slotuse); - TLX_BTREE_ASSERT(parent->childid[leftslot] == left); - TLX_BTREE_ASSERT(parent->childid[leftslot + 1] == right); - - TLX_BTREE_ASSERT(parentslot == leftslot); - } - - // retrieve the decision key from parent - left->slotkey[left->slotuse] = parent->slotkey[parentslot]; - left->slotuse++; - - // copy over keys and children from right - std::copy(right->slotkey, right->slotkey + right->slotuse, - left->slotkey + left->slotuse); - std::copy(right->childid, right->childid + right->slotuse + 1, - left->childid + left->slotuse); - std::copy(right->weight, right->weight + right->slotuse + 1, - left->weight + left->slotuse); - - left->slotuse += right->slotuse; - right->slotuse = 0; - - return btree_fixmerge; - } - - //! Balance two leaf nodes. The function moves key/data pairs from right to - //! left so that both nodes are equally filled. The parent node is updated - //! if possible. - static result_t shift_left_leaf( - LeafNode* left, LeafNode* right, - InnerNode* parent, unsigned int parentslot) { - - TLX_BTREE_ASSERT(left->is_leafnode() && right->is_leafnode()); - TLX_BTREE_ASSERT(parent->level == 1); - - TLX_BTREE_ASSERT(left->next_leaf == right); - TLX_BTREE_ASSERT(left == right->prev_leaf); - - TLX_BTREE_ASSERT(left->slotuse < right->slotuse); - TLX_BTREE_ASSERT(parent->childid[parentslot] == left); - - unsigned int shiftnum = (right->slotuse - left->slotuse) >> 1; - - TLX_BTREE_PRINT("Shifting (leaf) " << shiftnum << " entries to left " << - left << " from right " << right << - " with common parent " << parent << "."); - - TLX_BTREE_ASSERT(left->slotuse + shiftnum < leaf_slotmax); - - // copy the first items from the right node to the last slot in the left - // node. - - std::copy(right->slotdata, right->slotdata + shiftnum, - left->slotdata + left->slotuse); - std::copy(right->weight, right->weight + shiftnum, - left->weight + left->slotuse); - - left->slotuse += shiftnum; - - // shift all slots in the right node to the left - - std::copy(right->slotdata + shiftnum, right->slotdata + right->slotuse, - right->slotdata); - std::copy(right->weight + shiftnum, right->weight + right->slotuse, - right->weight); - - right->slotuse -= shiftnum; - - parent->weight[parentslot] = calculate_weight(left); - parent->weight[parentslot + 1] = calculate_weight(right); - - // fixup parent - result_t res; - if (parentslot < parent->slotuse) { - parent->slotkey[parentslot] = left->key(left->slotuse - 1); - return btree_ok; - } - else { // the update is further up the tree - return result_t(btree_update_lastkey, left->key(left->slotuse - 1)); - } - } - - //! Balance two inner nodes. The function moves key/data pairs from right to - //! left so that both nodes are equally filled. The parent node is updated - //! if possible. - static result_t shift_left_inner(InnerNode* left, InnerNode* right, - InnerNode* parent, unsigned int parentslot) { - TLX_BTREE_ASSERT(left->level == right->level); - TLX_BTREE_ASSERT(parent->level == left->level + 1); - - TLX_BTREE_ASSERT(left->slotuse < right->slotuse); - TLX_BTREE_ASSERT(parent->childid[parentslot] == left); - - unsigned int shiftnum = (right->slotuse - left->slotuse) >> 1; - - TLX_BTREE_PRINT("Shifting (inner) " << shiftnum << - " entries to left " << left << - " from right " << right << - " with common parent " << parent << "."); - - TLX_BTREE_ASSERT(left->slotuse + shiftnum < inner_slotmax); - - if (self_verify) - { - // find the left node's slot in the parent's children and compare to - // parentslot - - unsigned int leftslot = 0; - while (leftslot <= parent->slotuse && - parent->childid[leftslot] != left) - ++leftslot; - - TLX_BTREE_ASSERT(leftslot < parent->slotuse); - TLX_BTREE_ASSERT(parent->childid[leftslot] == left); - TLX_BTREE_ASSERT(parent->childid[leftslot + 1] == right); - - TLX_BTREE_ASSERT(leftslot == parentslot); - } - - // copy the parent's decision slotkey and childid to the first new key - // on the left - left->slotkey[left->slotuse] = parent->slotkey[parentslot]; - left->slotuse++; - - // copy the other items from the right node to the last slots in the - // left node. - std::copy(right->slotkey, right->slotkey + shiftnum - 1, - left->slotkey + left->slotuse); - std::copy(right->childid, right->childid + shiftnum, - left->childid + left->slotuse); - std::copy(right->weight, right->weight + shiftnum, - left->weight + left->slotuse); - - left->slotuse += shiftnum - 1; - - // fixup parent - parent->slotkey[parentslot] = right->slotkey[shiftnum - 1]; - - // shift all slots in the right node - std::copy( - right->slotkey + shiftnum, right->slotkey + right->slotuse, - right->slotkey); - std::copy( - right->childid + shiftnum, right->childid + right->slotuse + 1, - right->childid); - std::copy( - right->weight + shiftnum, right->weight + right->slotuse + 1, - right->weight); - - right->slotuse -= shiftnum; - - parent->weight[parentslot] = calculate_weight(left); - parent->weight[parentslot + 1] = calculate_weight(right); - - return btree_shift; - } - - //! Balance two leaf nodes. The function moves key/data pairs from left to - //! right so that both nodes are equally filled. The parent node is updated - //! if possible. - static result_t shift_right_leaf(LeafNode* left, LeafNode* right, - InnerNode* parent, unsigned int parentslot) { - TLX_BTREE_ASSERT(left->is_leafnode() && right->is_leafnode()); - TLX_BTREE_ASSERT(parent->level == 1); - - TLX_BTREE_ASSERT(left->next_leaf == right); - TLX_BTREE_ASSERT(left == right->prev_leaf); - TLX_BTREE_ASSERT(parent->childid[parentslot] == left); - - TLX_BTREE_ASSERT(left->slotuse > right->slotuse); - - unsigned int shiftnum = (left->slotuse - right->slotuse) >> 1; - - TLX_BTREE_PRINT("Shifting (leaf) " << shiftnum << - " entries to right " << right << - " from left " << left << - " with common parent " << parent << "."); - - if (self_verify) - { - // find the left node's slot in the parent's children - unsigned int leftslot = 0; - while (leftslot <= parent->slotuse && - parent->childid[leftslot] != left) - ++leftslot; - - TLX_BTREE_ASSERT(leftslot < parent->slotuse); - TLX_BTREE_ASSERT(parent->childid[leftslot] == left); - TLX_BTREE_ASSERT(parent->childid[leftslot + 1] == right); - - TLX_BTREE_ASSERT(leftslot == parentslot); - } - - // shift all slots in the right node - - TLX_BTREE_ASSERT(right->slotuse + shiftnum < leaf_slotmax); - - std::copy_backward(right->slotdata, right->slotdata + right->slotuse, - right->slotdata + right->slotuse + shiftnum); - std::copy_backward(right->weight, right->weight + right->slotuse, - right->weight + right->slotuse + shiftnum); - - right->slotuse += shiftnum; - - // copy the last items from the left node to the first slot in the right - // node. - std::copy(left->slotdata + left->slotuse - shiftnum, - left->slotdata + left->slotuse, - right->slotdata); - std::copy(left->weight + left->slotuse - shiftnum, - left->weight + left->slotuse, - right->weight); - - left->slotuse -= shiftnum; - - parent->weight[parentslot] = calculate_weight(left); - parent->weight[parentslot + 1] = calculate_weight(right); - - parent->slotkey[parentslot] = left->key(left->slotuse - 1); - - return btree_shift; - } - - //! Balance two inner nodes. The function moves key/data pairs from left to - //! right so that both nodes are equally filled. The parent node is updated - //! if possible. - static result_t shift_right_inner(InnerNode* left, InnerNode* right, - InnerNode* parent, unsigned int parentslot) { - TLX_BTREE_ASSERT(left->level == right->level); - TLX_BTREE_ASSERT(parent->level == left->level + 1); - - TLX_BTREE_ASSERT(left->slotuse > right->slotuse); - TLX_BTREE_ASSERT(parent->childid[parentslot] == left); - - unsigned int shiftnum = (left->slotuse - right->slotuse) >> 1; - - TLX_BTREE_PRINT("Shifting (inner) " << shiftnum << - " entries to right " << right << - " from left " << left << - " with common parent " << parent << "."); - if (self_verify) - { - // find the left node's slot in the parent's children - unsigned int leftslot = 0; - while (leftslot <= parent->slotuse && - parent->childid[leftslot] != left) - ++leftslot; - - TLX_BTREE_ASSERT(leftslot < parent->slotuse); - TLX_BTREE_ASSERT(parent->childid[leftslot] == left); - TLX_BTREE_ASSERT(parent->childid[leftslot + 1] == right); - - TLX_BTREE_ASSERT(leftslot == parentslot); - } - - // shift all slots in the right node - - TLX_BTREE_ASSERT(right->slotuse + shiftnum < inner_slotmax); - - std::copy_backward( - right->slotkey, right->slotkey + right->slotuse, - right->slotkey + right->slotuse + shiftnum); - std::copy_backward( - right->childid, right->childid + right->slotuse + 1, - right->childid + right->slotuse + 1 + shiftnum); - std::copy_backward( - right->weight, right->weight + right->slotuse + 1, - right->weight + right->slotuse + 1 + shiftnum); - - right->slotuse += shiftnum; - - // copy the parent's decision slotkey and childid to the last new key on - // the right - right->slotkey[shiftnum - 1] = parent->slotkey[parentslot]; - - // copy the remaining last items from the left node to the first slot in - // the right node. - std::copy(left->slotkey + left->slotuse - shiftnum + 1, - left->slotkey + left->slotuse, - right->slotkey); - std::copy(left->childid + left->slotuse - shiftnum + 1, - left->childid + left->slotuse + 1, - right->childid); - std::copy(left->weight + left->slotuse - shiftnum + 1, - left->weight + left->slotuse + 1, - right->weight); - - // copy the first to-be-removed key from the left node to the parent's - // decision slot - parent->slotkey[parentslot] = left->slotkey[left->slotuse - shiftnum]; - - left->slotuse -= shiftnum; - - parent->weight[parentslot] = calculate_weight(left); - parent->weight[parentslot + 1] = calculate_weight(right); - - return btree_shift; - } - - //! \} - -#ifdef TLX_BTREE_DEBUG - -public: - //! \name Debug Printing - //! \{ - - //! Print out the B+ tree structure with keys onto the given ostream. This - //! function requires that the header is compiled with TLX_BTREE_DEBUG and - //! that key_type is printable via std::ostream. - void print(std::ostream& os) const { - if (root_) { - print_node(os, root_, 0, true); - } - } - - //! Print out only the leaves via the double linked list. - void print_leaves(std::ostream& os) const { - os << "leaves:" << std::endl; - - const LeafNode* n = head_leaf_; - - while (n) - { - os << " " << n << std::endl; - - n = n->next_leaf; - } - } - -private: - //! Recursively descend down the tree and print out nodes. - static void print_node(std::ostream& os, const node* node, - unsigned int depth = 0, bool recursive = false) { - for (unsigned int i = 0; i < depth; i++) os << " "; - - os << "node " << node << " level " << node->level << - " slotuse " << node->slotuse << std::endl; - - if (node->is_leafnode()) - { - const LeafNode* leafnode = static_cast<const LeafNode*>(node); - - for (unsigned int i = 0; i < depth; i++) os << " "; - os << " leaf prev " << leafnode->prev_leaf << - " next " << leafnode->next_leaf << std::endl; - - for (unsigned int i = 0; i < depth; i++) os << " "; - - for (unsigned short slot = 0; slot < leafnode->slotuse; ++slot) - { - // os << leafnode->key(slot) << " " - // << "(data: " << leafnode->slotdata[slot] << ") "; - os << leafnode->key(slot) << " "; - } - os << std::endl; - } - else - { - const InnerNode* innernode = static_cast<const InnerNode*>(node); - - for (unsigned int i = 0; i < depth; i++) os << " "; - - for (unsigned short slot = 0; slot < innernode->slotuse; ++slot) - { - os << "(" << innernode->childid[slot] << ") " - << innernode->slotkey[slot] << " "; - } - os << "(" << innernode->childid[innernode->slotuse] << ")" - << std::endl; - - if (recursive) - { - for (unsigned short slot = 0; - slot < innernode->slotuse + 1; ++slot) - { - print_node( - os, innernode->childid[slot], depth + 1, recursive); - } - } - } - } - - //! \} -#endif - -public: - //! \name Verification of B+ Tree Invariants - //! \{ - - //! Run a thorough verification of all B+ tree invariants. The program - //! aborts via assert() if something is wrong. - void verify() const { - key_type minkey, maxkey; - tree_stats vstats; - - if (root_) - { - verify_node(root_, &minkey, &maxkey, vstats); - - assert(vstats.size == stats_.size); - assert(vstats.leaves == stats_.leaves); - assert(vstats.inner_nodes == stats_.inner_nodes); - - verify_leaflinks(); - } - } - -private: - //! Recursively descend down the tree and verify each node - void verify_node(const node* n, key_type* minkey, key_type* maxkey, - tree_stats& vstats) const { - TLX_BTREE_PRINT("verifynode " << n); - - if (n->is_leafnode()) - { - const LeafNode* leaf = static_cast<const LeafNode*>(n); - - assert(leaf == root_ || !leaf->is_underflow()); - assert(leaf->slotuse > 0); - - for (unsigned short slot = 0; slot < leaf->slotuse - 1; ++slot) - { - assert( - key_lessequal(leaf->key(slot), leaf->key(slot + 1))); - } - - *minkey = leaf->key(0); - *maxkey = leaf->key(leaf->slotuse - 1); - - vstats.leaves++; - vstats.size += leaf->slotuse; - } - else // !n->is_leafnode() - { - const InnerNode* inner = static_cast<const InnerNode*>(n); - vstats.inner_nodes++; - - assert(inner == root_ || !inner->is_underflow()); - assert(inner->slotuse > 0); - - for (unsigned short slot = 0; slot < inner->slotuse - 1; ++slot) - { - assert( - key_lessequal(inner->key(slot), inner->key(slot + 1))); - } - - for (unsigned short slot = 0; slot <= inner->slotuse; ++slot) - { - const node* subnode = inner->childid[slot]; - key_type subminkey = key_type(); - key_type submaxkey = key_type(); - - assert(subnode->level + 1 == inner->level); - verify_node(subnode, &subminkey, &submaxkey, vstats); - - TLX_BTREE_PRINT("verify subnode " << subnode << - ": " << subminkey << - " - " << submaxkey); - - if (slot == 0) - *minkey = subminkey; - else - assert( - key_greaterequal(subminkey, inner->key(slot - 1))); - - if (slot == inner->slotuse) - *maxkey = submaxkey; - else - assert(key_equal(inner->key(slot), submaxkey)); - - if (inner->level == 1 && slot < inner->slotuse) - { - // children are leaves and must be linked together in the - // correct order - const LeafNode* leafa = static_cast<const LeafNode*>( - inner->childid[slot]); - const LeafNode* leafb = static_cast<const LeafNode*>( - inner->childid[slot + 1]); - - assert(leafa->next_leaf == leafb); - assert(leafa == leafb->prev_leaf); - } - if (inner->level == 2 && slot < inner->slotuse) - { - // verify leaf links between the adjacent inner nodes - const InnerNode* parenta = static_cast<const InnerNode*>( - inner->childid[slot]); - const InnerNode* parentb = static_cast<const InnerNode*>( - inner->childid[slot + 1]); - - const LeafNode* leafa = static_cast<const LeafNode*>( - parenta->childid[parenta->slotuse]); - const LeafNode* leafb = static_cast<const LeafNode*>( - parentb->childid[0]); - - assert(leafa->next_leaf == leafb); - assert(leafa == leafb->prev_leaf); - } - - assert(inner->weight[slot] == calculate_weight(inner->childid[slot])); - } - } - } - - //! Verify the double linked list of leaves. - void verify_leaflinks() const { - const LeafNode* n = head_leaf_; - - assert(n->level == 0); - assert(!n || n->prev_leaf == nullptr); - - unsigned int testcount = 0; - - while (n) - { - assert(n->level == 0); - assert(n->slotuse > 0); - - for (unsigned short slot = 0; slot < n->slotuse - 1; ++slot) - { - assert(key_lessequal(n->key(slot), n->key(slot + 1))); - } - - testcount += n->slotuse; - - if (n->next_leaf) - { - assert(key_lessequal(n->key(n->slotuse - 1), - n->next_leaf->key(0))); - - assert(n == n->next_leaf->prev_leaf); - } - else - { - assert(tail_leaf_ == n); - } - - n = n->next_leaf; - } - - assert(testcount == size()); - } - - //! \} -}; - -//! \} -//! \} - -} // namespace tlx - -#endif // !TLX_CONTAINER_BTREE_HEADER - -/******************************************************************************/ diff --git a/include/ds/BitArray.h b/include/ds/BitArray.h deleted file mode 100644 index 586561b..0000000 --- a/include/ds/BitArray.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * include/ds/BitArray.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <cstdlib> -#include <memory> -#include <cstring> - -#include "util/base.h" - -namespace de { - -class BitArray { -public: - BitArray(size_t bits): m_bits(bits), m_data(nullptr) { - if (m_bits > 0) { - size_t n_bytes = (m_bits >> 3) << 3; - m_data = (char*) std::aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(n_bytes)); - memset(m_data, 0, n_bytes); - } - } - - ~BitArray() { - if (m_data) free(m_data); - } - - bool is_set(size_t bit) { - if (bit >= m_bits) return false; - return m_data[bit >> 3] & (1 << (bit & 7)); - } - - int set(size_t bit) { - if (bit >= m_bits) return 0; - m_data[bit >> 3] |= ((char) 1 << (bit & 7)); - return 1; - } - - int unset(size_t bit) { - if (bit >= m_bits) return 0; - m_data[bit >> 3] &= ~((char) 1 << (bit & 7)); - return 1; - } - - void clear() { - memset(m_data, 0, (m_bits >> 3) << 3); - } - - size_t mem_size() { - return m_bits >> 3; - } - - size_t size() { - return m_bits; - } - -private: - size_t m_bits; - char* m_data; -}; - -} diff --git a/include/ds/BloomFilter.h b/include/ds/BloomFilter.h deleted file mode 100644 index f7e1c08..0000000 --- a/include/ds/BloomFilter.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * include/ds/BloomFilter.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <cmath> -#include <gsl/gsl_rng.h> - -#include "ds/BitArray.h" -#include "util/hash.h" - -namespace de { - -template <typename K> -class BloomFilter { -public: - BloomFilter(size_t n_bits, size_t k) - : m_n_bits(n_bits), m_n_salts(k), m_bitarray(n_bits) { - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - salt = (uint16_t*) aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(k * sizeof(uint16_t))); - for (size_t i = 0; i < k; ++i) { - salt[i] = (uint16_t) gsl_rng_uniform_int(rng, 1 << 16); - } - - gsl_rng_free(rng); - } - - BloomFilter(double max_fpr, size_t n, size_t k) - : BloomFilter((size_t)(-(double) (k * n) / std::log(1.0 - std::pow(max_fpr, 1.0 / k))), k) {} - - ~BloomFilter() { - if (salt) free(salt); - } - - int insert(const K& key, size_t sz = sizeof(K)) { - if (m_bitarray.size() == 0) return 0; - - for (size_t i = 0; i < m_n_salts; ++i) { - m_bitarray.set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits); - } - - return 1; - } - - bool lookup(const K& key, size_t sz = sizeof(K)) { - if (m_bitarray.size() == 0) return false; - for (size_t i = 0; i < m_n_salts; ++i) { - if (!m_bitarray.is_set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits)) - return false; - } - - return true; - } - - void clear() { - m_bitarray.clear(); - } - - size_t get_memory_usage() { - return this->m_bitarray.mem_size(); - } -private: - size_t m_n_salts; - size_t m_n_bits; - uint16_t* salt; - - BitArray m_bitarray; -}; - -} diff --git a/include/ds/PriorityQueue.h b/include/ds/PriorityQueue.h deleted file mode 100644 index 4612eef..0000000 --- a/include/ds/PriorityQueue.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * include/ds/BloomFilter.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <vector> -#include <cassert> - -namespace de { - -template <typename R> -struct queue_record { - const R *data; - size_t version; -}; - -template <typename R> -class standard_minheap { -public: - standard_minheap(R *baseline) {} - inline bool operator()(const R* a, const R* b) { - return *a < *b; - } -}; - -template <typename R> -class standard_maxheap { -public: - standard_maxheap(R *baseline) {} - inline bool operator()(const R* a, const R* b) { - return *a > *b; - } -}; - -template <typename R, typename CMP=standard_minheap<R>> -class PriorityQueue { -public: - PriorityQueue(size_t size, R* cmp_baseline=nullptr) : data(size), tail(0), cmp(cmp_baseline) {} - - ~PriorityQueue() = default; - - size_t size() const { - return tail; - } - - void pop() { - assert(this->tail != 0); - - // If there is only one element, just decrement the - // tail. - if (this->size() == 1) { - this->tail--; - return; - } - - swap(0, --this->tail); - - ssize_t idx = 0; - ssize_t child_idx; - - while ((child_idx = min_child(idx)) != -1 && heap_cmp(child_idx, idx)) { - swap(idx, child_idx); - idx = child_idx; - } - } - - void push(const R* record, size_t version=0) { - assert(tail != this->data.size()); - - size_t new_idx = this->tail++; - this->data[new_idx] = {record, version}; - - while (new_idx != 0 && !heap_cmp(parent(new_idx), new_idx)) { - swap(parent(new_idx), new_idx); - new_idx = parent(new_idx); - } - } - - - queue_record<R> peek(size_t depth=0) { - ssize_t idx = 0; - size_t cur_depth = 0; - - while (cur_depth != depth) { - idx = min_child(idx); - assert(idx != -1); - cur_depth++; - } - - return this->data[idx]; - } - -private: - std::vector<queue_record<R>> data; - CMP cmp; - size_t tail; - R *baseline; - - /* - * Swap the elements at position a and position - * b within the heap - */ - inline void swap(size_t a, size_t b) { - if (a == b) return; - - queue_record temp = this->data[a]; - this->data[a] = this->data[b]; - this->data[b] = temp; - } - - inline size_t left_child(size_t idx) { - return 2 * idx + 1; - } - - inline size_t right_child(size_t idx) { - return 2 * idx + 2; - } - - inline size_t parent(size_t idx) { - return (idx - 1) / 2; - } - - inline ssize_t min_child(size_t idx) { - ssize_t left = left_child(idx) < tail ? left_child(idx) : -1; - ssize_t right = right_child(idx) < tail ? right_child(idx) : -1; - - if (left == -1 && right == -1) { - return -1; - } else if (left == -1) { - return right; - } else if (right == -1) { - return left; - } - - return (heap_cmp(left, right)) ? left : right; - } - - inline bool heap_cmp(size_t a, size_t b) { - if (data[a].data != data[b].data) { - return cmp(data[a].data, data[b].data); - } else if (data[a].version != data[b].version) { - return data[a].version < data[b].version; - } - - constexpr bool has_tombstone = requires(const R &r) { - r.is_tombstone(); - }; - - if constexpr (has_tombstone) { - return data[a].data->is_tombstone() && data[b].data->is_tombstone(); - } else { - return false; - } - } -}; -} diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h index 5c903b9..524024b 100644 --- a/include/framework/DynamicExtension.h +++ b/include/framework/DynamicExtension.h @@ -21,8 +21,8 @@ #include "framework/RecordInterface.h" #include "shard/WIRS.h" -#include "ds/Alias.h" -#include "util/timer.h" +#include "psu-util/timer.h" +#include "psu-ds/Alias.h" namespace de { diff --git a/include/framework/MutableBuffer.h b/include/framework/MutableBuffer.h index 9eea5c8..b79fc02 100644 --- a/include/framework/MutableBuffer.h +++ b/include/framework/MutableBuffer.h @@ -16,15 +16,16 @@ #include <algorithm> #include <type_traits> -#include "util/base.h" +#include "psu-util/alignment.h" #include "util/bf_config.h" -#include "ds/BloomFilter.h" -#include "ds/Alias.h" -#include "util/timer.h" +#include "psu-ds/BloomFilter.h" +#include "psu-ds/Alias.h" +#include "psu-util/timer.h" #include "framework/RecordInterface.h" -namespace de { +using psudb::CACHELINE_SIZE; +namespace de { template <RecordInterface R> class MutableBuffer { @@ -37,7 +38,7 @@ public: m_data = (Wrapped<R>*) std::aligned_alloc(CACHELINE_SIZE, aligned_buffersize); m_tombstone_filter = nullptr; if (max_tombstone_cap > 0) { - m_tombstone_filter = new BloomFilter<R>(BF_FPR, max_tombstone_cap, BF_HASH_FUNCS); + m_tombstone_filter = new psudb::BloomFilter<R>(BF_FPR, max_tombstone_cap, BF_HASH_FUNCS); } } @@ -168,7 +169,7 @@ private: size_t m_tombstone_cap; Wrapped<R>* m_data; - BloomFilter<R>* m_tombstone_filter; + psudb::BloomFilter<R>* m_tombstone_filter; alignas(64) std::atomic<size_t> m_tombstonecnt; alignas(64) std::atomic<uint32_t> m_reccnt; diff --git a/include/framework/RecordInterface.h b/include/framework/RecordInterface.h index f0d9f5c..f78918c 100644 --- a/include/framework/RecordInterface.h +++ b/include/framework/RecordInterface.h @@ -13,8 +13,7 @@ #include <concepts> #include <cmath> -#include "util/base.h" -#include "util/hash.h" +#include "psu-util/hash.h" namespace de { @@ -208,7 +207,7 @@ struct EuclidPoint{ template<RecordInterface R> struct RecordHash { size_t operator()(R const &rec) const { - return hash_bytes((char *) &rec, sizeof(R)); + return psudb::hash_bytes((char *) &rec, sizeof(R)); } }; diff --git a/include/shard/Alex.h b/include/shard/Alex.h index be5222c..9f794dc 100644 --- a/include/shard/Alex.h +++ b/include/shard/Alex.h @@ -16,15 +16,21 @@ #include <concepts> #include "alex.h" -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/BloomFilter.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { template <RecordInterface R> diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index 3000a6a..a220792 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -16,9 +16,15 @@ #include "framework/MutableBuffer.h" #include "util/bf_config.h" -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "util/timer.h" +#include "psu-util/timer.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; namespace de { @@ -364,7 +370,7 @@ private: // Members: sorted data, internal ISAM levels, reccnt; Wrapped<R>* m_data; - BloomFilter<R> *m_bf; + psudb::BloomFilter<R> *m_bf; InMemISAMNode* m_isam_nodes; InMemISAMNode* m_root; size_t m_reccnt; diff --git a/include/shard/PGM.h b/include/shard/PGM.h index af20594..2cd153e 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -16,15 +16,21 @@ #include <concepts> #include "pgm/pgm_index.hpp" -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/BloomFilter.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { template <RecordInterface R> diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 973b684..69fcfbc 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -16,15 +16,21 @@ #include <concepts> #include "ts/builder.h" -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/BloomFilter.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { template <RecordInterface R> diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 86f4ab7..8feec84 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -15,15 +15,21 @@ #include <concepts> #include <map> -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/BloomFilter.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { template <NDRecordInterface R> diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h index fafdef0..19d3eea 100644 --- a/include/shard/WIRS.h +++ b/include/shard/WIRS.h @@ -16,16 +16,22 @@ #include <memory> #include <concepts> -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/Alias.h" -#include "ds/BloomFilter.h" +#include "psu-ds/Alias.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { thread_local size_t wirs_cancelations = 0; diff --git a/include/shard/WSS.h b/include/shard/WSS.h index 435e799..c0af573 100644 --- a/include/shard/WSS.h +++ b/include/shard/WSS.h @@ -16,16 +16,22 @@ #include <memory> #include <concepts> -#include "ds/PriorityQueue.h" +#include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "ds/Alias.h" -#include "ds/BloomFilter.h" +#include "psu-ds/Alias.h" +#include "psu-ds/BloomFilter.h" #include "util/bf_config.h" #include "framework/MutableBuffer.h" #include "framework/RecordInterface.h" #include "framework/ShardInterface.h" #include "framework/QueryInterface.h" +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + namespace de { thread_local size_t wss_cancelations = 0; diff --git a/include/util/Cursor.h b/include/util/Cursor.h index 815458c..1b0b8ed 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -9,9 +9,11 @@ */ #pragma once -#include "util/base.h" #include "framework/RecordInterface.h" -#include "io/PagedFile.h" + +#include "psu-ds/BloomFilter.h" +#include "psu-ds/PriorityQueue.h" +#include "psu-util/alignment.h" namespace de { template<typename R> @@ -37,19 +39,13 @@ struct Cursor { * not be closed. */ template<typename R> -inline static bool advance_cursor(Cursor<R> &cur, PagedFileIterator *iter = nullptr) { +inline static bool advance_cursor(Cursor<R> &cur) { cur.ptr++; cur.cur_rec_idx++; if (cur.cur_rec_idx >= cur.rec_cnt) return false; if (cur.ptr >= cur.end) { - if (iter && iter->next()) { - cur.ptr = (R*)iter->get_item(); - cur.end = cur.ptr + (PAGE_SIZE / sizeof(R)); - return true; - } - return false; } return true; diff --git a/include/util/base.h b/include/util/base.h deleted file mode 100644 index 1729687..0000000 --- a/include/util/base.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * include/util/base.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <cstdlib> -#include <cstdint> -#include <cstddef> -#include <memory> - -namespace de { - -// The correct quantity for use in alignment of buffers to be -// compatible with O_DIRECT -const size_t SECTOR_SIZE = 512; - -// The standard sized block of data (in bytes) for use in IO -// operations. -const size_t PAGE_SIZE = 4096; - -// The size of a cacheline, for alignment purposes. -const size_t CACHELINE_SIZE = 64; - -// The largest representable PageNum. A given file cannot -// have more pages than this. -const size_t MAX_PAGE_COUNT = UINT32_MAX; - -// The largest representable FileId. The file manager cannot -// manage more files than this. -const size_t MAX_FILE_COUNT = UINT32_MAX; - -// The largest representable FrameId. No buffer can be defined with -// more frames than this. -const size_t MAX_FRAME_COUNT = UINT32_MAX; - -// The number of bytes of zeroes available in ZEROBUF. Will be -// a multiple of the parm::PAGE_SIZE. -constexpr size_t ZEROBUF_SIZE = 8 * PAGE_SIZE; - -// A large, preallocated, buffer of zeroes used for pre-allocation -// of pages in a file. -alignas(SECTOR_SIZE) const char ZEROBUF[ZEROBUF_SIZE] = {0}; - - -// alignment code taken from TacoDB (file: tdb_base.h) -template<class T> -constexpr T -TYPEALIGN(uint64_t ALIGNVAL, T LEN) { - return (((uint64_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uint64_t) ((ALIGNVAL) - 1))); -} - -#define SHORTALIGN(LEN) TYPEALIGN(2, (LEN)) -#define INTALIGN(LEN) TYPEALIGN(4, (LEN)) -#define LONGALIGN(LEN) TYPEALIGN(8, (LEN)) -#define DOUBLEALIGN(LEN) TYPEALIGN(8, (LEN)) -#define MAXALIGN(LEN) TYPEALIGN(8, (LEN)) -#define CACHELINEALIGN(LEN) TYPEALIGN(CACHELINE_SIZE, (LEN)) -#define MAXALIGN_OF 8 - -// Returns a pointer to the idx'th page contained within a multi-page -// buffer. buffer must be page aligned, and idx must be less than the -// number of pages within the buffer, or the result is undefined. -static inline char *get_page(char *buffer, size_t idx) { - return buffer + (idx * PAGE_SIZE); -} - -} diff --git a/include/util/bf_config.h b/include/util/bf_config.h index b4d68bc..2390643 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -9,7 +9,7 @@ */ #pragma once -#include "util/base.h" +#include "psu-util/alignment.h" namespace de { diff --git a/include/util/hash.h b/include/util/hash.h deleted file mode 100644 index 04871dc..0000000 --- a/include/util/hash.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * include/util/hash.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <cstdlib> -#include <cstdint> - -namespace de { - -// 40343 is a "magic constant" that works well, -// 38299 is another good value. -// Both are primes and have a good distribution of bits. -const uint64_t kHashMagicNum = 40343; - -inline uint64_t rotr64(uint64_t x, size_t n) { - return (((x) >> n) | ((x) << (64 - n))); -} - -inline uint64_t hash(uint64_t input) { - uint64_t local_rand = input; - uint64_t local_rand_hash = 8; - local_rand_hash = 40343 * local_rand_hash + ((local_rand) & 0xFFFF); - local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 16) & 0xFFFF); - local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 32) & 0xFFFF); - local_rand_hash = 40343 * local_rand_hash + (local_rand >> 48); - local_rand_hash = 40343 * local_rand_hash; - return rotr64(local_rand_hash, 43); -} - -inline uint64_t hash_bytes(const char* str, size_t len) { - uint64_t hashState = len; - - for(size_t idx = 0; idx < len; ++idx) { - hashState = kHashMagicNum * hashState + str[idx]; - } - - // The final scrambling helps with short keys that vary only on the high order bits. - // Low order bits are not always well distributed so shift them to the high end, where they'll - // form part of the 14-bit tag. - return rotr64(kHashMagicNum * hashState, 6); -} - -inline uint64_t hash_bytes_with_salt(const char* str, size_t len, uint16_t salt) { - uint64_t hashState = len; - - for(size_t idx = 0; idx < len; ++idx) { - hashState = kHashMagicNum * hashState + str[idx]; - } - - hashState = kHashMagicNum * hashState + salt; - - return rotr64(kHashMagicNum * hashState, 6); -} - -} diff --git a/include/util/timer.h b/include/util/timer.h deleted file mode 100644 index a9784a8..0000000 --- a/include/util/timer.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * include/util/timer.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <chrono> - -#ifdef ENABLE_TIMER -#define TIMER_INIT() \ - auto timer_start = std::chrono::high_resolution_clock::now(); \ - auto timer_stop = std::chrono::high_resolution_clock::now(); - -#define TIMER_START() \ - timer_start = std::chrono::high_resolution_clock::now() - -#define TIMER_STOP() \ - timer_stop = std::chrono::high_resolution_clock::now() - -#define TIMER_RESULT() \ - std::chrono::duration_cast<std::chrono::nanoseconds>(timer_stop - timer_start).count() - -#else - #define TIMER_INIT() \ - do {} while(0) - #define TIMER_START() \ - do {} while(0) - #define TIMER_STOP() \ - do {} while(0) - #define TIMER_RESULT() \ - 0l -#endif - diff --git a/include/util/types.h b/include/util/types.h index de5e379..3010e78 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -18,8 +18,6 @@ #include <cstddef> #include <string> -#include "util/base.h" - namespace de { using std::byte; |