diff options
| author | Douglas B. Rumbaugh <dbr4@psu.edu> | 2024-12-06 13:13:51 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-06 18:13:51 +0000 |
| commit | 9fe305c7d28e993e55c55427f377ae7e3251ea4f (patch) | |
| tree | 384b687f64b84eb81bde2becac8a5f24916b07b4 /include/shard/AugBTree.h | |
| parent | 47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc (diff) | |
| download | dynamic-extension-9fe305c7d28e993e55c55427f377ae7e3251ea4f.tar.gz | |
Interface update (#5)
* Query Interface Adjustments/Refactoring
Began the process of adjusting the query interface (and also the shard
interface, to a lesser degree) to better accommodate the user. In
particular the following changes have been made,
1. The number of necessary template arguments for the query type
has been drastically reduced, while also removing the void pointers
and manual delete functions from the interface.
This was accomplished by requiring many of the sub-types associated
with a query (parameters, etc.) to be nested inside the main query
class, and by forcing the SHARD type to expose its associated
record type.
2. User-defined query return types are now supported.
Queries no longer are required to return strictly sets of records.
Instead, the query now has LocalResultType and ResultType
template parameters (which can be defaulted using a typedef in
the Query type itself), allowing much more flexibility.
Note that, at least for the short term, the LocalResultType must
still expose the same is_deleted/is_tombstone interface as a
Wrapped<R> used to, as this is currently needed for delete
filtering. A better approach to this is, hopefully, forthcoming.
3. Updated the ISAMTree.h shard and rangequery.h query to use the
new interfaces, and adjusted the associated unit tests as well.
4. Dropped the unnecessary "get_data()" function from the ShardInterface
concept.
5. Dropped the need to specify a record type in the ShardInterface
concept. This is now handled using a required Shard::RECORD
member of the Shard class itself, which should expose the name
of the record type.
* Updates to framework to support new Query/Shard interfaces
Pretty extensive adjustments to the framework, particularly to the
templates themselves, along with some type-renaming work, to support
the new query and shard interfaces.
Adjusted the external query interface to take an rvalue reference, rather
than a pointer, to the query parameters.
* Removed framework-level delete filtering
This was causing some issues with the new query interface, and should
probably be reworked anyway, so I'm temporarily (TM) removing the
feature.
* Updated benchmarks + remaining code for new interface
Diffstat (limited to 'include/shard/AugBTree.h')
| -rw-r--r-- | include/shard/AugBTree.h | 311 |
1 files changed, 0 insertions, 311 deletions
diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h deleted file mode 100644 index c60cbcd..0000000 --- a/include/shard/AugBTree.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * include/shard/AugBTree.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * A shard shim around the alias augmented B-tree. Designed to be - * used along side the WIRS query in include/query/wirs.h, but - * also supports the necessary methods for other common query - * types. - * - * TODO: The code in this file is very poorly commented. - */ -#pragma once - - -#include <vector> -#include <cassert> - -#include "framework/ShardRequirements.h" - -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "util/SortedMerge.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::Alias; -using psudb::byte; - -namespace de { - -template <WeightedRecordInterface R> -struct AugBTreeNode { - struct AugBTreeNode<R> *left, *right; - decltype(R::key) low, high; - decltype(R::weight) weight; - Alias* alias; -}; - -template <WeightedRecordInterface R> -class AugBTree { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - AugBTree(BufferView<R> buffer) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) - { - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - buffer.get_record_count() * - sizeof(Wrapped<R>), - (byte**) &m_data); - - auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - AugBTree(std::vector<AugBTree*> shards) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(nullptr) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count); - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - attemp_reccnt * sizeof(Wrapped<R>), - (byte **) &m_data); - - auto res = sorted_array_merge<R>(cursors, m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - ~AugBTree() { - free(m_data); - for (size_t i=0; i<m_alias.size(); i++) { - delete m_alias[i]; - } - - delete m_bf; - free_tree(m_root); - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - size_t get_memory_usage() { - return m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>); - } - - size_t get_aux_memory_usage() { - return (m_bf) ? m_bf->memory_usage() : 0; - } - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) { - W total_weight = 0; - - /* Simulate a stack to unfold recursion. */ - struct AugBTreeNode<R>* st[64] = {0}; - st[0] = m_root; - size_t top = 1; - while(top > 0) { - auto now = st[--top]; - if (covered_by(now, lower_key, upper_key) || - (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) { - nodes.emplace_back(now); - weights.emplace_back(now->weight); - total_weight += now->weight; - } else { - if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left; - if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right; - } - } - - - return total_weight; - } - - Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) { - /* k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. */ - - /* first level */ - auto node = (AugBTreeNode<R>*) internal_node; - - /* second level */ - auto fat_point = node->low + node->alias->get(rng); - - /* third level */ - size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng); - auto record = m_data + rec_offset; - - /* bounds rejection */ - if (lower_key > record->rec.key || upper_key < record->rec.key) { - return nullptr; - } - - return record; - } - -private: - - bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; - } - - bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; - } - - void build_wirs_structure() { - m_group_size = std::ceil(std::log(m_reccnt)); - size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); - - // Fat point construction + low level alias.... - double sum_weight = 0.0; - std::vector<W> weights; - std::vector<double> group_norm_weight; - size_t i = 0; - size_t group_no = 0; - while (i < m_reccnt) { - double group_weight = 0.0; - group_norm_weight.clear(); - for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { - auto w = m_data[i].rec.weight; - group_norm_weight.emplace_back(w); - group_weight += w; - sum_weight += w; - } - - for (auto& w: group_norm_weight) - if (group_weight) w /= group_weight; - else w = 1.0 / group_norm_weight.size(); - m_alias.emplace_back(new Alias(group_norm_weight)); - - - weights.emplace_back(group_weight); - } - - assert(weights.size() == n_groups); - - m_root = construct_AugBTreeNode(weights, 0, n_groups-1); - } - - struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) { - if (low == high) { - return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; - } else if (low > high) return nullptr; - - std::vector<double> node_weights; - W sum = 0; - for (size_t i = low; i < high; ++i) { - node_weights.emplace_back(weights[i]); - sum += weights[i]; - } - - for (auto& w: node_weights) - if (sum) w /= sum; - else w = 1.0 / node_weights.size(); - - m_node_cnt += 1; - size_t mid = (low + high) / 2; - return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid), - construct_AugBTreeNode(weights, mid + 1, high), - low, high, sum, new Alias(node_weights)}; - } - - void free_tree(struct AugBTreeNode<R>* node) { - if (node) { - delete node->alias; - free_tree(node->left); - free_tree(node->right); - delete node; - } - } - - Wrapped<R>* m_data; - std::vector<Alias *> m_alias; - AugBTreeNode<R>* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - size_t m_node_cnt; - BloomFilter<R> *m_bf; -}; -} |