summaryrefslogtreecommitdiffstats
path: root/include/shard
diff options
context:
space:
mode:
Diffstat (limited to 'include/shard')
-rw-r--r--include/shard/Alias.h10
-rw-r--r--include/shard/AugBTree.h311
-rw-r--r--include/shard/FSTrie.h4
-rw-r--r--include/shard/ISAMTree.h402
-rw-r--r--include/shard/PGM.h4
-rw-r--r--include/shard/TrieSpline.h4
-rw-r--r--include/shard/VPTree.h11
7 files changed, 213 insertions, 533 deletions
diff --git a/include/shard/Alias.h b/include/shard/Alias.h
index 72147d7..8fe70a5 100644
--- a/include/shard/Alias.h
+++ b/include/shard/Alias.h
@@ -25,21 +25,20 @@
using psudb::CACHELINE_SIZE;
using psudb::BloomFilter;
-using psudb::PriorityQueue;
-using psudb::queue_record;
using psudb::byte;
namespace de {
-static thread_local size_t wss_cancelations = 0;
-
template <WeightedRecordInterface R>
class Alias {
+public:
+ typedef R RECORD;
private:
typedef decltype(R::key) K;
typedef decltype(R::value) V;
typedef decltype(R::weight) W;
+
public:
Alias(BufferView<R> buffer)
: m_data(nullptr)
@@ -71,7 +70,7 @@ public:
}
}
- Alias(std::vector<Alias*> &shards)
+ Alias(std::vector<Alias*> const &shards)
: m_data(nullptr)
, m_alias(nullptr)
, m_total_weight(0)
@@ -167,7 +166,6 @@ public:
size_t min = 0;
size_t max = m_reccnt - 1;
- const char * record_key;
while (min < max) {
size_t mid = (min + max) / 2;
diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h
deleted file mode 100644
index c60cbcd..0000000
--- a/include/shard/AugBTree.h
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * include/shard/AugBTree.h
- *
- * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
- * Douglas B. Rumbaugh <drumbaugh@psu.edu>
- *
- * Distributed under the Modified BSD License.
- *
- * A shard shim around the alias augmented B-tree. Designed to be
- * used along side the WIRS query in include/query/wirs.h, but
- * also supports the necessary methods for other common query
- * types.
- *
- * TODO: The code in this file is very poorly commented.
- */
-#pragma once
-
-
-#include <vector>
-#include <cassert>
-
-#include "framework/ShardRequirements.h"
-
-#include "psu-ds/Alias.h"
-#include "psu-ds/BloomFilter.h"
-#include "util/bf_config.h"
-#include "util/SortedMerge.h"
-
-using psudb::CACHELINE_SIZE;
-using psudb::BloomFilter;
-using psudb::Alias;
-using psudb::byte;
-
-namespace de {
-
-template <WeightedRecordInterface R>
-struct AugBTreeNode {
- struct AugBTreeNode<R> *left, *right;
- decltype(R::key) low, high;
- decltype(R::weight) weight;
- Alias* alias;
-};
-
-template <WeightedRecordInterface R>
-class AugBTree {
-private:
- typedef decltype(R::key) K;
- typedef decltype(R::value) V;
- typedef decltype(R::weight) W;
-
-public:
- AugBTree(BufferView<R> buffer)
- : m_data(nullptr)
- , m_root(nullptr)
- , m_reccnt(0)
- , m_tombstone_cnt(0)
- , m_group_size(0)
- , m_alloc_size(0)
- , m_node_cnt(0)
- , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS))
- {
- m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
- buffer.get_record_count() *
- sizeof(Wrapped<R>),
- (byte**) &m_data);
-
- auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf);
- m_reccnt = res.record_count;
- m_tombstone_cnt = res.tombstone_count;
-
- if (m_reccnt > 0) {
- build_wirs_structure();
- }
- }
-
- AugBTree(std::vector<AugBTree*> shards)
- : m_data(nullptr)
- , m_root(nullptr)
- , m_reccnt(0)
- , m_tombstone_cnt(0)
- , m_group_size(0)
- , m_alloc_size(0)
- , m_node_cnt(0)
- , m_bf(nullptr)
- {
- size_t attemp_reccnt = 0;
- size_t tombstone_count = 0;
- auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count);
-
- m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
- m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
- attemp_reccnt * sizeof(Wrapped<R>),
- (byte **) &m_data);
-
- auto res = sorted_array_merge<R>(cursors, m_data, m_bf);
- m_reccnt = res.record_count;
- m_tombstone_cnt = res.tombstone_count;
-
- if (m_reccnt > 0) {
- build_wirs_structure();
- }
- }
-
- ~AugBTree() {
- free(m_data);
- for (size_t i=0; i<m_alias.size(); i++) {
- delete m_alias[i];
- }
-
- delete m_bf;
- free_tree(m_root);
- }
-
- Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
- if (filter && !m_bf->lookup(rec)) {
- return nullptr;
- }
-
- size_t idx = get_lower_bound(rec.key);
- if (idx >= m_reccnt) {
- return nullptr;
- }
-
- while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx;
-
- if (m_data[idx].rec == rec) {
- return m_data + idx;
- }
-
- return nullptr;
- }
-
- Wrapped<R>* get_data() const {
- return m_data;
- }
-
- size_t get_record_count() const {
- return m_reccnt;
- }
-
- size_t get_tombstone_count() const {
- return m_tombstone_cnt;
- }
-
- const Wrapped<R>* get_record_at(size_t idx) const {
- if (idx >= m_reccnt) return nullptr;
- return m_data + idx;
- }
-
- size_t get_memory_usage() {
- return m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>);
- }
-
- size_t get_aux_memory_usage() {
- return (m_bf) ? m_bf->memory_usage() : 0;
- }
-
- size_t get_lower_bound(const K& key) const {
- size_t min = 0;
- size_t max = m_reccnt - 1;
-
- const char * record_key;
- while (min < max) {
- size_t mid = (min + max) / 2;
-
- if (key > m_data[mid].rec.key) {
- min = mid + 1;
- } else {
- max = mid;
- }
- }
-
- return min;
- }
-
- W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) {
- W total_weight = 0;
-
- /* Simulate a stack to unfold recursion. */
- struct AugBTreeNode<R>* st[64] = {0};
- st[0] = m_root;
- size_t top = 1;
- while(top > 0) {
- auto now = st[--top];
- if (covered_by(now, lower_key, upper_key) ||
- (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) {
- nodes.emplace_back(now);
- weights.emplace_back(now->weight);
- total_weight += now->weight;
- } else {
- if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left;
- if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right;
- }
- }
-
-
- return total_weight;
- }
-
- Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) {
- /* k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. */
-
- /* first level */
- auto node = (AugBTreeNode<R>*) internal_node;
-
- /* second level */
- auto fat_point = node->low + node->alias->get(rng);
-
- /* third level */
- size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng);
- auto record = m_data + rec_offset;
-
- /* bounds rejection */
- if (lower_key > record->rec.key || upper_key < record->rec.key) {
- return nullptr;
- }
-
- return record;
- }
-
-private:
-
- bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) {
- auto low_index = node->low * m_group_size;
- auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1);
- return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key;
- }
-
- bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) {
- auto low_index = node->low * m_group_size;
- auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1);
- return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key;
- }
-
- void build_wirs_structure() {
- m_group_size = std::ceil(std::log(m_reccnt));
- size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size);
-
- // Fat point construction + low level alias....
- double sum_weight = 0.0;
- std::vector<W> weights;
- std::vector<double> group_norm_weight;
- size_t i = 0;
- size_t group_no = 0;
- while (i < m_reccnt) {
- double group_weight = 0.0;
- group_norm_weight.clear();
- for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) {
- auto w = m_data[i].rec.weight;
- group_norm_weight.emplace_back(w);
- group_weight += w;
- sum_weight += w;
- }
-
- for (auto& w: group_norm_weight)
- if (group_weight) w /= group_weight;
- else w = 1.0 / group_norm_weight.size();
- m_alias.emplace_back(new Alias(group_norm_weight));
-
-
- weights.emplace_back(group_weight);
- }
-
- assert(weights.size() == n_groups);
-
- m_root = construct_AugBTreeNode(weights, 0, n_groups-1);
- }
-
- struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) {
- if (low == high) {
- return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})};
- } else if (low > high) return nullptr;
-
- std::vector<double> node_weights;
- W sum = 0;
- for (size_t i = low; i < high; ++i) {
- node_weights.emplace_back(weights[i]);
- sum += weights[i];
- }
-
- for (auto& w: node_weights)
- if (sum) w /= sum;
- else w = 1.0 / node_weights.size();
-
- m_node_cnt += 1;
- size_t mid = (low + high) / 2;
- return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid),
- construct_AugBTreeNode(weights, mid + 1, high),
- low, high, sum, new Alias(node_weights)};
- }
-
- void free_tree(struct AugBTreeNode<R>* node) {
- if (node) {
- delete node->alias;
- free_tree(node->left);
- free_tree(node->right);
- delete node;
- }
- }
-
- Wrapped<R>* m_data;
- std::vector<Alias *> m_alias;
- AugBTreeNode<R>* m_root;
- size_t m_reccnt;
- size_t m_tombstone_cnt;
- size_t m_group_size;
- size_t m_alloc_size;
- size_t m_node_cnt;
- BloomFilter<R> *m_bf;
-};
-}
diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h
index 3783b38..4e51037 100644
--- a/include/shard/FSTrie.h
+++ b/include/shard/FSTrie.h
@@ -26,6 +26,8 @@ namespace de {
template <KVPInterface R>
class FSTrie {
+public:
+ typedef R RECORD;
private:
typedef decltype(R::key) K;
@@ -80,7 +82,7 @@ public:
delete[] temp_buffer;
}
- FSTrie(std::vector<FSTrie*> &shards)
+ FSTrie(std::vector<FSTrie*> const &shards)
: m_data(nullptr)
, m_reccnt(0)
, m_alloc_size(0)
diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h
index 1cca506..64c0b2b 100644
--- a/include/shard/ISAMTree.h
+++ b/include/shard/ISAMTree.h
@@ -1,8 +1,8 @@
/*
* include/shard/ISAMTree.h
*
- * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
- * Dong Xie <dongx@psu.edu>
+ * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
*
* Distributed under the Modified BSD License.
*
@@ -12,258 +12,246 @@
*/
#pragma once
-#include <vector>
#include <cassert>
+#include <vector>
#include "framework/ShardRequirements.h"
-#include "util/bf_config.h"
#include "psu-ds/BloomFilter.h"
#include "util/SortedMerge.h"
+#include "util/bf_config.h"
-using psudb::CACHELINE_SIZE;
using psudb::BloomFilter;
-using psudb::PriorityQueue;
-using psudb::queue_record;
using psudb::byte;
+using psudb::CACHELINE_SIZE;
namespace de {
-template <KVPInterface R>
-class ISAMTree {
+template <KVPInterface R> class ISAMTree {
private:
+ typedef decltype(R::key) K;
+ typedef decltype(R::value) V;
-typedef decltype(R::key) K;
-typedef decltype(R::value) V;
-
-constexpr static size_t NODE_SZ = 256;
-constexpr static size_t INTERNAL_FANOUT = NODE_SZ / (sizeof(K) + sizeof(byte*));
+ constexpr static size_t NODE_SZ = 256;
+ constexpr static size_t INTERNAL_FANOUT =
+ NODE_SZ / (sizeof(K) + sizeof(byte *));
-struct InternalNode {
+ struct InternalNode {
K keys[INTERNAL_FANOUT];
- byte* child[INTERNAL_FANOUT];
-};
-
-static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match");
+ byte *child[INTERNAL_FANOUT];
+ };
-constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R);
+ static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match");
+ constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R);
public:
- ISAMTree(BufferView<R> buffer)
- : m_bf(nullptr)
- , m_isam_nodes(nullptr)
- , m_root(nullptr)
- , m_reccnt(0)
- , m_tombstone_cnt(0)
- , m_internal_node_cnt(0)
- , m_deleted_cnt(0)
- , m_alloc_size(0)
- {
- m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
- buffer.get_record_count() *
- sizeof(Wrapped<R>),
- (byte**) &m_data);
-
- auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf);
- m_reccnt = res.record_count;
- m_tombstone_cnt = res.tombstone_count;
-
- if (m_reccnt > 0) {
- build_internal_levels();
- }
+ typedef R RECORD;
+
+ ISAMTree(BufferView<R> buffer)
+ : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0),
+ m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0),
+ m_alloc_size(0) {
+ m_alloc_size = psudb::sf_aligned_alloc(
+ CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped<R>),
+ (byte **)&m_data);
+
+ auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf);
+ m_reccnt = res.record_count;
+ m_tombstone_cnt = res.tombstone_count;
+
+ if (m_reccnt > 0) {
+ build_internal_levels();
+ }
+ }
+
+ ISAMTree(std::vector<ISAMTree *> const &shards)
+ : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0),
+ m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0),
+ m_alloc_size(0) {
+ size_t attemp_reccnt = 0;
+ size_t tombstone_count = 0;
+ auto cursors =
+ build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count);
+
+ m_bf = nullptr;
+ m_alloc_size = psudb::sf_aligned_alloc(
+ CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped<R>), (byte **)&m_data);
+
+ auto res = sorted_array_merge<R>(cursors, m_data, m_bf);
+ m_reccnt = res.record_count;
+ m_tombstone_cnt = res.tombstone_count;
+
+ if (m_reccnt > 0) {
+ build_internal_levels();
}
+ }
- ISAMTree(std::vector<ISAMTree*> &shards)
- : m_bf(nullptr)
- , m_isam_nodes(nullptr)
- , m_root(nullptr)
- , m_reccnt(0)
- , m_tombstone_cnt(0)
- , m_internal_node_cnt(0)
- , m_deleted_cnt(0)
- , m_alloc_size(0)
- {
- size_t attemp_reccnt = 0;
- size_t tombstone_count = 0;
- auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count);
-
- m_bf = nullptr;
- m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
- attemp_reccnt * sizeof(Wrapped<R>),
- (byte **) &m_data);
-
- auto res = sorted_array_merge<R>(cursors, m_data, m_bf);
- m_reccnt = res.record_count;
- m_tombstone_cnt = res.tombstone_count;
-
- if (m_reccnt > 0) {
- build_internal_levels();
- }
+ ~ISAMTree() {
+ free(m_data);
+ free(m_isam_nodes);
+ delete m_bf;
+ }
+
+ Wrapped<R> *point_lookup(const R &rec, bool filter = false) {
+ if (filter && !m_bf->lookup(rec)) {
+ return nullptr;
}
- ~ISAMTree() {
- free(m_data);
- free(m_isam_nodes);
- delete m_bf;
+ size_t idx = get_lower_bound(rec.key);
+ if (idx >= m_reccnt) {
+ return nullptr;
}
- Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
- if (filter && !m_bf->lookup(rec)) {
- return nullptr;
- }
+ while (idx < m_reccnt && m_data[idx].rec < rec)
+ ++idx;
- size_t idx = get_lower_bound(rec.key);
- if (idx >= m_reccnt) {
- return nullptr;
- }
+ if (m_data[idx].rec == rec) {
+ return m_data + idx;
+ }
- while (idx < m_reccnt && m_data[idx].rec < rec) ++idx;
+ return nullptr;
+ }
- if (m_data[idx].rec == rec) {
- return m_data + idx;
- }
+ Wrapped<R> *get_data() const { return m_data; }
- return nullptr;
- }
+ size_t get_record_count() const { return m_reccnt; }
- Wrapped<R>* get_data() const {
- return m_data;
- }
-
- size_t get_record_count() const {
- return m_reccnt;
- }
+ size_t get_tombstone_count() const { return m_tombstone_cnt; }
- size_t get_tombstone_count() const {
- return m_tombstone_cnt;
- }
+ size_t get_memory_usage() const { return m_internal_node_cnt * NODE_SZ; }
+ size_t get_aux_memory_usage() const { return (m_bf) ? m_bf->memory_usage() : 0; }
- size_t get_memory_usage() {
- return m_internal_node_cnt * NODE_SZ;
- }
+ /* SortedShardInterface methods */
+ size_t get_lower_bound(const K &key) const {
+ const InternalNode *now = m_root;
+ while (!is_leaf(reinterpret_cast<const byte *>(now))) {
+ const InternalNode *next = nullptr;
+ for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) {
+ if (now->child[i + 1] == nullptr || key <= now->keys[i]) {
+ next = reinterpret_cast<InternalNode *>(now->child[i]);
+ break;
+ }
+ }
- size_t get_aux_memory_usage() {
- return (m_bf) ? m_bf->memory_usage() : 0;
+ now = next ? next
+ : reinterpret_cast<const InternalNode *>(
+ now->child[INTERNAL_FANOUT - 1]);
}
- /* SortedShardInterface methods */
- size_t get_lower_bound(const K& key) const {
- const InternalNode* now = m_root;
- while (!is_leaf(reinterpret_cast<const byte*>(now))) {
- const InternalNode* next = nullptr;
- for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) {
- if (now->child[i + 1] == nullptr || key <= now->keys[i]) {
- next = reinterpret_cast<InternalNode*>(now->child[i]);
- break;
- }
- }
-
- now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]);
+ const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now);
+ while (pos < m_data + m_reccnt && pos->rec.key < key)
+ pos++;
+
+ return pos - m_data;
+ }
+
+ size_t get_upper_bound(const K &key) const {
+ const InternalNode *now = m_root;
+ while (!is_leaf(reinterpret_cast<const byte *>(now))) {
+ const InternalNode *next = nullptr;
+ for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) {
+ if (now->child[i + 1] == nullptr || key < now->keys[i]) {
+ next = reinterpret_cast<InternalNode *>(now->child[i]);
+ break;
}
+ }
- const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
- while (pos < m_data + m_reccnt && pos->rec.key < key) pos++;
-
- return pos - m_data;
+ now = next ? next
+ : reinterpret_cast<const InternalNode *>(
+ now->child[INTERNAL_FANOUT - 1]);
}
- size_t get_upper_bound(const K& key) const {
- const InternalNode* now = m_root;
- while (!is_leaf(reinterpret_cast<const byte*>(now))) {
- const InternalNode* next = nullptr;
- for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) {
- if (now->child[i + 1] == nullptr || key < now->keys[i]) {
- next = reinterpret_cast<InternalNode*>(now->child[i]);
- break;
- }
- }
-
- now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]);
- }
-
- const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
- while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++;
+ const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now);
+ while (pos < m_data + m_reccnt && pos->rec.key <= key)
+ pos++;
- return pos - m_data;
- }
+ return pos - m_data;
+ }
- const Wrapped<R>* get_record_at(size_t idx) const {
- return (idx < m_reccnt) ? m_data + idx : nullptr;
- }
+ const Wrapped<R> *get_record_at(size_t idx) const {
+ return (idx < m_reccnt) ? m_data + idx : nullptr;
+ }
private:
- void build_internal_levels() {
- size_t n_leaf_nodes = m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0);
-
- size_t level_node_cnt = n_leaf_nodes;
- size_t node_cnt = 0;
- do {
- level_node_cnt = level_node_cnt / INTERNAL_FANOUT + (level_node_cnt % INTERNAL_FANOUT != 0);
- node_cnt += level_node_cnt;
- } while (level_node_cnt > 1);
-
- m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, (byte**) &m_isam_nodes);
- m_internal_node_cnt = node_cnt;
-
- InternalNode* current_node = m_isam_nodes;
-
- const Wrapped<R>* leaf_base = m_data;
- const Wrapped<R>* leaf_stop = m_data + m_reccnt;
- while (leaf_base < leaf_stop) {
- size_t fanout = 0;
- for (size_t i = 0; i < INTERNAL_FANOUT; ++i) {
- auto rec_ptr = leaf_base + LEAF_FANOUT * i;
- if (rec_ptr >= leaf_stop) break;
- const Wrapped<R>* sep_key = std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1);
- current_node->keys[i] = sep_key->rec.key;
- current_node->child[i] = (byte*)rec_ptr;
- ++fanout;
- }
- current_node++;
- leaf_base += fanout * LEAF_FANOUT;
- }
-
- auto level_start = m_isam_nodes;
- auto level_stop = current_node;
- auto current_level_node_cnt = level_stop - level_start;
- while (current_level_node_cnt > 1) {
- auto now = level_start;
- while (now < level_stop) {
- size_t child_cnt = 0;
- for (size_t i = 0; i < INTERNAL_FANOUT; ++i) {
- auto node_ptr = now + i;
- ++child_cnt;
- if (node_ptr >= level_stop) break;
- current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1];
- current_node->child[i] = (byte*)node_ptr;
- }
- now += child_cnt;
- current_node++;
- }
- level_start = level_stop;
- level_stop = current_node;
- current_level_node_cnt = level_stop - level_start;
- }
-
- assert(current_level_node_cnt == 1);
- m_root = level_start;
+ void build_internal_levels() {
+ size_t n_leaf_nodes =
+ m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0);
+
+ size_t level_node_cnt = n_leaf_nodes;
+ size_t node_cnt = 0;
+ do {
+ level_node_cnt = level_node_cnt / INTERNAL_FANOUT +
+ (level_node_cnt % INTERNAL_FANOUT != 0);
+ node_cnt += level_node_cnt;
+ } while (level_node_cnt > 1);
+
+ m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ,
+ (byte **)&m_isam_nodes);
+ m_internal_node_cnt = node_cnt;
+
+ InternalNode *current_node = m_isam_nodes;
+
+ const Wrapped<R> *leaf_base = m_data;
+ const Wrapped<R> *leaf_stop = m_data + m_reccnt;
+ while (leaf_base < leaf_stop) {
+ size_t fanout = 0;
+ for (size_t i = 0; i < INTERNAL_FANOUT; ++i) {
+ auto rec_ptr = leaf_base + LEAF_FANOUT * i;
+ if (rec_ptr >= leaf_stop)
+ break;
+ const Wrapped<R> *sep_key =
+ std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1);
+ current_node->keys[i] = sep_key->rec.key;
+ current_node->child[i] = (byte *)rec_ptr;
+ ++fanout;
+ }
+ current_node++;
+ leaf_base += fanout * LEAF_FANOUT;
}
- bool is_leaf(const byte* ptr) const {
- return ptr >= (const byte*)m_data && ptr < (const byte*)(m_data + m_reccnt);
+ auto level_start = m_isam_nodes;
+ auto level_stop = current_node;
+ auto current_level_node_cnt = level_stop - level_start;
+ while (current_level_node_cnt > 1) {
+ auto now = level_start;
+ while (now < level_stop) {
+ size_t child_cnt = 0;
+ for (size_t i = 0; i < INTERNAL_FANOUT; ++i) {
+ auto node_ptr = now + i;
+ ++child_cnt;
+ if (node_ptr >= level_stop)
+ break;
+ current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1];
+ current_node->child[i] = (byte *)node_ptr;
+ }
+ now += child_cnt;
+ current_node++;
+ }
+ level_start = level_stop;
+ level_stop = current_node;
+ current_level_node_cnt = level_stop - level_start;
}
- psudb::BloomFilter<R> *m_bf;
- InternalNode* m_isam_nodes;
- InternalNode* m_root;
- size_t m_reccnt;
- size_t m_tombstone_cnt;
- size_t m_internal_node_cnt;
- size_t m_deleted_cnt;
- size_t m_alloc_size;
-
- Wrapped<R>* m_data;
+ assert(current_level_node_cnt == 1);
+ m_root = level_start;
+ }
+
+ bool is_leaf(const byte *ptr) const {
+ return ptr >= (const byte *)m_data &&
+ ptr < (const byte *)(m_data + m_reccnt);
+ }
+
+ psudb::BloomFilter<R> *m_bf;
+ InternalNode *m_isam_nodes;
+ InternalNode *m_root;
+ size_t m_reccnt;
+ size_t m_tombstone_cnt;
+ size_t m_internal_node_cnt;
+ size_t m_deleted_cnt;
+ size_t m_alloc_size;
+
+ Wrapped<R> *m_data;
};
-}
+} // namespace de
diff --git a/include/shard/PGM.h b/include/shard/PGM.h
index 509796b..7d1f492 100644
--- a/include/shard/PGM.h
+++ b/include/shard/PGM.h
@@ -33,6 +33,8 @@ namespace de {
template <RecordInterface R, size_t epsilon=128>
class PGM {
+public:
+ typedef R RECORD;
private:
typedef decltype(R::key) K;
typedef decltype(R::value) V;
@@ -109,7 +111,7 @@ public:
}
}
- PGM(std::vector<PGM*> shards)
+ PGM(std::vector<PGM*> const &shards)
: m_data(nullptr)
, m_bf(nullptr)
, m_reccnt(0)
diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h
index 581277e..9d8c3bb 100644
--- a/include/shard/TrieSpline.h
+++ b/include/shard/TrieSpline.h
@@ -30,6 +30,8 @@ namespace de {
template <KVPInterface R, size_t E=1024>
class TrieSpline {
+public:
+ typedef R RECORD;
private:
typedef decltype(R::key) K;
typedef decltype(R::value) V;
@@ -122,7 +124,7 @@ public:
}
}
- TrieSpline(std::vector<TrieSpline*> &shards)
+ TrieSpline(std::vector<TrieSpline*> const &shards)
: m_reccnt(0)
, m_tombstone_cnt(0)
, m_alloc_size(0)
diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h
index d5a2393..477db5c 100644
--- a/include/shard/VPTree.h
+++ b/include/shard/VPTree.h
@@ -21,13 +21,15 @@
using psudb::CACHELINE_SIZE;
using psudb::PriorityQueue;
-using psudb::queue_record;
using psudb::byte;
namespace de {
template <NDRecordInterface R, size_t LEAFSZ=100, bool HMAP=false>
class VPTree {
+public:
+ typedef R RECORD;
+
private:
struct vpnode {
size_t start;
@@ -50,7 +52,7 @@ private:
public:
VPTree(BufferView<R> buffer)
- : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) {
+ : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) {
m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
@@ -59,8 +61,6 @@ public:
(byte**) &m_data);
m_ptrs = new vp_ptr[buffer.get_record_count()];
-
- size_t offset = 0;
m_reccnt = 0;
// FIXME: will eventually need to figure out tombstones
@@ -87,7 +87,7 @@ public:
}
VPTree(std::vector<VPTree*> shards)
- : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) {
+ : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) {
size_t attemp_reccnt = 0;
for (size_t i=0; i<shards.size(); i++) {
@@ -363,7 +363,6 @@ private:
if (d < *farthest) {
if (pq.size() == k) {
- auto t = pq.peek().data->rec;
pq.pop();
}
pq.push(m_ptrs[node->start].ptr);