| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-02-07 17:23:23 -0500 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-02-07 17:24:50 -0500 |
| commit | bd74e27b28bd95267ce50d2e4b6f12b51d9b6aae | |
| tree | 8e40038feaa9c83c4da967ab78564c51fc67a653 /include | |
| parent | 2c5d549b3618b9ea72e6eece4cb4f3da5a6811a8 | |
| download | dynamic-extension-bd74e27b28bd95267ce50d2e4b6f12b51d9b6aae.tar.gz | |
Cleaned up shard files (except VPTree)
Cleaned up the shard implementations, fixed a few bugs, and set up some
tests. There is still work to be done on tests for the weighted sampling
operations of the Alias and AugBTree shards.
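
As a rough illustration of that remaining test work, a first check for the weighted shards could simply verify weight bookkeeping before moving on to the sampling distributions themselves. The sketch below is only a sketch: it leans on accessors visible in this commit (`get_data`, `get_record_count`, `get_total_weight`), while the record type and the `BufferView` are assumed to come from the existing test scaffolding rather than from this diff.

```cpp
// Hypothetical sketch of a weight-bookkeeping check for the Alias shard.
// The BufferView is assumed to be produced by existing test fixtures; only
// the shard accessors used here are part of this commit.
#include <cassert>
#include <utility>
#include "shard/Alias.h"

template <typename R>  /* R: some type satisfying WeightedRecordInterface */
void check_alias_total_weight(de::BufferView<R> view) {
    auto shard = de::Alias<R>(std::move(view));      /* ctor added in this commit */

    decltype(R::weight) expected = 0;
    for (size_t i = 0; i < shard.get_record_count(); i++) {
        expected += shard.get_data()[i].rec.weight;  /* surviving records only */
    }

    /* the cached total should equal the sum over records that survived
     * tombstone cancellation and tagged-delete filtering */
    assert(shard.get_total_weight() == expected);
}
```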
Diffstat (limited to 'include')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/query/wss.h | 2 |
| -rw-r--r-- | include/shard/Alias.h | 155 |
| -rw-r--r-- | include/shard/AugBTree.h | 150 |
| -rw-r--r-- | include/shard/ISAMTree.h | 105 |
| -rw-r--r-- | include/shard/PGM.h | 140 |
| -rw-r--r-- | include/shard/TrieSpline.h | 149 |
| -rw-r--r-- | include/util/SortedMerge.h | 185 |
7 files changed, 357 insertions, 529 deletions
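
The pattern behind most of those deletions is easier to see outside the collapsed diff below: every shard constructor that previously carried its own priority-queue merge loop now delegates to the helpers in the new include/util/SortedMerge.h. Reproduced here from the ISAMTree hunk for readability (the Alias, AugBTree, PGM, and TrieSpline constructors follow the same shape; member initializers elided), the multi-shard constructor reduces to:

```cpp
ISAMTree(std::vector<ISAMTree*> &shards)
    : /* ... member initializers unchanged ... */
{
    size_t attemp_reccnt = 0;
    size_t tombstone_count = 0;

    /* gather a cursor per input shard, plus total record/tombstone counts */
    auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt,
                                                 &tombstone_count);

    /* size the bloom filter and the cacheline-aligned record array up front */
    m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
    m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
                                           attemp_reccnt * sizeof(Wrapped<R>),
                                           (byte **) &m_data);

    /* merge the sorted runs, cancelling tombstones and dropping tagged deletes */
    auto res = sorted_array_merge<R>(cursors, m_data, m_bf);
    m_reccnt = res.record_count;
    m_tombstone_cnt = res.tombstone_count;

    if (m_reccnt > 0) {
        build_internal_levels();
    }
}
```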
diff --git a/include/query/wss.h b/include/query/wss.h index ea36cb2..8797035 100644 --- a/include/query/wss.h +++ b/include/query/wss.h @@ -60,7 +60,7 @@ public: return res; } - static void* get_buffer_query_state(BufferState<R> *buffer, void *parms) { + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { BufferState<R> *state = new BufferState<R>(); auto parameters = (Parms<R>*) parms; if constexpr (Rejection) { diff --git a/include/shard/Alias.h b/include/shard/Alias.h index a234575..f0d1d59 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -14,20 +14,19 @@ #pragma once #include <vector> -#include <cassert> #include "framework/ShardRequirements.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; +using psudb::byte; namespace de { @@ -41,126 +40,73 @@ private: typedef decltype(R::weight) W; public: - Alias(BufferView<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - std::vector<W> weights; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wss_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } + Alias(BufferView<R> buffer) + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { + + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview<R>(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - weights.push_back(base->rec.weight); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); + if (m_reccnt > 0) { + std::vector<W> weights; + for (size_t i=0; i<m_reccnt; i++) { + weights.emplace_back(m_data[i].rec.weight); + m_total_weight += m_data[i].rec.weight; } - - base++; - } - if (m_reccnt > 0) { build_alias_structure(weights); } } Alias(std::vector<Alias*> &shards) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue<Wrapped<R>> pq(shards.size()); + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(nullptr) { size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec<R, Alias>(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector<W> weights; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - weights.push_back(cursor.ptr->rec.weight); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + std::vector<W> weights; + for (size_t i=0; i<m_reccnt; i++) { + weights.emplace_back(m_data[i].rec.weight); + m_total_weight += m_data[i].rec.weight; + } + build_alias_structure(weights); } } ~Alias() { - if (m_data) free(m_data); - if (m_alias) delete m_alias; - if (m_bf) delete m_bf; - + free(m_data); + delete m_alias; + delete m_bf; } Wrapped<R> *point_lookup(const R &rec, bool filter=false) { @@ -173,7 +119,7 @@ public: return nullptr; } - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; if (m_data[idx].rec == rec) { return m_data + idx; @@ -205,7 +151,7 @@ public: } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? m_bf->memory_usage() : 0; } W get_total_weight() { @@ -254,7 +200,6 @@ private: W m_total_weight; size_t m_reccnt; size_t m_tombstone_cnt; - size_t m_group_size; size_t m_alloc_size; BloomFilter<R> *m_bf; }; diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h index be664ac..58bd098 100644 --- a/include/shard/AugBTree.h +++ b/include/shard/AugBTree.h @@ -16,28 +16,21 @@ #include <vector> #include <cassert> -#include <queue> -#include <memory> -#include <concepts> #include "framework/ShardRequirements.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::Alias; +using psudb::byte; namespace de { -thread_local size_t wirs_cancelations = 0; - template <WeightedRecordInterface R> struct AugBTreeNode { struct AugBTreeNode<R> *left, *right; @@ -54,108 +47,52 @@ private: typedef decltype(R::weight) W; public: - AugBTree(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 
1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wirs_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } + AugBTree(BufferView<R> buffer) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_wirs_structure(); } } - AugBTree(AugBTree** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - + AugBTree(std::vector<AugBTree*> shards) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(nullptr) + { size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_wirs_structure(); @@ -163,13 +100,12 @@ public: } ~AugBTree() { - if (m_data) free(m_data); + free(m_data); for (size_t i=0; i<m_alias.size(); i++) { - if (m_alias[i]) delete m_alias[i]; + delete m_alias[i]; } - if (m_bf) delete m_bf; - + delete m_bf; free_tree(m_root); } @@ -183,7 +119,7 @@ public: return nullptr; } - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; if (m_data[idx].rec == rec) { return m_data + idx; @@ -209,13 +145,12 @@ public: return m_data + idx; } - size_t get_memory_usage() { return m_alloc_size + m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>); } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? m_bf->memory_usage() : 0; } size_t get_lower_bound(const K& key) const { @@ -364,7 +299,6 @@ private: Wrapped<R>* m_data; std::vector<Alias *> m_alias; AugBTreeNode<R>* m_root; - W m_total_weight; size_t m_reccnt; size_t m_tombstone_cnt; size_t m_group_size; diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 7de9cb1..33ba82f 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -17,9 +17,8 @@ #include "framework/ShardRequirements.h" #include "util/bf_config.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-util/timer.h" +#include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; @@ -61,60 +60,18 @@ public: , m_alloc_size(0) , m_data(nullptr) { - TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped<R>), (byte**) &m_data); - TIMER_START(); - auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped<R>)); - buffer.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + buffer.get_record_count(); - std::sort(base, stop, std::less<Wrapped<R>>()); - TIMER_STOP(); - - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } - - base++; - } - - TIMER_STOP(); - auto copy_time = TIMER_RESULT(); + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - TIMER_START(); if (m_reccnt > 0) { build_internal_levels(); } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - - free(temp_buffer); } ISAMTree(std::vector<ISAMTree*> &shards) @@ -128,58 +85,18 @@ public: , m_alloc_size(0) , m_data(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue<Wrapped<R>> pq(shards.size()); - size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped<R>), (byte **) &m_data); - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - if (cursor.ptr->is_tombstone()) { - //fprintf(stderr, "ISAM: Tombstone from shard %ld next record from shard %ld\n", - //now.version, next.version); - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_internal_levels(); @@ -225,11 +142,11 @@ public: size_t get_memory_usage() { - return m_alloc_size; + return m_alloc_size + m_internal_node_cnt * NODE_SZ; } size_t get_aux_memory_usage() { - return m_bf->memory_usage(); + return (m_bf) ? 
m_bf->memory_usage() : 0; } /* SortedShardInterface methods */ diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 13db26a..8031870 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -14,24 +14,19 @@ #include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> #include "framework/ShardRequirements.h" #include "pgm/pgm_index.hpp" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" #include "util/bf_config.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { @@ -41,111 +36,65 @@ private: typedef decltype(R::key) K; typedef decltype(R::value) V; - public: - PGM(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - std::vector<K> keys; - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); + PGM(BufferView<R> buffer) + : m_data(nullptr) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + auto res = sorted_array_from_bufferview<R>(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - std::sort(base, stop, std::less<Wrapped<R>>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; + if (m_reccnt > 0) { + std::vector<K> keys; + for (size_t i=0; i<m_reccnt; i++) { + keys.emplace_back(m_data[i].rec.key); } - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - keys.emplace_back(base->rec.key); - base++; - } - - if (m_reccnt > 0) { m_pgm = pgm::PGMIndex<K, epsilon>(keys); } } - PGM(PGM** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - + PGM(std::vector<PGM*> shards) + : m_data(nullptr) + , m_bf(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec<R, PGM>(shards, &attemp_reccnt, &tombstone_count); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector<K> keys; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - keys.emplace_back(cursor.ptr->rec.key); - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } + m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + std::vector<K> keys; + for (size_t i=0; i<m_reccnt; i++) { + keys.emplace_back(m_data[i].rec.key); + } + m_pgm = pgm::PGMIndex<K, epsilon>(keys); } } ~PGM() { - if (m_data) free(m_data); + free(m_data); + delete m_bf; } Wrapped<R> *point_lookup(const R &rec, bool filter=false) { @@ -186,7 +135,7 @@ public: } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? 
m_bf->memory_usage() : 0; } size_t get_lower_bound(const K& key) const { @@ -228,6 +177,7 @@ public: private: Wrapped<R>* m_data; + BloomFilter<R> *m_bf; size_t m_reccnt; size_t m_tombstone_cnt; size_t m_alloc_size; diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 9473177..f9fb3cb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -15,11 +15,9 @@ #include "framework/ShardRequirements.h" #include "ts/builder.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "psu-util/timer.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; @@ -45,78 +43,26 @@ public: , m_min_key(0) , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { - TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped<R>), (byte**) &m_data); - TIMER_START(); - auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped<R>)); - buffer.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + buffer.get_record_count(); - std::sort(base, stop, std::less<Wrapped<R>>()); - - K min_key = base->rec.key; - K max_key = (stop-1)->rec.key; - TIMER_STOP(); - - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - auto bldr = ts::Builder<K>(min_key, max_key, E); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - bldr.AddKey(base->rec.key); - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } + if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; - /* - * determine the "true" min/max keys based on the scan. This is - * to avoid situations where the min/max in the input array - * are deleted and don't survive into the structure itself. - */ - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; + auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); + for (size_t i=0; i<m_reccnt; i++) { + bldr.AddKey(m_data[i].rec.key); } - base++; - } - - TIMER_STOP(); - auto copy_time = TIMER_RESULT(); - - TIMER_START(); - if (m_reccnt > 0) { m_ts = bldr.Finalize(); } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - - free(temp_buffer); } TrieSpline(std::vector<TrieSpline*> &shards) @@ -128,77 +74,28 @@ public: , m_min_key(0) , m_bf(nullptr) { - - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue<Wrapped<R>> pq(shards.size()); - size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - /* - * Initialize m_max_key and m_min_key using the values from the - * first shard. 
These will later be updated when building - * the initial priority queue to their true values. - */ - m_max_key = shards[0]->m_max_key; - m_min_key = shards[0]->m_min_key; + auto cursors = build_cursor_vec<R, TrieSpline>(shards, &attemp_reccnt, &tombstone_count); - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - if (shards[i]->m_max_key > m_max_key) { - m_max_key = shards[i]->m_max_key; - } - - if (shards[i]->m_min_key < m_min_key) { - m_min_key = shards[i]->m_min_key; - } - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped<R>), (byte **) &m_data); - auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - bldr.AddKey(cursor.ptr->rec.key); - if (cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; + + auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); + for (size_t i=0; i<m_reccnt; i++) { + bldr.AddKey(m_data[i].rec.key); + } + m_ts = bldr.Finalize(); } } @@ -250,7 +147,7 @@ public: } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? m_bf->memory_usage() : 0; } size_t get_lower_bound(const K& key) const { diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h new file mode 100644 index 0000000..ed47acb --- /dev/null +++ b/include/util/SortedMerge.h @@ -0,0 +1,185 @@ +/* + * include/util/SortedMerge.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A sorted array merge routine for use in Shard construction, as many + * shards will use a sorted array to represent their data. Also encapsulates + * the necessary tombstone-cancellation logic. + * + * FIXME: include generic per-record processing functionality for Shards that + * need it, to avoid needing to reprocess the array in the shard after + * creation. 
+ */ +#pragma once + +#include "util/Cursor.h" +#include "framework/interface/Shard.h" +#include "psu-ds/PriorityQueue.h" + +namespace de { + +using psudb::PriorityQueue; +using psudb::BloomFilter; +using psudb::queue_record; +using psudb::byte; +using psudb::CACHELINE_SIZE; + +struct merge_info { + size_t record_count; + size_t tombstone_count; +}; + + +template <RecordInterface R, ShardInterface<R> S> +static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) { + std::vector<Cursor<Wrapped<R>>> cursors; + cursors.reserve(shards.size()); + + *reccnt = 0; + *tscnt = 0; + + for (size_t i = 0; i < shards.size(); ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); + *reccnt += shards[i]->get_record_count(); + *tscnt += shards[i]->get_tombstone_count(); + } else { + cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); + } + } + + return cursors; +} + +/* + * + */ +template <RecordInterface R> +static merge_info sorted_array_from_bufferview(BufferView<R> bv, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf=nullptr) { + /* + * Copy the contents of the buffer view into a temporary buffer, and + * sort them. We still need to iterate over these temporary records to + * apply tombstone/deleted record filtering, as well as any possible + * per-record processing that is required by the shard being built. + */ + auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, + bv.get_record_count(), + sizeof(Wrapped<R>)); + bv.copy_to_buffer((byte *) temp_buffer); + + auto base = temp_buffer; + auto stop = base + bv.get_record_count(); + std::sort(base, stop, std::less<Wrapped<R>>()); + + merge_info info = {0, 0}; + + /* + * Iterate over the temporary buffer to process the records, copying + * them into buffer as needed + */ + while (base < stop) { + if (!base->is_tombstone() && (base + 1 < stop) + && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; + } else if (base->is_deleted()) { + base += 1; + continue; + } + + // fixme: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. it should only need to be &= 1 + base->header &= 3; + buffer[info.record_count++] = *base; + + if (base->is_tombstone()) { + info.tombstone_count++; + if (bf){ + bf->insert(base->rec); + } + } + + base++; + } + + free(temp_buffer); + return info; +} + +/* + * Perform a sorted merge of the records within cursors into the provided + * buffer. Includes tombstone and tagged delete cancellation logic, and + * will insert tombstones into a bloom filter, if one is provided. + * + * The behavior of this function is undefined if the provided buffer does + * not have space to contain all of the records within the input cursors. + */ +template <RecordInterface R> +static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf=nullptr) { + + // FIXME: For smaller cursor arrays, it may be more efficient to skip + // the priority queue and just do a scan. + PriorityQueue<Wrapped<R>> pq(cursors.size()); + for (size_t i=0; i<cursors.size(); i++) { + pq.push(cursors[i].ptr, i); + } + + merge_info info = {0, 0}; + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; + /* + * if the current record is not a tombstone, and the next record is + * a tombstone that matches the current one, then the current one + * has been deleted, and both it and its tombstone can be skipped + * over. + */ + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[now.version]; + auto& cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[now.version]; + /* skip over records that have been deleted via tagging */ + if (!cursor.ptr->is_deleted()) { + buffer[info.record_count++] = *cursor.ptr; + + /* + * if the record is a tombstone, increment the ts count and + * insert it into the bloom filter if one has been + * provided. + */ + if (cursor.ptr->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(cursor.ptr->rec); + } + } + } + pq.pop(); + + if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); + } + } + + return info; +} + + + +} |
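
Because the new helpers are free functions, they can also be exercised directly in a unit test without going through a shard constructor. The sketch below merges two hand-built sorted runs; it is a minimal illustration under a couple of assumptions that this diff does not establish: that a simple key/value struct like `Rec` satisfies `RecordInterface`, and that a zeroed `header` marks a wrapped record as neither a tombstone nor tagged-deleted.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>
#include "util/SortedMerge.h"

/* Stand-in record type; assumed (not verified here) to satisfy RecordInterface. */
struct Rec {
    uint64_t key;
    uint64_t value;
    bool operator==(const Rec &other) const { return key == other.key; }
    bool operator<(const Rec &other) const { return key < other.key; }
};

void merge_two_sorted_runs() {
    using namespace de;

    /* two already-sorted runs; header = 0 is assumed to mean
     * "not a tombstone, not tagged-deleted" */
    std::vector<Wrapped<Rec>> run_a(4), run_b(4);
    for (uint64_t i = 0; i < 4; i++) {
        run_a[i].header = 0; run_a[i].rec = Rec{2 * i, i};     /* even keys */
        run_b[i].header = 0; run_b[i].rec = Rec{2 * i + 1, i}; /* odd keys  */
    }

    /* cursor layout {start, end, offset, count} as used by build_cursor_vec */
    std::vector<Cursor<Wrapped<Rec>>> cursors = {
        {run_a.data(), run_a.data() + run_a.size(), 0, run_a.size()},
        {run_b.data(), run_b.data() + run_b.size(), 0, run_b.size()},
    };

    Wrapped<Rec> out[8];
    auto info = sorted_array_merge<Rec>(cursors, out);  /* no bloom filter */

    assert(info.record_count == 8);
    assert(info.tombstone_count == 0);
    for (uint64_t i = 0; i < 8; i++) {
        assert(out[i].rec.key == i);  /* output is globally sorted */
    }
}
```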