author    Douglas Rumbaugh <dbr4@psu.edu>  2025-09-25 14:42:44 -0400
committer Douglas Rumbaugh <dbr4@psu.edu>  2025-09-25 14:42:44 -0400
commit    cf5f3bbb0cb58430ed68ad3ebfcefc009e553d71 (patch)
tree      4c17bc3169ee195c236cea9c9efda0aef7488e3c /include/shard/FSTrie.h
parent    826c1fff5accbaa6b415acc176a5acbeb5f691b6 (diff)
download  dynamic-extension-cf5f3bbb0cb58430ed68ad3ebfcefc009e553d71.tar.gz
Code reformatting
Diffstat (limited to 'include/shard/FSTrie.h')
-rw-r--r--  include/shard/FSTrie.h  282
1 file changed, 135 insertions(+), 147 deletions(-)
diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h
index 59ff116..31db40b 100644
--- a/include/shard/FSTrie.h
+++ b/include/shard/FSTrie.h
@@ -9,194 +9,182 @@
*/
#pragma once
-
#include <vector>
#include "framework/ShardRequirements.h"
#include "fst.hpp"
#include "util/SortedMerge.h"
-using psudb::CACHELINE_SIZE;
using psudb::BloomFilter;
+using psudb::byte;
+using psudb::CACHELINE_SIZE;
using psudb::PriorityQueue;
using psudb::queue_record;
-using psudb::byte;
namespace de {
-template <KVPInterface R>
-class FSTrie {
+template <KVPInterface R> class FSTrie {
public:
- typedef R RECORD;
-private:
+ typedef R RECORD;
- typedef decltype(R::key) K;
- typedef decltype(R::value) V;
- static_assert(std::is_same_v<K, const char*>, "FST requires const char* keys.");
+private:
+ typedef decltype(R::key) K;
+ typedef decltype(R::value) V;
+ static_assert(std::is_same_v<K, const char *>,
+ "FST requires const char* keys.");
public:
- FSTrie(BufferView<R> buffer)
- : m_data(nullptr)
- , m_reccnt(0)
- , m_alloc_size(0)
- {
- m_data = new Wrapped<R>[buffer.get_record_count()]();
- m_alloc_size = sizeof(Wrapped<R>) * buffer.get_record_count();
-
- size_t cnt = 0;
- std::vector<std::string> keys;
- keys.reserve(buffer.get_record_count());
-
- /*
- * Copy the contents of the buffer view into a temporary buffer, and
- * sort them. We still need to iterate over these temporary records to
- * apply tombstone/deleted record filtering, as well as any possible
- * per-record processing that is required by the shard being built.
- */
- auto temp_buffer = new Wrapped<R>[buffer.get_record_count()]();
- for (size_t i=0; i<buffer.get_record_count(); i++) {
- temp_buffer[i] = *(buffer.get(i));
- }
-
- auto base = temp_buffer;
- auto stop = base + buffer.get_record_count();
- std::sort(base, stop, std::less<Wrapped<R>>());
+ FSTrie(BufferView<R> buffer) : m_data(nullptr), m_reccnt(0), m_alloc_size(0) {
+ m_data = new Wrapped<R>[buffer.get_record_count()]();
+ m_alloc_size = sizeof(Wrapped<R>) * buffer.get_record_count();
+
+ size_t cnt = 0;
+ std::vector<std::string> keys;
+ keys.reserve(buffer.get_record_count());
+
+ /*
+ * Copy the contents of the buffer view into a temporary buffer, and
+ * sort them. We still need to iterate over these temporary records to
+ * apply tombstone/deleted record filtering, as well as any possible
+ * per-record processing that is required by the shard being built.
+ */
+ auto temp_buffer = new Wrapped<R>[buffer.get_record_count()]();
+ for (size_t i = 0; i < buffer.get_record_count(); i++) {
+ temp_buffer[i] = *(buffer.get(i));
+ }
- for (size_t i=0; i<buffer.get_record_count(); i++) {
- if (temp_buffer[i].is_deleted() || !temp_buffer[i].is_visible() || temp_buffer[i].rec.key[0] != '\0') {
- continue;
- }
+ auto base = temp_buffer;
+ auto stop = base + buffer.get_record_count();
+ std::sort(base, stop, std::less<Wrapped<R>>());
- m_data[cnt] = temp_buffer[i];
- m_data[cnt].clear_timestamp();
+ for (size_t i = 0; i < buffer.get_record_count(); i++) {
+ if (temp_buffer[i].is_deleted() || !temp_buffer[i].is_visible() ||
+ temp_buffer[i].rec.key[0] != '\0') {
+ continue;
+ }
- keys.push_back(std::string(m_data[cnt].rec.key));
- cnt++;
- }
+ m_data[cnt] = temp_buffer[i];
+ m_data[cnt].clear_timestamp();
- m_reccnt = cnt;
- if (m_reccnt > 0) {
- m_fst = new fst::Trie(keys, true, 1);
- }
+ keys.push_back(std::string(m_data[cnt].rec.key));
+ cnt++;
+ }
- delete[] temp_buffer;
+ m_reccnt = cnt;
+ if (m_reccnt > 0) {
+ m_fst = new fst::Trie(keys, true, 1);
}
- FSTrie(std::vector<const FSTrie*> const &shards)
- : m_data(nullptr)
- , m_reccnt(0)
- , m_alloc_size(0)
- {
- size_t attemp_reccnt = 0;
- size_t tombstone_count = 0;
- auto cursors = build_cursor_vec<R, FSTrie>(shards, &attemp_reccnt, &tombstone_count);
-
- m_data = new Wrapped<R>[attemp_reccnt]();
- m_alloc_size = attemp_reccnt * sizeof(Wrapped<R>);
-
- std::vector<std::string> keys;
- keys.reserve(attemp_reccnt);
-
- // FIXME: For smaller cursor arrays, it may be more efficient to skip
- // the priority queue and just do a scan.
- PriorityQueue<Wrapped<R>> pq(cursors.size());
- for (size_t i=0; i<cursors.size(); i++) {
- pq.push(cursors[i].ptr, i);
- }
+ delete[] temp_buffer;
+ }
- while (pq.size()) {
- auto now = pq.peek();
- auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
- /*
- * if the current record is not a tombstone, and the next record is
- * a tombstone that matches the current one, then the current one
- * has been deleted, and both it and its tombstone can be skipped
- * over.
- */
- if (!now.data->is_tombstone() && next.data != nullptr &&
- now.data->rec == next.data->rec && next.data->is_tombstone()) {
-
- pq.pop(); pq.pop();
- auto& cursor1 = cursors[now.version];
- auto& cursor2 = cursors[next.version];
- if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version);
- if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version);
- } else {
- auto& cursor = cursors[now.version];
- /* skip over records that have been deleted via tagging */
- if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') {
- m_data[m_reccnt] = *cursor.ptr;
- keys.push_back(std::string(m_data[m_reccnt].rec.key));
-
- m_reccnt++;
- }
- pq.pop();
-
- if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version);
- }
- }
+ FSTrie(std::vector<const FSTrie *> const &shards)
+ : m_data(nullptr), m_reccnt(0), m_alloc_size(0) {
+ size_t attemp_reccnt = 0;
+ size_t tombstone_count = 0;
+ auto cursors =
+ build_cursor_vec<R, FSTrie>(shards, &attemp_reccnt, &tombstone_count);
- if (m_reccnt > 0) {
- m_fst = new fst::Trie(keys, true, 1);
- }
+ m_data = new Wrapped<R>[attemp_reccnt]();
+ m_alloc_size = attemp_reccnt * sizeof(Wrapped<R>);
+
+ std::vector<std::string> keys;
+ keys.reserve(attemp_reccnt);
+
+ // FIXME: For smaller cursor arrays, it may be more efficient to skip
+ // the priority queue and just do a scan.
+ PriorityQueue<Wrapped<R>> pq(cursors.size());
+ for (size_t i = 0; i < cursors.size(); i++) {
+ pq.push(cursors[i].ptr, i);
}
- ~FSTrie() {
- delete[] m_data;
- delete m_fst;
+ while (pq.size()) {
+ auto now = pq.peek();
+ auto next =
+ pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+ /*
+ * if the current record is not a tombstone, and the next record is
+ * a tombstone that matches the current one, then the current one
+ * has been deleted, and both it and its tombstone can be skipped
+ * over.
+ */
+ if (!now.data->is_tombstone() && next.data != nullptr &&
+ now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+ pq.pop();
+ pq.pop();
+ auto &cursor1 = cursors[now.version];
+ auto &cursor2 = cursors[next.version];
+ if (advance_cursor(cursor1))
+ pq.push(cursor1.ptr, now.version);
+ if (advance_cursor(cursor2))
+ pq.push(cursor2.ptr, next.version);
+ } else {
+ auto &cursor = cursors[now.version];
+ /* skip over records that have been deleted via tagging */
+ if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') {
+ m_data[m_reccnt] = *cursor.ptr;
+ keys.push_back(std::string(m_data[m_reccnt].rec.key));
+
+ m_reccnt++;
+ }
+ pq.pop();
+
+ if (advance_cursor(cursor))
+ pq.push(cursor.ptr, now.version);
+ }
}
- Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
+ if (m_reccnt > 0) {
+ m_fst = new fst::Trie(keys, true, 1);
+ }
+ }
- auto idx = m_fst->exactSearch(rec.key);
+ ~FSTrie() {
+ delete[] m_data;
+ delete m_fst;
+ }
- if (idx == fst::kNotFound) {
- return nullptr;
- }
+ Wrapped<R> *point_lookup(const R &rec, bool filter = false) {
- // FIXME: for convenience, I'm treating this Trie as a unique index
- // for now, so no need to scan forward and/or check values. This
- // also makes the point lookup query class a lot easier to make.
- // Ultimately, though, we can support non-unique indexes with some
- // extra work.
+ auto idx = m_fst->exactSearch(rec.key);
- return m_data + idx;
+ if (idx == fst::kNotFound) {
+ return nullptr;
}
- Wrapped<R>* get_data() const {
- return m_data;
- }
-
- size_t get_record_count() const {
- return m_reccnt;
- }
+ // FIXME: for convenience, I'm treating this Trie as a unique index
+ // for now, so no need to scan forward and/or check values. This
+ // also makes the point lookup query class a lot easier to make.
+ // Ultimately, though, we can support non-unique indexes with some
+ // extra work.
- size_t get_tombstone_count() const {
- return 0;
- }
+ return m_data + idx;
+ }
- const Wrapped<R>* get_record_at(size_t idx) const {
- if (idx >= m_reccnt) return nullptr;
- return m_data + idx;
- }
+ Wrapped<R> *get_data() const { return m_data; }
+ size_t get_record_count() const { return m_reccnt; }
- size_t get_memory_usage() const {
- return m_fst->getMemoryUsage();
- }
+ size_t get_tombstone_count() const { return 0; }
- size_t get_aux_memory_usage() const {
- return m_alloc_size;
- }
+ const Wrapped<R> *get_record_at(size_t idx) const {
+ if (idx >= m_reccnt)
+ return nullptr;
+ return m_data + idx;
+ }
- size_t get_lower_bound(R &rec) {return 0;}
- size_t get_upper_bound(R &rec) {return 0;}
+ size_t get_memory_usage() const { return m_fst->getMemoryUsage(); }
-private:
+ size_t get_aux_memory_usage() const { return m_alloc_size; }
+
+ size_t get_lower_bound(R &rec) { return 0; }
+ size_t get_upper_bound(R &rec) { return 0; }
- Wrapped<R>* m_data;
- size_t m_reccnt;
- size_t m_alloc_size;
- fst::Trie *m_fst;
+private:
+ Wrapped<R> *m_data;
+ size_t m_reccnt;
+ size_t m_alloc_size;
+ fst::Trie *m_fst;
};
-}
+} // namespace de
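
Two illustrations follow for readers skimming the diff above.

First, the shard-merge constructor cancels records against their tombstones while merging sorted runs through a priority queue. Below is a minimal standalone sketch of that cancellation rule, using only the standard library; the names (Rec, merge_cancel) and the simplification of dropping unmatched tombstones are illustrative assumptions, not the framework's actual types.

#include <cstddef>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

/* Simplified stand-in for Wrapped<R>: a key plus a tombstone flag. */
struct Rec {
  std::string key;
  bool tombstone;
};

/* Merge several key-sorted runs. When a record is immediately followed by a
 * matching tombstone, both are skipped -- the same cancellation the FSTrie
 * merge constructor performs with its PriorityQueue over shard cursors. For
 * simplicity, this sketch also drops tombstones that find no match. */
std::vector<Rec> merge_cancel(const std::vector<std::vector<Rec>> &runs) {
  using Cur = std::pair<size_t, size_t>; /* (run index, offset into run) */
  auto cmp = [&](const Cur &a, const Cur &b) {
    const Rec &ra = runs[a.first][a.second];
    const Rec &rb = runs[b.first][b.second];
    if (ra.key != rb.key)
      return ra.key > rb.key;           /* min-heap ordered by key */
    return ra.tombstone > rb.tombstone; /* records surface before tombstones */
  };
  std::priority_queue<Cur, std::vector<Cur>, decltype(cmp)> pq(cmp);
  for (size_t i = 0; i < runs.size(); i++)
    if (!runs[i].empty())
      pq.push({i, 0});

  std::vector<Rec> out;
  while (!pq.empty()) {
    Cur now = pq.top();
    pq.pop();
    const Rec &r = runs[now.first][now.second];
    bool cancelled = false;
    if (!r.tombstone && !pq.empty()) {
      Cur next = pq.top();
      const Rec &t = runs[next.first][next.second];
      if (t.tombstone && t.key == r.key) {
        /* record + tombstone annihilate: consume both cursors */
        pq.pop();
        if (next.second + 1 < runs[next.first].size())
          pq.push({next.first, next.second + 1});
        cancelled = true;
      }
    }
    if (!cancelled && !r.tombstone)
      out.push_back(r);
    if (now.second + 1 < runs[now.first].size())
      pq.push({now.first, now.second + 1});
  }
  return out;
}

int main() {
  std::vector<std::vector<Rec>> runs = {
      {{"apple", false}, {"cherry", false}},
      {{"banana", false}, {"cherry", true}}}; /* tombstone deletes cherry */
  for (const Rec &r : merge_cancel(runs))
    std::cout << r.key << "\n"; /* prints apple, then banana */
}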
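Second, point_lookup leans on an invariant set up by both constructors: keys are inserted into the fst::Trie in sorted order, in lockstep with m_data, so the index returned by exactSearch doubles as an offset into the record array. A rough usage sketch, assuming the fst.hpp API behaves the way the shard code above exercises it (construction from a sorted key vector plus two tuning parameters, and exactSearch returning an index or fst::kNotFound):

#include <string>
#include <vector>
#include "fst.hpp"

int main() {
  /* Keys must be sorted (and are treated as unique, per the FIXME in
   * point_lookup); the shard constructors guarantee the ordering by sorting
   * the wrapped records before collecting their keys. */
  std::vector<std::string> keys = {"apple", "banana", "cherry"};

  /* The trailing arguments mirror the shard code's fst::Trie(keys, true, 1);
   * their exact semantics are an assumption about fst.hpp. */
  fst::Trie trie(keys, true, 1);

  auto idx = trie.exactSearch("banana");
  if (idx != fst::kNotFound) {
    /* idx == 1: the key's rank, which FSTrie uses as m_data + idx. */
  }
}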