From cf5f3bbb0cb58430ed68ad3ebfcefc009e553d71 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 25 Sep 2025 14:42:44 -0400 Subject: Code reformatting --- include/shard/FSTrie.h | 282 +++++++++++++++++++++++-------------------------- 1 file changed, 135 insertions(+), 147 deletions(-) (limited to 'include/shard/FSTrie.h') diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h index 59ff116..31db40b 100644 --- a/include/shard/FSTrie.h +++ b/include/shard/FSTrie.h @@ -9,194 +9,182 @@ */ #pragma once - #include #include "framework/ShardRequirements.h" #include "fst.hpp" #include "util/SortedMerge.h" -using psudb::CACHELINE_SIZE; using psudb::BloomFilter; +using psudb::byte; +using psudb::CACHELINE_SIZE; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::byte; namespace de { -template -class FSTrie { +template class FSTrie { public: - typedef R RECORD; -private: + typedef R RECORD; - typedef decltype(R::key) K; - typedef decltype(R::value) V; - static_assert(std::is_same_v, "FST requires const char* keys."); +private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; + static_assert(std::is_same_v, + "FST requires const char* keys."); public: - FSTrie(BufferView buffer) - : m_data(nullptr) - , m_reccnt(0) - , m_alloc_size(0) - { - m_data = new Wrapped[buffer.get_record_count()](); - m_alloc_size = sizeof(Wrapped) * buffer.get_record_count(); - - size_t cnt = 0; - std::vector keys; - keys.reserve(buffer.get_record_count()); - - /* - * Copy the contents of the buffer view into a temporary buffer, and - * sort them. We still need to iterate over these temporary records to - * apply tombstone/deleted record filtering, as well as any possible - * per-record processing that is required by the shard being built. - */ - auto temp_buffer = new Wrapped[buffer.get_record_count()](); - for (size_t i=0; i>()); + FSTrie(BufferView buffer) : m_data(nullptr), m_reccnt(0), m_alloc_size(0) { + m_data = new Wrapped[buffer.get_record_count()](); + m_alloc_size = sizeof(Wrapped) * buffer.get_record_count(); + + size_t cnt = 0; + std::vector keys; + keys.reserve(buffer.get_record_count()); + + /* + * Copy the contents of the buffer view into a temporary buffer, and + * sort them. We still need to iterate over these temporary records to + * apply tombstone/deleted record filtering, as well as any possible + * per-record processing that is required by the shard being built. + */ + auto temp_buffer = new Wrapped[buffer.get_record_count()](); + for (size_t i = 0; i < buffer.get_record_count(); i++) { + temp_buffer[i] = *(buffer.get(i)); + } - for (size_t i=0; i>()); - m_data[cnt] = temp_buffer[i]; - m_data[cnt].clear_timestamp(); + for (size_t i = 0; i < buffer.get_record_count(); i++) { + if (temp_buffer[i].is_deleted() || !temp_buffer[i].is_visible() || + temp_buffer[i].rec.key[0] != '\0') { + continue; + } - keys.push_back(std::string(m_data[cnt].rec.key)); - cnt++; - } + m_data[cnt] = temp_buffer[i]; + m_data[cnt].clear_timestamp(); - m_reccnt = cnt; - if (m_reccnt > 0) { - m_fst = new fst::Trie(keys, true, 1); - } + keys.push_back(std::string(m_data[cnt].rec.key)); + cnt++; + } - delete[] temp_buffer; + m_reccnt = cnt; + if (m_reccnt > 0) { + m_fst = new fst::Trie(keys, true, 1); } - FSTrie(std::vector const &shards) - : m_data(nullptr) - , m_reccnt(0) - , m_alloc_size(0) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); - - m_data = new Wrapped[attemp_reccnt](); - m_alloc_size = attemp_reccnt * sizeof(Wrapped); - - std::vector keys; - keys.reserve(attemp_reccnt); - - // FIXME: For smaller cursor arrays, it may be more efficient to skip - // the priority queue and just do a scan. - PriorityQueue> pq(cursors.size()); - for (size_t i=0; i 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - /* - * if the current record is not a tombstone, and the next record is - * a tombstone that matches the current one, then the current one - * has been deleted, and both it and its tombstone can be skipped - * over. - */ - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - /* skip over records that have been deleted via tagging */ - if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') { - m_data[m_reccnt] = *cursor.ptr; - keys.push_back(std::string(m_data[m_reccnt].rec.key)); - - m_reccnt++; - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + FSTrie(std::vector const &shards) + : m_data(nullptr), m_reccnt(0), m_alloc_size(0) { + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = + build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); - if (m_reccnt > 0) { - m_fst = new fst::Trie(keys, true, 1); - } + m_data = new Wrapped[attemp_reccnt](); + m_alloc_size = attemp_reccnt * sizeof(Wrapped); + + std::vector keys; + keys.reserve(attemp_reccnt); + + // FIXME: For smaller cursor arrays, it may be more efficient to skip + // the priority queue and just do a scan. + PriorityQueue> pq(cursors.size()); + for (size_t i = 0; i < cursors.size(); i++) { + pq.push(cursors[i].ptr, i); } - ~FSTrie() { - delete[] m_data; - delete m_fst; + while (pq.size()) { + auto now = pq.peek(); + auto next = + pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; + /* + * if the current record is not a tombstone, and the next record is + * a tombstone that matches the current one, then the current one + * has been deleted, and both it and its tombstone can be skipped + * over. + */ + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); + pq.pop(); + auto &cursor1 = cursors[now.version]; + auto &cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) + pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) + pq.push(cursor2.ptr, next.version); + } else { + auto &cursor = cursors[now.version]; + /* skip over records that have been deleted via tagging */ + if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') { + m_data[m_reccnt] = *cursor.ptr; + keys.push_back(std::string(m_data[m_reccnt].rec.key)); + + m_reccnt++; + } + pq.pop(); + + if (advance_cursor(cursor)) + pq.push(cursor.ptr, now.version); + } } - Wrapped *point_lookup(const R &rec, bool filter=false) { + if (m_reccnt > 0) { + m_fst = new fst::Trie(keys, true, 1); + } + } - auto idx = m_fst->exactSearch(rec.key); + ~FSTrie() { + delete[] m_data; + delete m_fst; + } - if (idx == fst::kNotFound) { - return nullptr; - } + Wrapped *point_lookup(const R &rec, bool filter = false) { - // FIXME: for convenience, I'm treating this Trie as a unique index - // for now, so no need to scan forward and/or check values. This - // also makes the point lookup query class a lot easier to make. - // Ultimately, though, we can support non-unique indexes with some - // extra work. + auto idx = m_fst->exactSearch(rec.key); - return m_data + idx; + if (idx == fst::kNotFound) { + return nullptr; } - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } + // FIXME: for convenience, I'm treating this Trie as a unique index + // for now, so no need to scan forward and/or check values. This + // also makes the point lookup query class a lot easier to make. + // Ultimately, though, we can support non-unique indexes with some + // extra work. - size_t get_tombstone_count() const { - return 0; - } + return m_data + idx; + } - const Wrapped* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } + Wrapped *get_data() const { return m_data; } + size_t get_record_count() const { return m_reccnt; } - size_t get_memory_usage() const { - return m_fst->getMemoryUsage(); - } + size_t get_tombstone_count() const { return 0; } - size_t get_aux_memory_usage() const { - return m_alloc_size; - } + const Wrapped *get_record_at(size_t idx) const { + if (idx >= m_reccnt) + return nullptr; + return m_data + idx; + } - size_t get_lower_bound(R &rec) {return 0;} - size_t get_upper_bound(R &rec) {return 0;} + size_t get_memory_usage() const { return m_fst->getMemoryUsage(); } -private: + size_t get_aux_memory_usage() const { return m_alloc_size; } + + size_t get_lower_bound(R &rec) { return 0; } + size_t get_upper_bound(R &rec) { return 0; } - Wrapped* m_data; - size_t m_reccnt; - size_t m_alloc_size; - fst::Trie *m_fst; +private: + Wrapped *m_data; + size_t m_reccnt; + size_t m_alloc_size; + fst::Trie *m_fst; }; -} +} // namespace de -- cgit v1.2.3