From 7c03d771475421c1d5a2bbc135242536af1a371c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 25 Sep 2023 10:49:36 -0400 Subject: Re-structuring Project + scheduling updates This is a big one--probably should have split it apart, but I'm feeling lazy this morning. * Organized the mess of header files in include/framework by splitting them out into their own subdirectories, and renaming a few files to remove redundancies introduced by the directory structure. * Introduced a new framework/ShardRequirements.h header file for simpler shard development. This header simply contains the necessary includes from framework/* for creating shard files. This should help to remove structural dependencies from the framework file structure and shards, as well as centralizing the necessary framework files to make shard development easier. * Created a (currently dummy) SchedulerInterface, and make the scheduler implementation a template parameter of the dynamic extension for easier testing of various scheduling policies. There's still more work to be done to fully integrate the scheduler (queries, multiple buffers), but some more of the necessary framework code for this has been added as well. * Adjusted the Task interface setup for the scheduler. The task structures have been removed from ExtensionStructure and placed in their own header file. Additionally, I started experimenting with using std::variant, as opposed to inheritence, to implement subtype polymorphism on the Merge and Query tasks. The scheduler now has a general task queue that contains both, and std::variant, std::visit, and std::get are used to manipulate them without virtual functions. * Removed Alex.h, as it can't build anyway. There's a branch out there containing the Alex implementation stripped of the C++20 stuff. So there's no need to keep it here. --- include/shard/TrieSpline.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 69fcfbc..98153c0 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -15,15 +15,12 @@ #include #include +#include "framework/ShardRequirements.h" #include "ts/builder.h" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -- cgit v1.2.3 From d2279e1b96d352a0af1d425dcaaf93e8a26a8d52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 17:15:05 -0400 Subject: General Comment + Consistency updates --- include/shard/TrieSpline.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 98153c0..a784a38 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -250,6 +250,10 @@ public: return m_ts.GetSize() + m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { -- cgit v1.2.3 From e02742b07540dd5a9bcbb44dae14856bf10955ed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 6 Nov 2023 15:18:53 -0500 Subject: Refactoring progress --- include/shard/TrieSpline.h | 184 +-------------------------------------------- 1 file changed, 2 insertions(+), 182 deletions(-) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index a784a38..fdf8edb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -30,32 +30,6 @@ using psudb::Alias; namespace de { -template -struct ts_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template -class TrieSplineRangeQuery; - -template -struct TrieSplineState { - size_t start_idx; - size_t stop_idx; -}; - -template -struct TrieSplineBufferState { - size_t cutoff; - Alias* alias; - - ~TrieSplineBufferState() { - delete alias; - } - -}; - template class TrieSpline { private: @@ -63,10 +37,6 @@ private: typedef decltype(R::value) V; public: - - // FIXME: there has to be a better way to do this - friend class TrieSplineRangeQuery; - TrieSpline(MutableBuffer* buffer) : m_reccnt(0), m_tombstone_cnt(0) { @@ -254,8 +224,6 @@ public: return 0; } -private: - size_t get_lower_bound(const K& key) const { auto bound = m_ts.GetSearchBound(key); size_t idx = bound.begin; @@ -293,6 +261,8 @@ private: return (m_data[idx].rec.key <= key) ? idx : m_reccnt; } +private: + Wrapped* m_data; size_t m_reccnt; size_t m_tombstone_cnt; @@ -302,154 +272,4 @@ private: ts::TrieSpline m_ts; BloomFilter *m_bf; }; - - -template -class TrieSplineRangeQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(TrieSpline *ts, void *parms) { - auto res = new TrieSplineState(); - auto p = (ts_range_query_parms *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new TrieSplineBufferState(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(TrieSpline *ts, void *q_state, void *parms) { - //std::vector> records; - size_t tot = 0; - auto p = (ts_range_query_parms *) parms; - auto s = (TrieSplineState *) q_state; - - // if the returned index is one past the end of the - // records for the TrieSpline, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return {}; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. - while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - if (ptr->is_tombstone()) --tot; - else if (!ptr->is_deleted()) ++tot; - //records.emplace_back(*ptr); - ptr++; - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - size_t tot = 0; - auto p = (ts_range_query_parms *) parms; - auto s = (TrieSplineBufferState *) state; - - //std::vector> records; - for (size_t i=0; icutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - if (rec->is_tombstone()) --tot; - else if (!rec->is_deleted()) ++tot; - //records.emplace_back(*rec); - } - - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector merge(std::vector>> &results, void *parms) { -/* - std::vector>> cursors; - cursors.reserve(results.size()); - - PriorityQueue> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector(); - } - - std::vector output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output;*/ - - size_t tot = 0; - for (auto& result: results) - if (result.size() > 0) tot += result[0].rec.key; - - return {{tot, 0}}; - } - - static void delete_query_state(void *state) { - auto s = (TrieSplineState *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (TrieSplineBufferState *) state; - delete s; - } -}; - } -- cgit v1.2.3 From 357cab549c2ed33970562b84ff6f83923742343d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 7 Nov 2023 15:34:24 -0500 Subject: Comment and License updates --- include/shard/TrieSpline.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index fdf8edb..56ec357 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -3,7 +3,9 @@ * * Copyright (C) 2023 Douglas B. Rumbaugh * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * A shard shim around the TrieSpline learned index. * */ #pragma once -- cgit v1.2.3 From 51a85013236f4b2bd596caf179d90e67c848963c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Jan 2024 15:31:34 -0500 Subject: TrieSpline + tests --- include/shard/TrieSpline.h | 151 +++++++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 61 deletions(-) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 56ec357..8142a67 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -12,10 +12,6 @@ #include -#include -#include -#include -#include #include "framework/ShardRequirements.h" #include "ts/builder.h" @@ -23,62 +19,64 @@ #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "psu-util/timer.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; namespace de { -template +template class TrieSpline { private: typedef decltype(R::key) K; typedef decltype(R::value) V; public: - TrieSpline(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - + TrieSpline(BufferView buffer) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + TIMER_INIT(); + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + + TIMER_START(); + auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); + buffer.copy_to_buffer((byte *) temp_buffer); + + auto base = temp_buffer; + auto stop = base + buffer.get_record_count(); std::sort(base, stop, std::less>()); K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; + K max_key = (stop-1)->rec.key; + TIMER_STOP(); - auto bldr = ts::Builder(min_key, max_key, E); + auto sort_time = TIMER_RESULT(); + TIMER_START(); + auto bldr = ts::Builder(min_key, max_key, E); while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } + if (!base->is_tombstone() && (base + 1 < stop) + && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; } else if (base->is_deleted()) { base += 1; continue; } - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; - } - // FIXME: this shouldn't be necessary, but the tagged record // bypass doesn't seem to be working on this code-path, so this // ensures that tagged records from the buffer are able to be @@ -86,37 +84,67 @@ public: base->header &= 3; m_data[m_reccnt++] = *base; bldr.AddKey(base->rec.key); - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; + ++m_tombstone_cnt; m_bf->insert(base->rec); } - + + /* + * determine the "true" min/max keys based on the scan. This is + * to avoid situations where the min/max in the input array + * are deleted and don't survive into the structure itself. + */ + if (m_reccnt == 0) { + m_max_key = m_min_key = base->rec.key; + } else if (base->rec.key > m_max_key) { + m_max_key = base->rec.key; + } else if (base->rec.key < m_min_key) { + m_min_key = base->rec.key; + } + base++; } + TIMER_STOP(); + auto copy_time = TIMER_RESULT(); + + TIMER_START(); if (m_reccnt > 0) { m_ts = bldr.Finalize(); } + TIMER_STOP(); + auto level_time = TIMER_RESULT(); + + free(temp_buffer); } - TrieSpline(TrieSpline** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { + TrieSpline(std::vector &shards) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(nullptr) + { + std::vector>> cursors; - cursors.reserve(len); + cursors.reserve(shards.size()); - PriorityQueue> pq(len); + PriorityQueue> pq(shards.size()); size_t attemp_reccnt = 0; size_t tombstone_count = 0; - // initialize m_max_key and m_min_key using the values from the - // first shard. These will later be updated when building - // the initial priority queue to their true values. + /* + * Initialize m_max_key and m_min_key using the values from the + * first shard. These will later be updated when building + * the initial priority queue to their true values. + */ m_max_key = shards[0]->m_max_key; m_min_key = shards[0]->m_min_key; - for (size_t i = 0; i < len; ++i) { + for (size_t i = 0; i < shards.size(); ++i) { if (shards[i]) { auto base = shards[i]->get_data(); cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); @@ -137,12 +165,11 @@ public: } m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - auto bldr = ts::Builder(m_min_key, m_max_key, E); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); + auto bldr = ts::Builder(m_min_key, m_max_key, E); while (pq.size()) { auto now = pq.peek(); auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; @@ -152,33 +179,32 @@ public: pq.pop(); pq.pop(); auto& cursor1 = cursors[now.version]; auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); + if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); } else { auto& cursor = cursors[now.version]; if (!cursor.ptr->is_deleted()) { m_data[m_reccnt++] = *cursor.ptr; bldr.AddKey(cursor.ptr->rec.key); - if (m_bf && cursor.ptr->is_tombstone()) { + if (cursor.ptr->is_tombstone()) { ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); + m_bf->insert(cursor.ptr->rec); } } pq.pop(); - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); + if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); } } if (m_reccnt > 0) { m_ts = bldr.Finalize(); } - } + } ~TrieSpline() { - if (m_data) free(m_data); - if (m_bf) delete m_bf; - + free(m_data); + delete m_bf; } Wrapped *point_lookup(const R &rec, bool filter=false) { @@ -253,14 +279,17 @@ public: max = mid; } } + } + if (idx == m_reccnt) { + return m_reccnt; } if (m_data[idx].rec.key > key && idx > 0 && m_data[idx-1].rec.key <= key) { return idx-1; } - return (m_data[idx].rec.key <= key) ? idx : m_reccnt; + return idx; } private: -- cgit v1.2.3 From 2c5d549b3618b9ea72e6eece4cb4f3da5a6811a8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 13:42:34 -0500 Subject: Fully realized shard concept interface --- include/shard/TrieSpline.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 8142a67..9473177 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -25,6 +25,7 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; +using psudb::byte; namespace de { -- cgit v1.2.3 From bd74e27b28bd95267ce50d2e4b6f12b51d9b6aae Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 17:23:23 -0500 Subject: Cleaned up shard files (except VPTree) Cleaned up shard implementations, fixed a few bugs, and set up some tests. There's still some work to be done in creating tests for the weighted sampling operations for the alias and aug btree shards. --- include/shard/TrieSpline.h | 149 +++++++-------------------------------------- 1 file changed, 23 insertions(+), 126 deletions(-) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 9473177..f9fb3cb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -15,11 +15,9 @@ #include "framework/ShardRequirements.h" #include "ts/builder.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "psu-util/timer.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; @@ -45,78 +43,26 @@ public: , m_min_key(0) , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { - TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped), (byte**) &m_data); - TIMER_START(); - auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); - buffer.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + buffer.get_record_count(); - std::sort(base, stop, std::less>()); - - K min_key = base->rec.key; - K max_key = (stop-1)->rec.key; - TIMER_STOP(); - - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - auto bldr = ts::Builder(min_key, max_key, E); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - bldr.AddKey(base->rec.key); - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } + if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; - /* - * determine the "true" min/max keys based on the scan. This is - * to avoid situations where the min/max in the input array - * are deleted and don't survive into the structure itself. - */ - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 0) { m_ts = bldr.Finalize(); } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - - free(temp_buffer); } TrieSpline(std::vector &shards) @@ -128,77 +74,28 @@ public: , m_min_key(0) , m_bf(nullptr) { - - std::vector>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue> pq(shards.size()); - size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - /* - * Initialize m_max_key and m_min_key using the values from the - * first shard. These will later be updated when building - * the initial priority queue to their true values. - */ - m_max_key = shards[0]->m_max_key; - m_min_key = shards[0]->m_min_key; + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - if (shards[i]->m_max_key > m_max_key) { - m_max_key = shards[i]->m_max_key; - } - - if (shards[i]->m_min_key < m_min_key) { - m_min_key = shards[i]->m_min_key; - } - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped), (byte **) &m_data); - auto bldr = ts::Builder(m_min_key, m_max_key, E); - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - bldr.AddKey(cursor.ptr->rec.key); - if (cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; imemory_usage() : 0; } size_t get_lower_bound(const K& key) const { -- cgit v1.2.3 From 402fc269c0aaa671d84a6d15918735ad4b90e6b2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 9 Feb 2024 12:30:21 -0500 Subject: Comment updates/fixes --- include/shard/TrieSpline.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/shard/TrieSpline.h') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index f9fb3cb..2a432e8 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -7,6 +7,7 @@ * * A shard shim around the TrieSpline learned index. * + * TODO: The code in this file is very poorly commented. */ #pragma once -- cgit v1.2.3