From 7c03d771475421c1d5a2bbc135242536af1a371c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 25 Sep 2023 10:49:36 -0400 Subject: Re-structuring Project + scheduling updates This is a big one--probably should have split it apart, but I'm feeling lazy this morning. * Organized the mess of header files in include/framework by splitting them out into their own subdirectories, and renaming a few files to remove redundancies introduced by the directory structure. * Introduced a new framework/ShardRequirements.h header file for simpler shard development. This header simply contains the necessary includes from framework/* for creating shard files. This should help to remove structural dependencies from the framework file structure and shards, as well as centralizing the necessary framework files to make shard development easier. * Created a (currently dummy) SchedulerInterface, and make the scheduler implementation a template parameter of the dynamic extension for easier testing of various scheduling policies. There's still more work to be done to fully integrate the scheduler (queries, multiple buffers), but some more of the necessary framework code for this has been added as well. * Adjusted the Task interface setup for the scheduler. The task structures have been removed from ExtensionStructure and placed in their own header file. Additionally, I started experimenting with using std::variant, as opposed to inheritence, to implement subtype polymorphism on the Merge and Query tasks. The scheduler now has a general task queue that contains both, and std::variant, std::visit, and std::get are used to manipulate them without virtual functions. * Removed Alex.h, as it can't build anyway. There's a branch out there containing the Alex implementation stripped of the C++20 stuff. So there's no need to keep it here. --- include/shard/WSS.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/shard/WSS.h') diff --git a/include/shard/WSS.h b/include/shard/WSS.h index c0af573..87b016c 100644 --- a/include/shard/WSS.h +++ b/include/shard/WSS.h @@ -16,15 +16,13 @@ #include #include +#include "framework/ShardRequirements.h" + #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -- cgit v1.2.3 From d2279e1b96d352a0af1d425dcaaf93e8a26a8d52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 17:15:05 -0400 Subject: General Comment + Consistency updates --- include/shard/WSS.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/shard/WSS.h') diff --git a/include/shard/WSS.h b/include/shard/WSS.h index 87b016c..4e3a326 100644 --- a/include/shard/WSS.h +++ b/include/shard/WSS.h @@ -1,8 +1,8 @@ /* * include/shard/WSS.h * - * Copyright (C) 2023 Dong Xie - * Douglas Rumbaugh + * Copyright (C) 2023 Douglas B. Rumbaugh + * Dong Xie * * All rights reserved. Published under the Modified BSD License. * @@ -243,6 +243,10 @@ public: return m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { -- cgit v1.2.3 From e02742b07540dd5a9bcbb44dae14856bf10955ed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 6 Nov 2023 15:18:53 -0500 Subject: Refactoring progress --- include/shard/WSS.h | 453 ---------------------------------------------------- 1 file changed, 453 deletions(-) delete mode 100644 include/shard/WSS.h (limited to 'include/shard/WSS.h') diff --git a/include/shard/WSS.h b/include/shard/WSS.h deleted file mode 100644 index 4e3a326..0000000 --- a/include/shard/WSS.h +++ /dev/null @@ -1,453 +0,0 @@ -/* - * include/shard/WSS.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh - * Dong Xie - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - - -#include -#include -#include -#include -#include - -#include "framework/ShardRequirements.h" - -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t wss_cancelations = 0; - -template -struct wss_query_parms { - size_t sample_size; - gsl_rng *rng; -}; - -template -class WSSQuery; - -template -struct WSSState { - decltype(R::weight) total_weight; - size_t sample_size; - - WSSState() { - total_weight = 0; - } -}; - -template -struct WSSBufferState { - size_t cutoff; - size_t sample_size; - Alias* alias; - decltype(R::weight) max_weight; - decltype(R::weight) total_weight; - - ~WSSBufferState() { - delete alias; - } - -}; - -template -class WSS { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - - // FIXME: there has to be a better way to do this - friend class WSSQuery; - friend class WSSQuery; - - WSS(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - std::vector weights; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wss_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - weights.push_back(base->rec.weight); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - WSS(WSS** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector weights; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - weights.push_back(cursor.ptr->rec.weight); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - ~WSS() { - if (m_data) free(m_data); - if (m_alias) delete m_alias; - if (m_bf) delete m_bf; - - } - - Wrapped *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alloc_size; - } - - size_t get_aux_memory_usage() { - return 0; - } - -private: - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - void build_alias_structure(std::vector &weights) { - - // normalize the weights vector - std::vector norm_weights(weights.size()); - - for (size_t i=0; i* m_data; - Alias *m_alias; - W m_total_weight; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - BloomFilter *m_bf; -}; - - -template -class WSSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(WSS *wss, void *parms) { - auto res = new WSSState(); - res->total_weight = wss->m_total_weight; - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - WSSBufferState *state = new WSSBufferState(); - auto parameters = (wss_query_parms*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - return state; - } - - std::vector weights; - - state->cutoff = buffer->get_record_count() - 1; - double total_weight = 0.0; - - for (size_t i = 0; i <= state->cutoff; i++) { - auto rec = buffer->get_data() + i; - weights.push_back(rec->rec.weight); - total_weight += rec->rec.weight; - } - - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } - - state->alias = new Alias(weights); - state->total_weight = total_weight; - - return state; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - auto p = (wss_query_parms *) query_parms; - auto bs = (WSSBufferState *) buff_state; - - std::vector shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector weights; - weights.push_back(bs->total_weight); - - decltype(R::weight) total_weight = 0; - for (auto &s : shard_states) { - auto state = (WSSState *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; isample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - - bs->sample_size = buffer_sz; - for (size_t i=0; i *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - static std::vector> query(WSS *wss, void *q_state, void *parms) { - auto rng = ((wss_query_parms *) parms)->rng; - - auto state = (WSSState *) q_state; - auto sample_size = state->sample_size; - - std::vector> result_set; - - if (sample_size == 0) { - return result_set; - } - size_t attempts = 0; - do { - attempts++; - size_t idx = wss->m_alias->get(rng); - result_set.emplace_back(*wss->get_record_at(idx)); - } while (attempts < sample_size); - - return result_set; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto st = (WSSBufferState *) state; - auto p = (wss_query_parms *) parms; - - std::vector> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; isample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; isample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(*(buffer->get_data() + idx)); - } - - return result; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - - for (size_t i=0; i *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (WSSBufferState *) state; - delete s; - } -}; - -} -- cgit v1.2.3