diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-09-25 10:49:36 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-09-25 10:49:36 -0400 |
| commit | 7c03d771475421c1d5a2bbc135242536af1a371c (patch) | |
| tree | 94856ac950662c564608ad3cdc5b59bfd08b955c /include/framework/structure/MutableBuffer.h | |
| parent | 754372aeccb74815cbb16f32ceacb04b4c5aaba9 (diff) | |
| download | dynamic-extension-7c03d771475421c1d5a2bbc135242536af1a371c.tar.gz | |
Re-structuring Project + scheduling updates
This is a big one--probably should have split it apart, but I'm feeling
lazy this morning.
* Organized the mess of header files in include/framework by splitting
them out into their own subdirectories, and renaming a few files to
remove redundancies introduced by the directory structure.
* Introduced a new framework/ShardRequirements.h header file for simpler
shard development. This header simply contains the necessary includes
from framework/* for creating shard files. This should help to remove
structural dependencies from the framework file structure and shards,
as well as centralizing the necessary framework files to make shard
development easier.
* Created a (currently dummy) SchedulerInterface, and made the scheduler
implementation a template parameter of the dynamic extension for easier
testing of various scheduling policies. There's still more work to be
done to fully integrate the scheduler (queries, multiple buffers), but
some more of the necessary framework code for this has been added as well.
* Adjusted the Task interface setup for the scheduler. The task structures
have been removed from ExtensionStructure and placed in their own header
file. Additionally, I started experimenting with using std::variant,
as opposed to inheritance, to implement subtype polymorphism on the
Merge and Query tasks. The scheduler now has a general task queue that
contains both, and std::variant, std::visit, and std::get are used to
manipulate them without virtual functions.
* Removed Alex.h, as it can't build anyway. There's a branch out there
containing the Alex implementation stripped of the C++20 stuff. So
there's no need to keep it here.
Diffstat (limited to 'include/framework/structure/MutableBuffer.h')
| -rw-r--r-- | include/framework/structure/MutableBuffer.h | 242 |
1 file changed, 242 insertions, 0 deletions
diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h new file mode 100644 index 0000000..9f12175 --- /dev/null +++ b/include/framework/structure/MutableBuffer.h @@ -0,0 +1,242 @@ +/* + * include/framework/MutableBuffer.h + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include <cstdlib> +#include <atomic> +#include <condition_variable> +#include <cassert> +#include <numeric> +#include <algorithm> +#include <type_traits> + +#include "psu-util/alignment.h" +#include "util/bf_config.h" +#include "psu-ds/BloomFilter.h" +#include "psu-ds/Alias.h" +#include "psu-util/timer.h" +#include "framework/interface/Record.h" + +using psudb::CACHELINE_SIZE; + +namespace de { + +template <RecordInterface R> +class MutableBuffer { +public: + MutableBuffer(size_t capacity, size_t max_tombstone_cap) + : m_cap(capacity), m_tombstone_cap(max_tombstone_cap), m_reccnt(0) + , m_tombstonecnt(0), m_weight(0), m_max_weight(0) { + m_data = (Wrapped<R>*) psudb::sf_aligned_alloc(CACHELINE_SIZE, capacity*sizeof(Wrapped<R>)); + m_merge_data = (Wrapped<R>*) psudb::sf_aligned_alloc(CACHELINE_SIZE, capacity*sizeof(Wrapped<R>)); + m_tombstone_filter = nullptr; + if (max_tombstone_cap > 0) { + m_tombstone_filter = new psudb::BloomFilter<R>(BF_FPR, max_tombstone_cap, BF_HASH_FUNCS); + } + + m_refcnt.store(0); + m_deferred_truncate.store(false); + m_merging.store(false); + } + + ~MutableBuffer() { + assert(m_refcnt.load() == 0); + assert(m_merging.load() == false); + + if (m_data) free(m_data); + if (m_tombstone_filter) delete m_tombstone_filter; + if (m_merge_data) free(m_merge_data); + } + + template <typename R_ = R> + int append(const R &rec, bool tombstone=false) { + if (tombstone && m_tombstonecnt + 1 > m_tombstone_cap) return 0; + + int32_t pos = 0; + if ((pos = try_advance_tail()) == -1) return 0; + + 
Wrapped<R> wrec; + wrec.rec = rec; + wrec.header = 0; + if (tombstone) wrec.set_tombstone(); + + m_data[pos] = wrec; + m_data[pos].header |= (pos << 2); + + if (tombstone) { + m_tombstonecnt.fetch_add(1); + if (m_tombstone_filter) m_tombstone_filter->insert(rec); + } + + if constexpr (WeightedRecordInterface<R_>) { + m_weight.fetch_add(rec.weight); + double old = m_max_weight.load(); + while (old < rec.weight) { + m_max_weight.compare_exchange_strong(old, rec.weight); + old = m_max_weight.load(); + } + } else { + m_weight.fetch_add(1); + } + + return 1; + } + + bool truncate() { + m_tombstonecnt.store(0); + m_reccnt.store(0); + m_weight.store(0); + m_max_weight.store(0); + if (m_tombstone_filter) m_tombstone_filter->clear(); + + return true; + } + + size_t get_record_count() { + return m_reccnt; + } + + size_t get_capacity() { + return m_cap; + } + + bool is_full() { + return m_reccnt == m_cap; + } + + size_t get_tombstone_count() { + return m_tombstonecnt.load(); + } + + bool delete_record(const R& rec) { + auto offset = 0; + while (offset < m_reccnt.load()) { + if (m_data[offset].rec == rec) { + m_data[offset].set_delete(); + return true; + } + offset++; + } + + return false; + } + + bool check_tombstone(const R& rec) { + if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) return false; + + auto offset = 0; + while (offset < m_reccnt.load()) { + if (m_data[offset].rec == rec && m_data[offset].is_tombstone()) { + return true; + } + offset++;; + } + return false; + } + + size_t get_memory_usage() { + return m_cap * sizeof(R); + } + + size_t get_aux_memory_usage() { + return m_tombstone_filter->get_memory_usage(); + } + + size_t get_tombstone_capacity() { + return m_tombstone_cap; + } + + double get_total_weight() { + return m_weight.load(); + } + + Wrapped<R> *get_data() { + return m_data; + } + + double get_max_weight() { + return m_max_weight; + } + + bool start_merge() { + if (m_merge_lock.try_lock()) { + /* there cannot already been an active merge */ + 
if (m_merging.load()) { + m_merge_lock.unlock(); + return false; + } + + m_merging.store(true); + memcpy(m_merge_data, m_data, sizeof(Wrapped<R>) * m_reccnt.load()); + return true; + } + + /* lock could not be obtained */ + return false; + } + + bool finish_merge() { + m_merge_lock.unlock(); + return true; + } + + /* + * Concurrency-related operations + */ + bool take_reference() { + m_refcnt.fetch_add(1); + return true; + } + + bool release_reference() { + m_refcnt.fetch_add(-1); + + if (m_refcnt.load() == 0 && m_deferred_truncate.load()) { + assert(this->truncate()); + } + + return true; + } + + bool active_merge() { + return m_merging.load(); + } + +private: + int32_t try_advance_tail() { + size_t new_tail = m_reccnt.fetch_add(1); + + if (new_tail < m_cap) return new_tail; + else return -1; + } + + size_t m_cap; + size_t m_tombstone_cap; + + Wrapped<R>* m_data; + Wrapped<R>* m_merge_data; + + psudb::BloomFilter<R>* m_tombstone_filter; + + alignas(64) std::atomic<size_t> m_tombstonecnt; + alignas(64) std::atomic<uint32_t> m_reccnt; + alignas(64) std::atomic<double> m_weight; + alignas(64) std::atomic<double> m_max_weight; + alignas(64) std::atomic<bool> m_merging; + alignas(64) std::atomic<bool> m_deferred_truncate; + alignas(64) std::atomic<size_t> m_refcnt; + + alignas(64) std::mutex m_merge_lock; + alignas(64) std::mutex m_trunc_lock; + alignas(64) std::condition_variable m_trunc_signal; + +}; + +} |