summaryrefslogtreecommitdiffstats
path: root/include/framework/scheduling
diff options
context:
space:
mode:
Diffstat (limited to 'include/framework/scheduling')
-rw-r--r--include/framework/scheduling/Epoch.h143
-rw-r--r--include/framework/scheduling/FIFOScheduler.h129
-rw-r--r--include/framework/scheduling/SerialScheduler.h62
-rw-r--r--include/framework/scheduling/Task.h89
-rw-r--r--include/framework/scheduling/statistics.h118
5 files changed, 541 insertions, 0 deletions
diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h
new file mode 100644
index 0000000..9377fb0
--- /dev/null
+++ b/include/framework/scheduling/Epoch.h
@@ -0,0 +1,143 @@
+/*
+ * include/framework/scheduling/Epoch.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "framework/structure/MutableBuffer.h"
+#include "framework/structure/ExtensionStructure.h"
+#include "framework/structure/BufferView.h"
+
+namespace de {
+
+
+template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
+class Epoch {
+private:
+ typedef MutableBuffer<R> Buffer;
+ typedef ExtensionStructure<R, S, Q, L> Structure;
+ typedef BufferView<R> BufView;
+public:
+ Epoch(size_t number=0)
+ : m_buffer(nullptr)
+ , m_structure(nullptr)
+ , m_active_merge(false)
+ , m_epoch_number(number)
+ , m_buffer_head(0)
+ {}
+
+ Epoch(size_t number, Structure *structure, Buffer *buff, size_t head)
+ : m_buffer(buff)
+ , m_structure(structure)
+ , m_active_merge(false)
+ , m_epoch_number(number)
+ , m_buffer_head(head)
+ {
+ structure->take_reference();
+ }
+
+ ~Epoch() {
+ if (m_structure) {
+ m_structure->release_reference();
+ }
+
+ if (m_structure->get_reference_count() == 0) {
+ delete m_structure;
+ }
+
+ }
+
+ /*
+ * Epochs are *not* copyable or movable. Only one can exist, and all users
+ * of it work with pointers
+ */
+ Epoch(const Epoch&) = delete;
+ Epoch(Epoch&&) = delete;
+ Epoch &operator=(const Epoch&) = delete;
+ Epoch &operator=(Epoch&&) = delete;
+
+ size_t get_epoch_number() {
+ return m_epoch_number;
+ }
+
+ Structure *get_structure() {
+ return m_structure;
+ }
+
+ BufView get_buffer() {
+ return m_buffer->get_buffer_view(m_buffer_head);
+ }
+
+ /*
+ * Returns a new Epoch object that is a copy of this one. The new object
+ * will also contain a copy of the m_structure, rather than a reference to
+ * the same one. The epoch number of the new epoch will be set to the
+ * provided argument.
+ */
+ Epoch *clone(size_t number) {
+ std::unique_lock<std::mutex> m_buffer_lock;
+ auto epoch = new Epoch(number);
+ epoch->m_buffer = m_buffer;
+ epoch->m_buffer_head = m_buffer_head;
+
+ if (m_structure) {
+ epoch->m_structure = m_structure->copy();
+ /* the copy routine returns a structure with 0 references */
+ epoch->m_structure->take_reference();
+ }
+
+ return epoch;
+ }
+
+ /*
+ * Check if a merge can be started from this Epoch. At present, without
+ * concurrent merging, this simply checks if there is currently a scheduled
+ * merge based on this Epoch. If there is, returns false. If there isn't,
+ * return true and set a flag indicating that there is an active merge.
+ */
+ bool prepare_reconstruction() {
+ auto old = m_active_merge.load();
+ if (old) {
+ return false;
+ }
+
+ // FIXME: this needs cleaned up
+ while (!m_active_merge.compare_exchange_strong(old, true)) {
+ old = m_active_merge.load();
+ if (old) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool advance_buffer_head(size_t head) {
+ m_buffer_head = head;
+ return m_buffer->advance_head(m_buffer_head);
+ }
+
+private:
+ Structure *m_structure;
+ Buffer *m_buffer;
+
+ std::mutex m_buffer_lock;
+ std::atomic<bool> m_active_merge;
+
+ /*
+ * The number of currently active jobs
+ * (queries/merges) operating on this
+ * epoch. An epoch can only be retired
+ * when this number is 0.
+ */
+ size_t m_epoch_number;
+ size_t m_buffer_head;
+};
+}
diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h
new file mode 100644
index 0000000..3ed4f49
--- /dev/null
+++ b/include/framework/scheduling/FIFOScheduler.h
@@ -0,0 +1,129 @@
+/*
+ * include/framework/scheduling/FIFOScheduler.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * This scheduler runs just concurrently, using a standard FIFO queue to
+ * determine which jobs to run next. If more jobs are scheduled than there
+ * are available threads, the excess will stall until a thread becomes
+ * available and then run in the order they were received by the scheduler.
+ *
+ * TODO: We need to set up a custom threadpool based on jthreads to support
+ * thread preemption for a later phase of this project. That will allow us
+ * to avoid blocking epoch transitions on long-running queries, or to pause
+ * reconstructions on demand.
+ */
+#pragma once
+
+#include <thread>
+#include <condition_variable>
+#include <chrono>
+#include "framework/scheduling/Task.h"
+#include "framework/scheduling/statistics.h"
+
+#include "ctpl/ctpl.h"
+#include "psu-ds/LockedPriorityQueue.h"
+
+namespace de {
+
+using namespace std::literals::chrono_literals;
+
+
+class FIFOScheduler {
+private:
+ static const size_t DEFAULT_MAX_THREADS = 8;
+
+public:
+ FIFOScheduler(size_t memory_budget, size_t thread_cnt)
+ : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX)
+ , m_thrd_cnt((thread_cnt) ? thread_cnt: DEFAULT_MAX_THREADS)
+ , m_used_memory(0)
+ , m_used_thrds(0)
+ , m_shutdown(false)
+ {
+ m_sched_thrd = std::thread(&FIFOScheduler::run, this);
+ m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this);
+ m_thrd_pool.resize(m_thrd_cnt);
+ }
+
+ ~FIFOScheduler() {
+ if (!m_shutdown.load()) {
+ shutdown();
+ }
+
+ m_sched_thrd.join();
+ m_sched_wakeup_thrd.join();
+ }
+
+ void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) {
+ std::unique_lock<std::mutex> lk(m_cv_lock);
+ size_t ts = m_counter.fetch_add(1);
+
+ m_stats.job_queued(ts, type, size);
+ m_task_queue.push(Task(size, ts, job, args, type, &m_stats));
+
+ m_cv.notify_all();
+ }
+
+ void shutdown() {
+ m_shutdown.store(true);
+ m_thrd_pool.stop(true);
+ m_cv.notify_all();
+ }
+
+ void print_statistics() {
+ m_stats.print_statistics();
+ }
+
+private:
+ psudb::LockedPriorityQueue<Task> m_task_queue;
+
+ size_t m_memory_budget;
+ size_t m_thrd_cnt;
+
+ std::atomic<bool> m_shutdown;
+
+ std::atomic<size_t> m_counter;
+ std::mutex m_cv_lock;
+ std::condition_variable m_cv;
+
+ std::thread m_sched_thrd;
+ std::thread m_sched_wakeup_thrd;
+ ctpl::thread_pool m_thrd_pool;
+
+ std::atomic<size_t> m_used_thrds;
+ std::atomic<size_t> m_used_memory;
+
+ SchedulerStatistics m_stats;
+
+ void periodic_wakeup() {
+ do {
+ std::this_thread::sleep_for(10us);
+ m_cv.notify_all();
+ } while (!m_shutdown.load());
+ }
+
+ void schedule_next() {
+ assert(m_task_queue.size() > 0);
+ auto t = m_task_queue.pop();
+ m_stats.job_scheduled(t.m_timestamp);
+
+ m_thrd_pool.push(t);
+ }
+
+ void run() {
+ do {
+ std::unique_lock<std::mutex> cv_lock(m_cv_lock);
+ m_cv.wait(cv_lock);
+
+ while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) {
+ schedule_next();
+ }
+ } while(!m_shutdown.load());
+ }
+
+};
+
+}
diff --git a/include/framework/scheduling/SerialScheduler.h b/include/framework/scheduling/SerialScheduler.h
new file mode 100644
index 0000000..ac59301
--- /dev/null
+++ b/include/framework/scheduling/SerialScheduler.h
@@ -0,0 +1,62 @@
+/*
+ * include/framework/scheduling/SerialScheduler.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * IMPORTANT: This "scheduler" is a shim implementation for allowing
+ * strictly serial, single-threaded operation of the framework. It should
+ * never be used in multi-threaded contexts. A call to the schedule_job
+ * function will immediately run the job and block on its completion before
+ * returning.
+ *
+ */
+#pragma once
+
+#include "framework/scheduling/Task.h"
+#include "framework/scheduling/statistics.h"
+
+namespace de {
+
+class SerialScheduler {
+public:
+ SerialScheduler(size_t memory_budget, size_t thread_cnt)
+ : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX)
+ , m_thrd_cnt((thread_cnt) ? thread_cnt: UINT64_MAX)
+ , m_used_memory(0)
+ , m_used_thrds(0)
+ , m_counter(0)
+ {}
+
+ ~SerialScheduler() = default;
+
+ void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) {
+ size_t ts = m_counter++;
+ m_stats.job_queued(ts, type, size);
+ m_stats.job_scheduled(ts);
+ auto t = Task(size, ts, job, args, type, &m_stats);
+ t(0);
+ }
+
+ void shutdown() {
+ /* intentionally left blank */
+ }
+
+ void print_statistics() {
+ m_stats.print_statistics();
+ }
+
+private:
+ size_t m_memory_budget;
+ size_t m_thrd_cnt;
+
+ size_t m_used_thrds;
+ size_t m_used_memory;
+
+ size_t m_counter;
+
+ SchedulerStatistics m_stats;
+};
+
+}
diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h
new file mode 100644
index 0000000..d5d4266
--- /dev/null
+++ b/include/framework/scheduling/Task.h
@@ -0,0 +1,89 @@
+/*
+ * include/framework/scheduling/Task.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * An abstraction to represent a job to be scheduled. Currently the
+ * supported task types are queries and merges. Based on the current plan,
+ * simple buffer inserts will likely also be made into a task at some
+ * point.
+ *
+ */
+#pragma once
+
+#include <future>
+#include <functional>
+#include <chrono>
+
+#include "framework/util/Configuration.h"
+#include "framework/scheduling/Epoch.h"
+#include "framework/scheduling/statistics.h"
+
+namespace de {
+
/*
 * Argument bundle for a reconstruction (merge) job. The job applies the
 * listed merges against the structure held by `epoch` and fulfills
 * `result` when it finishes.
 */
template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
struct ReconstructionArgs {
    Epoch<R, S, Q, L> *epoch;               // epoch the reconstruction operates against
    std::vector<ReconstructionTask> merges; // individual merge steps to perform
    std::promise<bool> result;              // fulfilled on completion; value signals success
    bool compaction;                        // presumably distinguishes compactions from
                                            // normal reconstructions -- TODO confirm with caller
    void *extension;                        // opaque pointer, presumably back to the owning
                                            // framework object -- verify against the job runner
};
+
/*
 * Argument bundle for a query job. The job evaluates the query described
 * by `query_parms` and delivers its records through `result_set`.
 */
template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
struct QueryArgs {
    std::promise<std::vector<R>> result_set; // fulfilled with the query's result records
    void *query_parms;                       // opaque, query-type-specific parameter block
    void *extension;                         // opaque pointer, presumably the owning framework
                                             // object -- verify against the job runner
};
+
+typedef std::function<void(void*)> Job;
+
+struct Task {
+ Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr)
+ : m_job(job)
+ , m_size(size)
+ , m_timestamp(ts)
+ , m_args(args)
+ , m_type(type)
+ , m_stats(stats)
+ {}
+
+ Job m_job;
+ size_t m_size;
+ size_t m_timestamp;
+ void *m_args;
+ size_t m_type;
+ SchedulerStatistics *m_stats;
+
+ friend bool operator<(const Task &self, const Task &other) {
+ return self.m_timestamp < other.m_timestamp;
+ }
+
+ friend bool operator>(const Task &self, const Task &other) {
+ return self.m_timestamp > other.m_timestamp;
+ }
+
+ void operator()(size_t thrd_id) {
+ auto start = std::chrono::high_resolution_clock::now();
+ if (m_stats) {
+ m_stats->job_begin(m_timestamp);
+ }
+
+ m_job(m_args);
+
+ if (m_stats) {
+ m_stats->job_complete(m_timestamp);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ if (m_stats) {
+ auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ m_stats->log_time_data(time, m_type);
+ }
+ }
+};
+
+}
diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h
new file mode 100644
index 0000000..6c479cd
--- /dev/null
+++ b/include/framework/scheduling/statistics.h
@@ -0,0 +1,118 @@
+/*
+ * include/framework/scheduling/statistics.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * This is a stub for a statistics tracker to be used in scheduling. It
+ * currently only tracks simple aggregated statistics, but should be
+ * updated in the future for more fine-grained statistics. These will be
+ * used for making scheduling decisions and predicting the runtime of a
+ * given job.
+ */
+#pragma once
+
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <unordered_map>
#include <vector>
+
+namespace de {
+
class SchedulerStatistics {
private:
    /* lifecycle stages a job can be logged at (for future event tracking) */
    enum class EventType {
        QUEUED,
        SCHEDULED,
        STARTED,
        FINISHED
    };

    struct Event {
        size_t id;
        EventType type;
    };

    struct JobInfo {
        size_t id;
        size_t size;
        size_t type;
    };


public:
    SchedulerStatistics() = default;
    ~SchedulerStatistics() = default;

    /* Stub: per-job event tracking is not yet implemented. */
    void job_queued(size_t id, size_t type, size_t size) {}

    /* Stub: per-job event tracking is not yet implemented. */
    void job_scheduled(size_t id) {
        std::unique_lock<std::mutex> lk(m_mutex);

    }

    /* Stub: per-job event tracking is not yet implemented. */
    void job_begin(size_t id) {}

    /* Stub: per-job event tracking is not yet implemented. */
    void job_complete(size_t id) {}

    /*
     * FIXME: This is just a temporary approach. Records one job runtime
     * sample; type 1 corresponds to queries, type 2 to reconstructions.
     */
    void log_time_data(size_t length, size_t type) {
        assert(type == 1 || type == 2);

        auto &cnt     = (type == 1) ? m_type_1_cnt : m_type_2_cnt;
        auto &total   = (type == 1) ? m_type_1_total_time : m_type_2_total_time;
        auto &largest = (type == 1) ? m_type_1_largest_time : m_type_2_largest_time;

        cnt.fetch_add(1);
        total.fetch_add(length);

        /*
         * CAS loop for the running maximum: the old check-then-store could
         * overwrite a larger value stored concurrently by another thread.
         */
        size_t cur = largest.load();
        while (length > cur && !largest.compare_exchange_weak(cur, length)) {
        }
    }

    /* Number of samples recorded for `type` (1 = query, 2 = reconstruction). */
    size_t get_job_count(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_cnt.load() : m_type_2_cnt.load();
    }

    /* Sum of recorded runtimes for `type`, in the units passed to log_time_data. */
    size_t get_total_time(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_total_time.load() : m_type_2_total_time.load();
    }

    /* Largest single runtime recorded for `type`. */
    size_t get_max_time(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_largest_time.load() : m_type_2_largest_time.load();
    }

    void print_statistics() {
        /* %zu: these counters are size_t; %ld is UB where size_t != long */
        if (m_type_1_cnt.load() > 0) {
            fprintf(stdout, "Query Count: %zu\tQuery Avg. Latency: %zu\tMax Query Latency: %zu\n",
                    m_type_1_cnt.load(),
                    m_type_1_total_time.load() / m_type_1_cnt.load(),
                    m_type_1_largest_time.load());
        }
        if (m_type_2_cnt.load() > 0) {
            fprintf(stdout, "Reconstruction Count: %zu\tReconstruction Avg. Latency: %zu\tMax Recon. Latency:%zu\n",
                    m_type_2_cnt.load(),
                    m_type_2_total_time.load() / m_type_2_cnt.load(),
                    m_type_2_largest_time.load());
        }
    }

private:
    std::mutex m_mutex;
    std::unordered_map<size_t, JobInfo> m_jobs;
    std::vector<Event> m_event_log;

    /*
     * Zero-initialize every counter: a default-constructed std::atomic
     * holds an indeterminate value prior to C++20, which made all of
     * these statistics garbage.
     */
    std::atomic<size_t> m_type_1_cnt{0};
    std::atomic<size_t> m_type_1_total_time{0};

    std::atomic<size_t> m_type_2_cnt{0};
    std::atomic<size_t> m_type_2_total_time{0};

    std::atomic<size_t> m_type_1_largest_time{0};
    std::atomic<size_t> m_type_2_largest_time{0};
};
+}