summaryrefslogtreecommitdiffstats
path: root/include/framework/scheduling
diff options
context:
space:
mode:
Diffstat (limited to 'include/framework/scheduling')
-rw-r--r--include/framework/scheduling/Epoch.h143
-rw-r--r--include/framework/scheduling/FIFOScheduler.h129
-rw-r--r--include/framework/scheduling/SerialScheduler.h62
-rw-r--r--include/framework/scheduling/Task.h89
-rw-r--r--include/framework/scheduling/statistics.h118
5 files changed, 541 insertions, 0 deletions
diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h
new file mode 100644
index 0000000..9377fb0
--- /dev/null
+++ b/include/framework/scheduling/Epoch.h
@@ -0,0 +1,143 @@
+/*
+ * include/framework/scheduling/Epoch.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "framework/structure/MutableBuffer.h"
+#include "framework/structure/ExtensionStructure.h"
+#include "framework/structure/BufferView.h"
+
+namespace de {
+
+
+template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
+class Epoch {
+private:
+ typedef MutableBuffer<R> Buffer;
+ typedef ExtensionStructure<R, S, Q, L> Structure;
+ typedef BufferView<R> BufView;
+public:
+ Epoch(size_t number=0)
+ : m_buffer(nullptr)
+ , m_structure(nullptr)
+ , m_active_merge(false)
+ , m_epoch_number(number)
+ , m_buffer_head(0)
+ {}
+
+ Epoch(size_t number, Structure *structure, Buffer *buff, size_t head)
+ : m_buffer(buff)
+ , m_structure(structure)
+ , m_active_merge(false)
+ , m_epoch_number(number)
+ , m_buffer_head(head)
+ {
+ structure->take_reference();
+ }
+
+ ~Epoch() {
+ if (m_structure) {
+ m_structure->release_reference();
+ }
+
+ if (m_structure->get_reference_count() == 0) {
+ delete m_structure;
+ }
+
+ }
+
+ /*
+ * Epochs are *not* copyable or movable. Only one can exist, and all users
+ * of it work with pointers
+ */
+ Epoch(const Epoch&) = delete;
+ Epoch(Epoch&&) = delete;
+ Epoch &operator=(const Epoch&) = delete;
+ Epoch &operator=(Epoch&&) = delete;
+
+ size_t get_epoch_number() {
+ return m_epoch_number;
+ }
+
+ Structure *get_structure() {
+ return m_structure;
+ }
+
+ BufView get_buffer() {
+ return m_buffer->get_buffer_view(m_buffer_head);
+ }
+
+ /*
+ * Returns a new Epoch object that is a copy of this one. The new object
+ * will also contain a copy of the m_structure, rather than a reference to
+ * the same one. The epoch number of the new epoch will be set to the
+ * provided argument.
+ */
+ Epoch *clone(size_t number) {
+ std::unique_lock<std::mutex> m_buffer_lock;
+ auto epoch = new Epoch(number);
+ epoch->m_buffer = m_buffer;
+ epoch->m_buffer_head = m_buffer_head;
+
+ if (m_structure) {
+ epoch->m_structure = m_structure->copy();
+ /* the copy routine returns a structure with 0 references */
+ epoch->m_structure->take_reference();
+ }
+
+ return epoch;
+ }
+
+ /*
+ * Check if a merge can be started from this Epoch. At present, without
+ * concurrent merging, this simply checks if there is currently a scheduled
+ * merge based on this Epoch. If there is, returns false. If there isn't,
+ * return true and set a flag indicating that there is an active merge.
+ */
+ bool prepare_reconstruction() {
+ auto old = m_active_merge.load();
+ if (old) {
+ return false;
+ }
+
+ // FIXME: this needs cleaned up
+ while (!m_active_merge.compare_exchange_strong(old, true)) {
+ old = m_active_merge.load();
+ if (old) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool advance_buffer_head(size_t head) {
+ m_buffer_head = head;
+ return m_buffer->advance_head(m_buffer_head);
+ }
+
+private:
+ Structure *m_structure;
+ Buffer *m_buffer;
+
+ std::mutex m_buffer_lock;
+ std::atomic<bool> m_active_merge;
+
+ /*
+ * The number of currently active jobs
+ * (queries/merges) operating on this
+ * epoch. An epoch can only be retired
+ * when this number is 0.
+ */
+ size_t m_epoch_number;
+ size_t m_buffer_head;
+};
+}
diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h
new file mode 100644
index 0000000..3ed4f49
--- /dev/null
+++ b/include/framework/scheduling/FIFOScheduler.h
@@ -0,0 +1,129 @@
+/*
+ * include/framework/scheduling/FIFOScheduler.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * This scheduler runs just concurrently, using a standard FIFO queue to
+ * determine which jobs to run next. If more jobs are scheduled than there
+ * are available threads, the excess will stall until a thread becomes
+ * available and then run in the order they were received by the scheduler.
+ *
+ * TODO: We need to set up a custom threadpool based on jthreads to support
+ * thread preemption for a later phase of this project. That will allow us
+ * to avoid blocking epoch transitions on long-running queries, or to pause
+ * reconstructions on demand.
+ */
+#pragma once
+
+#include <thread>
+#include <condition_variable>
+#include <chrono>
+#include "framework/scheduling/Task.h"
+#include "framework/scheduling/statistics.h"
+
+#include "ctpl/ctpl.h"
+#include "psu-ds/LockedPriorityQueue.h"
+
+namespace de {
+
+using namespace std::literals::chrono_literals;
+
+
+class FIFOScheduler {
+private:
+ static const size_t DEFAULT_MAX_THREADS = 8;
+
+public:
+ FIFOScheduler(size_t memory_budget, size_t thread_cnt)
+ : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX)
+ , m_thrd_cnt((thread_cnt) ? thread_cnt: DEFAULT_MAX_THREADS)
+ , m_used_memory(0)
+ , m_used_thrds(0)
+ , m_shutdown(false)
+ {
+ m_sched_thrd = std::thread(&FIFOScheduler::run, this);
+ m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this);
+ m_thrd_pool.resize(m_thrd_cnt);
+ }
+
+ ~FIFOScheduler() {
+ if (!m_shutdown.load()) {
+ shutdown();
+ }
+
+ m_sched_thrd.join();
+ m_sched_wakeup_thrd.join();
+ }
+
+ void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) {
+ std::unique_lock<std::mutex> lk(m_cv_lock);
+ size_t ts = m_counter.fetch_add(1);
+
+ m_stats.job_queued(ts, type, size);
+ m_task_queue.push(Task(size, ts, job, args, type, &m_stats));
+
+ m_cv.notify_all();
+ }
+
+ void shutdown() {
+ m_shutdown.store(true);
+ m_thrd_pool.stop(true);
+ m_cv.notify_all();
+ }
+
+ void print_statistics() {
+ m_stats.print_statistics();
+ }
+
+private:
+ psudb::LockedPriorityQueue<Task> m_task_queue;
+
+ size_t m_memory_budget;
+ size_t m_thrd_cnt;
+
+ std::atomic<bool> m_shutdown;
+
+ std::atomic<size_t> m_counter;
+ std::mutex m_cv_lock;
+ std::condition_variable m_cv;
+
+ std::thread m_sched_thrd;
+ std::thread m_sched_wakeup_thrd;
+ ctpl::thread_pool m_thrd_pool;
+
+ std::atomic<size_t> m_used_thrds;
+ std::atomic<size_t> m_used_memory;
+
+ SchedulerStatistics m_stats;
+
+ void periodic_wakeup() {
+ do {
+ std::this_thread::sleep_for(10us);
+ m_cv.notify_all();
+ } while (!m_shutdown.load());
+ }
+
+ void schedule_next() {
+ assert(m_task_queue.size() > 0);
+ auto t = m_task_queue.pop();
+ m_stats.job_scheduled(t.m_timestamp);
+
+ m_thrd_pool.push(t);
+ }
+
+ void run() {
+ do {
+ std::unique_lock<std::mutex> cv_lock(m_cv_lock);
+ m_cv.wait(cv_lock);
+
+ while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) {
+ schedule_next();
+ }
+ } while(!m_shutdown.load());
+ }
+
+};
+
+}
diff --git a/include/framework/scheduling/SerialScheduler.h b/include/framework/scheduling/SerialScheduler.h
new file mode 100644
index 0000000..ac59301
--- /dev/null
+++ b/include/framework/scheduling/SerialScheduler.h
@@ -0,0 +1,62 @@
+/*
+ * include/framework/scheduling/SerialScheduler.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * IMPORTANT: This "scheduler" is a shim implementation for allowing
+ * strictly serial, single-threaded operation of the framework. It should
+ * never be used in multi-threaded contexts. A call to the schedule_job
+ * function will immediately run the job and block on its completion before
+ * returning.
+ *
+ */
+#pragma once
+
+#include "framework/scheduling/Task.h"
+#include "framework/scheduling/statistics.h"
+
+namespace de {
+
+class SerialScheduler {
+public:
+ SerialScheduler(size_t memory_budget, size_t thread_cnt)
+ : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX)
+ , m_thrd_cnt((thread_cnt) ? thread_cnt: UINT64_MAX)
+ , m_used_memory(0)
+ , m_used_thrds(0)
+ , m_counter(0)
+ {}
+
+ ~SerialScheduler() = default;
+
+ void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) {
+ size_t ts = m_counter++;
+ m_stats.job_queued(ts, type, size);
+ m_stats.job_scheduled(ts);
+ auto t = Task(size, ts, job, args, type, &m_stats);
+ t(0);
+ }
+
+ void shutdown() {
+ /* intentionally left blank */
+ }
+
+ void print_statistics() {
+ m_stats.print_statistics();
+ }
+
+private:
+ size_t m_memory_budget;
+ size_t m_thrd_cnt;
+
+ size_t m_used_thrds;
+ size_t m_used_memory;
+
+ size_t m_counter;
+
+ SchedulerStatistics m_stats;
+};
+
+}
diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h
new file mode 100644
index 0000000..d5d4266
--- /dev/null
+++ b/include/framework/scheduling/Task.h
@@ -0,0 +1,89 @@
+/*
+ * include/framework/scheduling/Task.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * An abstraction to represent a job to be scheduled. Currently the
+ * supported task types are queries and merges. Based on the current plan,
+ * simple buffer inserts will likely also be made into a task at some
+ * point.
+ *
+ */
+#pragma once
+
+#include <future>
+#include <functional>
+#include <chrono>
+
+#include "framework/util/Configuration.h"
+#include "framework/scheduling/Epoch.h"
+#include "framework/scheduling/statistics.h"
+
+namespace de {
+
/*
 * Argument bundle for a reconstruction (merge) job. The job applies the
 * listed merges against the structure held by `epoch` and fulfills
 * `result` when it finishes.
 */
template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
struct ReconstructionArgs {
    Epoch<R, S, Q, L> *epoch;               // epoch the reconstruction operates against
    std::vector<ReconstructionTask> merges; // individual merge steps to perform
    std::promise<bool> result;              // fulfilled on completion; value signals success
    bool compaction;                        // presumably distinguishes compactions from
                                            // normal reconstructions -- TODO confirm with caller
    void *extension;                        // opaque pointer, presumably back to the owning
                                            // framework object -- verify against the job runner
};
+
/*
 * Argument bundle for a query job. The job evaluates the query described
 * by `query_parms` and delivers its records through `result_set`.
 */
template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L>
struct QueryArgs {
    std::promise<std::vector<R>> result_set; // fulfilled with the query's result records
    void *query_parms;                       // opaque, query-type-specific parameter block
    void *extension;                         // opaque pointer, presumably the owning framework
                                             // object -- verify against the job runner
};
+
+typedef std::function<void(void*)> Job;
+
+struct Task {
+ Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr)
+ : m_job(job)
+ , m_size(size)
+ , m_timestamp(ts)
+ , m_args(args)
+ , m_type(type)
+ , m_stats(stats)
+ {}
+
+ Job m_job;
+ size_t m_size;
+ size_t m_timestamp;
+ void *m_args;
+ size_t m_type;
+ SchedulerStatistics *m_stats;
+
+ friend bool operator<(const Task &self, const Task &other) {
+ return self.m_timestamp < other.m_timestamp;
+ }
+
+ friend bool operator>(const Task &self, const Task &other) {
+ return self.m_timestamp > other.m_timestamp;
+ }
+
+ void operator()(size_t thrd_id) {
+ auto start = std::chrono::high_resolution_clock::now();
+ if (m_stats) {
+ m_stats->job_begin(m_timestamp);
+ }
+
+ m_job(m_args);
+
+ if (m_stats) {
+ m_stats->job_complete(m_timestamp);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ if (m_stats) {
+ auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ m_stats->log_time_data(time, m_type);
+ }
+ }
+};
+
+}
diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h
new file mode 100644
index 0000000..6c479cd
--- /dev/null
+++ b/include/framework/scheduling/statistics.h
@@ -0,0 +1,118 @@
+/*
+ * include/framework/scheduling/statistics.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * This is a stub for a statistics tracker to be used in scheduling. It
+ * currently only tracks simple aggregated statistics, but should be
+ * updated in the future for more fine-grained statistics. These will be
+ * used for making scheduling decisions and predicting the runtime of a
+ * given job.
+ */
+#pragma once
+
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <unordered_map>
#include <vector>
+
+namespace de {
+
class SchedulerStatistics {
private:
    /* lifecycle stages a job can be logged at (for future event tracking) */
    enum class EventType {
        QUEUED,
        SCHEDULED,
        STARTED,
        FINISHED
    };

    struct Event {
        size_t id;
        EventType type;
    };

    struct JobInfo {
        size_t id;
        size_t size;
        size_t type;
    };


public:
    SchedulerStatistics() = default;
    ~SchedulerStatistics() = default;

    /* Stub: per-job event tracking is not yet implemented. */
    void job_queued(size_t id, size_t type, size_t size) {}

    /* Stub: per-job event tracking is not yet implemented. */
    void job_scheduled(size_t id) {
        std::unique_lock<std::mutex> lk(m_mutex);

    }

    /* Stub: per-job event tracking is not yet implemented. */
    void job_begin(size_t id) {}

    /* Stub: per-job event tracking is not yet implemented. */
    void job_complete(size_t id) {}

    /*
     * FIXME: This is just a temporary approach. Records one job runtime
     * sample; type 1 corresponds to queries, type 2 to reconstructions.
     */
    void log_time_data(size_t length, size_t type) {
        assert(type == 1 || type == 2);

        auto &cnt     = (type == 1) ? m_type_1_cnt : m_type_2_cnt;
        auto &total   = (type == 1) ? m_type_1_total_time : m_type_2_total_time;
        auto &largest = (type == 1) ? m_type_1_largest_time : m_type_2_largest_time;

        cnt.fetch_add(1);
        total.fetch_add(length);

        /*
         * CAS loop for the running maximum: the old check-then-store could
         * overwrite a larger value stored concurrently by another thread.
         */
        size_t cur = largest.load();
        while (length > cur && !largest.compare_exchange_weak(cur, length)) {
        }
    }

    /* Number of samples recorded for `type` (1 = query, 2 = reconstruction). */
    size_t get_job_count(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_cnt.load() : m_type_2_cnt.load();
    }

    /* Sum of recorded runtimes for `type`, in the units passed to log_time_data. */
    size_t get_total_time(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_total_time.load() : m_type_2_total_time.load();
    }

    /* Largest single runtime recorded for `type`. */
    size_t get_max_time(size_t type) {
        assert(type == 1 || type == 2);
        return (type == 1) ? m_type_1_largest_time.load() : m_type_2_largest_time.load();
    }

    void print_statistics() {
        /* %zu: these counters are size_t; %ld is UB where size_t != long */
        if (m_type_1_cnt.load() > 0) {
            fprintf(stdout, "Query Count: %zu\tQuery Avg. Latency: %zu\tMax Query Latency: %zu\n",
                    m_type_1_cnt.load(),
                    m_type_1_total_time.load() / m_type_1_cnt.load(),
                    m_type_1_largest_time.load());
        }
        if (m_type_2_cnt.load() > 0) {
            fprintf(stdout, "Reconstruction Count: %zu\tReconstruction Avg. Latency: %zu\tMax Recon. Latency:%zu\n",
                    m_type_2_cnt.load(),
                    m_type_2_total_time.load() / m_type_2_cnt.load(),
                    m_type_2_largest_time.load());
        }
    }

private:
    std::mutex m_mutex;
    std::unordered_map<size_t, JobInfo> m_jobs;
    std::vector<Event> m_event_log;

    /*
     * Zero-initialize every counter: a default-constructed std::atomic
     * holds an indeterminate value prior to C++20, which made all of
     * these statistics garbage.
     */
    std::atomic<size_t> m_type_1_cnt{0};
    std::atomic<size_t> m_type_1_total_time{0};

    std::atomic<size_t> m_type_2_cnt{0};
    std::atomic<size_t> m_type_2_total_time{0};

    std::atomic<size_t> m_type_1_largest_time{0};
    std::atomic<size_t> m_type_2_largest_time{0};
};
+}