From 7c03d771475421c1d5a2bbc135242536af1a371c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 25 Sep 2023 10:49:36 -0400 Subject: Re-structuring Project + scheduling updates This is a big one--probably should have split it apart, but I'm feeling lazy this morning. * Organized the mess of header files in include/framework by splitting them out into their own subdirectories, and renaming a few files to remove redundancies introduced by the directory structure. * Introduced a new framework/ShardRequirements.h header file for simpler shard development. This header simply contains the necessary includes from framework/* for creating shard files. This should help to remove structural dependencies from the framework file structure and shards, as well as centralizing the necessary framework files to make shard development easier. * Created a (currently dummy) SchedulerInterface, and made the scheduler implementation a template parameter of the dynamic extension for easier testing of various scheduling policies. There's still more work to be done to fully integrate the scheduler (queries, multiple buffers), but some more of the necessary framework code for this has been added as well. * Adjusted the Task interface setup for the scheduler. The task structures have been removed from ExtensionStructure and placed in their own header file. Additionally, I started experimenting with using std::variant, as opposed to inheritance, to implement subtype polymorphism on the Merge and Query tasks. The scheduler now has a general task queue that contains both, and std::variant, std::visit, and std::get are used to manipulate them without virtual functions. * Removed Alex.h, as it can't build anyway. There's a branch out there containing the Alex implementation stripped of the C++20 stuff. So there's no need to keep it here. 
--- include/framework/scheduling/Task.h | 63 +++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 include/framework/scheduling/Task.h (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h new file mode 100644 index 0000000..9e0655a --- /dev/null +++ b/include/framework/scheduling/Task.h @@ -0,0 +1,63 @@ +/* + * + */ +#pragma once + +#include + +#include "framework/util/Configuration.h" + +namespace de { + +enum class TaskType { + MERGE, + QUERY +}; + +struct MergeTask { + level_index m_source_level; + level_index m_target_level; + size_t m_timestamp; + size_t m_size; + TaskType m_type; + + TaskType get_type() const { + return m_type; + } + + friend bool operator<(const MergeTask &self, const MergeTask &other) { + return self.m_timestamp < other.m_timestamp; + } + + friend bool operator>(const MergeTask &self, const MergeTask &other) { + return self.m_timestamp > other.m_timestamp; + } + +}; + +struct QueryTask { + size_t m_timestamp; + size_t m_size; + TaskType m_type; + + TaskType get_type() const { + return m_type; + } + + friend bool operator<(const QueryTask &self, const QueryTask &other) { + return self.m_timestamp < other.m_timestamp; + } + + friend bool operator>(const QueryTask &self, const QueryTask &other) { + return self.m_timestamp > other.m_timestamp; + } +}; + +struct GetTaskType { + TaskType operator()(const MergeTask &t) { return t.get_type(); } + TaskType operator()(const QueryTask &t) { return t.get_type(); } +}; + +typedef std::variant Task; + +} -- cgit v1.2.3 From 1a47cbd7978dcad7ed0b2f2af3f933137eedbfa3 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 20 Oct 2023 15:12:46 -0400 Subject: Checkpointing work I'll probably throw all this out, but I want to stash it just in case. 
--- include/framework/scheduling/Task.h | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 9e0655a..3c1b158 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "framework/util/Configuration.h" @@ -14,17 +15,52 @@ enum class TaskType { QUERY }; +struct TaskDependency { + std::promise prom; + std::future fut; +}; + struct MergeTask { level_index m_source_level; level_index m_target_level; size_t m_timestamp; size_t m_size; TaskType m_type; + std::unique_ptr m_dep; + + MergeTask() = default; + + MergeTask(level_index source, level_index target, size_t size, size_t timestamp) + : m_source_level(source) + , m_target_level(target) + , m_timestamp(timestamp) + , m_size(size) + , m_type(TaskType::MERGE) + , m_dep(std::make_unique()){} + + + MergeTask(MergeTask &t) + : m_source_level(t.m_source_level) + , m_target_level(t.m_target_level) + , m_timestamp(t.m_timestamp) + , m_size(t.m_size) + , m_type(TaskType::MERGE) + , m_dep(std::move(t.m_dep)) + {} + TaskType get_type() const { return m_type; } + void make_dependent_on(MergeTask &task) { + m_dep->fut = task.m_dep->prom.get_future(); + } + + void make_dependent_on(TaskDependency *dep) { + m_dep->fut = dep->prom.get_future(); + } + friend bool operator<(const MergeTask &self, const MergeTask &other) { return self.m_timestamp < other.m_timestamp; } @@ -39,11 +75,27 @@ struct QueryTask { size_t m_timestamp; size_t m_size; TaskType m_type; + std::unique_ptr m_dep; + + QueryTask(QueryTask &t) + : m_timestamp(t.m_timestamp) + , m_size(t.m_size) + , m_type(t.m_type) + , m_dep(std::move(t.m_dep)) + {} TaskType get_type() const { return m_type; } + void SetDependency(QueryTask &task) { + m_dep->fut = task.m_dep->prom.get_future(); + } + + void SetDependency(TaskDependency 
*dep) { + m_dep->fut = dep->prom.get_future(); + } + friend bool operator<(const QueryTask &self, const QueryTask &other) { return self.m_timestamp < other.m_timestamp; } -- cgit v1.2.3 From 7ecfb22c32b7986ed1a2439c1abbeed298e4153a Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 20 Oct 2023 17:00:42 -0400 Subject: Initial pass w/ new scheduler setup currently there's a race condition of some type to sort out. --- include/framework/scheduling/Task.h | 112 +++++++++--------------------------- 1 file changed, 27 insertions(+), 85 deletions(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 3c1b158..518159d 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -5,111 +5,53 @@ #include #include +#include #include "framework/util/Configuration.h" namespace de { -enum class TaskType { - MERGE, - QUERY +struct MergeArgs { + void *version; + void *buffer; + std::vector merges; + std::promise result; }; -struct TaskDependency { - std::promise prom; - std::future fut; +template +struct QueryArgs { + void *version; + void *buffer; + std::promise> result_set; + void *query_parms; }; -struct MergeTask { - level_index m_source_level; - level_index m_target_level; - size_t m_timestamp; - size_t m_size; - TaskType m_type; - std::unique_ptr m_dep; - - MergeTask() = default; - - MergeTask(level_index source, level_index target, size_t size, size_t timestamp) - : m_source_level(source) - , m_target_level(target) - , m_timestamp(timestamp) - , m_size(size) - , m_type(TaskType::MERGE) - , m_dep(std::make_unique()){} - +typedef std::function Job; - MergeTask(MergeTask &t) - : m_source_level(t.m_source_level) - , m_target_level(t.m_target_level) - , m_timestamp(t.m_timestamp) - , m_size(t.m_size) - , m_type(TaskType::MERGE) - , m_dep(std::move(t.m_dep)) +struct Task { + Task(size_t size, size_t ts, Job job, void *args) + : m_job(job) + , 
m_size(size) + , m_timestamp(ts) + , m_args(args) {} - - TaskType get_type() const { - return m_type; - } - - void make_dependent_on(MergeTask &task) { - m_dep->fut = task.m_dep->prom.get_future(); - } - - void make_dependent_on(TaskDependency *dep) { - m_dep->fut = dep->prom.get_future(); - } - - friend bool operator<(const MergeTask &self, const MergeTask &other) { - return self.m_timestamp < other.m_timestamp; - } - - friend bool operator>(const MergeTask &self, const MergeTask &other) { - return self.m_timestamp > other.m_timestamp; - } - -}; - -struct QueryTask { - size_t m_timestamp; + Job m_job; size_t m_size; - TaskType m_type; - std::unique_ptr m_dep; - - QueryTask(QueryTask &t) - : m_timestamp(t.m_timestamp) - , m_size(t.m_size) - , m_type(t.m_type) - , m_dep(std::move(t.m_dep)) - {} - - TaskType get_type() const { - return m_type; - } - - void SetDependency(QueryTask &task) { - m_dep->fut = task.m_dep->prom.get_future(); - } - - void SetDependency(TaskDependency *dep) { - m_dep->fut = dep->prom.get_future(); - } + size_t m_timestamp; + void *m_args; - friend bool operator<(const QueryTask &self, const QueryTask &other) { + friend bool operator<(const Task &self, const Task &other) { return self.m_timestamp < other.m_timestamp; } - friend bool operator>(const QueryTask &self, const QueryTask &other) { + friend bool operator>(const Task &self, const Task &other) { return self.m_timestamp > other.m_timestamp; } -}; -struct GetTaskType { - TaskType operator()(const MergeTask &t) { return t.get_type(); } - TaskType operator()(const QueryTask &t) { return t.get_type(); } + void operator()() { + m_job(m_args); + } }; -typedef std::variant Task; - } -- cgit v1.2.3 From 3afacb7702e6d8fa67749a2a41dc776d315e02a9 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 23 Oct 2023 17:43:22 -0400 Subject: Began moving to an explicit epoch-based system I started moving over to an explicit Epoch based system, which has necessitated a ton of changes throughout the 
code base. This will ultimately allow for a much cleaner set of abstractions for managing concurrency. --- include/framework/scheduling/Task.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 518159d..94c4d0a 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -8,20 +8,20 @@ #include #include "framework/util/Configuration.h" +#include "framework/scheduling/Epoch.h" namespace de { +template struct MergeArgs { - void *version; - void *buffer; + Epoch *epoch; std::vector merges; std::promise result; }; -template +template struct QueryArgs { - void *version; - void *buffer; + Epoch *epoch; std::promise> result_set; void *query_parms; }; -- cgit v1.2.3 From 39ae3e0441d8297a09197aba98bd494b5ada12c1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 14:17:59 -0400 Subject: Concurrency updates + fixes for compile errors --- include/framework/scheduling/Task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 94c4d0a..d25c7c0 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -17,6 +17,7 @@ struct MergeArgs { Epoch *epoch; std::vector merges; std::promise result; + void *extension; }; template -- cgit v1.2.3 From d2279e1b96d352a0af1d425dcaaf93e8a26a8d52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 17:15:05 -0400 Subject: General Comment + Consistency updates --- include/framework/scheduling/Task.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index d25c7c0..228665f 100644 --- a/include/framework/scheduling/Task.h 
+++ b/include/framework/scheduling/Task.h @@ -1,9 +1,13 @@ /* + * include/framework/scheduling/Task.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. * */ #pragma once -#include #include #include -- cgit v1.2.3 From 4e4cf858122ca6c1ae6d5f635e839089769fee38 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 6 Nov 2023 10:01:23 -0500 Subject: Scheduling: Switched over to a thread pool model --- include/framework/scheduling/Task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 228665f..6dfd7df 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -54,7 +54,7 @@ struct Task { return self.m_timestamp > other.m_timestamp; } - void operator()() { + void operator()(size_t thrd_id) { m_job(m_args); } }; -- cgit v1.2.3 From 357cab549c2ed33970562b84ff6f83923742343d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 7 Nov 2023 15:34:24 -0500 Subject: Comment and License updates --- include/framework/scheduling/Task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 6dfd7df..d211fb5 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -3,7 +3,7 @@ * * Copyright (C) 2023 Douglas B. Rumbaugh * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. 
* */ #pragma once -- cgit v1.2.3 From 90bb0614fc1d8f1a185a778e31aaf9027c01aeb8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 13 Nov 2023 11:44:09 -0500 Subject: Tombstone Compaction: re-enabled tombstone compaction Currently, proactive buffer tombstone compaction is disabled by forcing the buffer tombstone capacity to match its record capacity. It isn't clear how to best handle proactive buffer compactions in an environment where new buffers are spawned anyway. --- include/framework/scheduling/Task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index d211fb5..c10ed8b 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -21,6 +21,7 @@ struct MergeArgs { Epoch *epoch; std::vector merges; std::promise result; + bool compaction; void *extension; }; -- cgit v1.2.3 From 3c127eda69295cb306739bdd3c5ddccff6026a8d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 13 Dec 2023 12:39:54 -0500 Subject: Refactoring: corrected a number of names and added more comments --- include/framework/scheduling/Task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index c10ed8b..16f5e58 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -17,9 +17,9 @@ namespace de { template -struct MergeArgs { +struct ReconstructionArgs { Epoch *epoch; - std::vector merges; + std::vector merges; std::promise result; bool compaction; void *extension; -- cgit v1.2.3 From 38693c342558628c75e0ab0d23c32a95a499ed8b Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Jan 2024 15:58:04 -0500 Subject: Initial rough-out of internal statistics tracker Need to figure out the best way to do the detailed tracking in a concurrent 
manner. I was thinking just an event log, with parsing routines for extracting statistics. But that'll be pretty slow. --- include/framework/scheduling/Task.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 16f5e58..b14b229 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -10,9 +10,11 @@ #include #include +#include #include "framework/util/Configuration.h" #include "framework/scheduling/Epoch.h" +#include "framework/scheduling/statistics.h" namespace de { @@ -35,17 +37,21 @@ struct QueryArgs { typedef std::function Job; struct Task { - Task(size_t size, size_t ts, Job job, void *args) + Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr) : m_job(job) , m_size(size) , m_timestamp(ts) , m_args(args) + , m_type(type) + , m_stats(stats) {} Job m_job; size_t m_size; size_t m_timestamp; void *m_args; + size_t m_type; + SchedulerStatistics *m_stats; friend bool operator<(const Task &self, const Task &other) { return self.m_timestamp < other.m_timestamp; @@ -56,7 +62,22 @@ struct Task { } void operator()(size_t thrd_id) { + auto start = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_begin(m_timestamp); + } + m_job(m_args); + + if (m_stats) { + m_stats->job_complete(m_timestamp); + } + auto stop = std::chrono::high_resolution_clock::now(); + + if (m_stats) { + auto time = std::chrono::duration_cast(stop - start).count(); + m_stats->log_time_data(time, m_type); + } } }; -- cgit v1.2.3 From f3b7428cfa7f9364c5a8bc85107db3a7cccd53bc Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 18:41:17 -0500 Subject: Adjusted epoch transition methodology --- include/framework/scheduling/Task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index b14b229..6f6b913 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -29,9 +29,9 @@ struct ReconstructionArgs { template struct QueryArgs { - Epoch *epoch; std::promise> result_set; void *query_parms; + void *extension; }; typedef std::function Job; -- cgit v1.2.3 From 10b4425e842d10b7fbfa85978969ed4591d6b98e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 10:56:52 -0500 Subject: Fully implemented Query concept and adjusted queries to use it --- include/framework/scheduling/Task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 6f6b913..ba0001d 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -18,7 +18,7 @@ namespace de { -template +template Q, LayoutPolicy L> struct ReconstructionArgs { Epoch *epoch; std::vector merges; @@ -27,7 +27,7 @@ struct ReconstructionArgs { void *extension; }; -template +template Q, LayoutPolicy L> struct QueryArgs { std::promise> result_set; void *query_parms; -- cgit v1.2.3 From 2c5d549b3618b9ea72e6eece4cb4f3da5a6811a8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 13:42:34 -0500 Subject: Fully realized shard concept interface --- include/framework/scheduling/Task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index ba0001d..008f232 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -18,7 +18,7 @@ namespace de { -template Q, LayoutPolicy L> +template S, QueryInterface Q, LayoutPolicy L> struct ReconstructionArgs { Epoch *epoch; std::vector 
merges; @@ -27,7 +27,7 @@ struct ReconstructionArgs { void *extension; }; -template Q, LayoutPolicy L> +template S, QueryInterface Q, LayoutPolicy L> struct QueryArgs { std::promise> result_set; void *query_parms; -- cgit v1.2.3 From 402fc269c0aaa671d84a6d15918735ad4b90e6b2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 9 Feb 2024 12:30:21 -0500 Subject: Comment updates/fixes --- include/framework/scheduling/Task.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/framework/scheduling/Task.h') diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 008f232..d5d4266 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -5,6 +5,11 @@ * * Distributed under the Modified BSD License. * + * An abstraction to represent a job to be scheduled. Currently the + * supported task types are queries and merges. Based on the current plan, + * simple buffer inserts will likely also be made into a task at some + * point. + * */ #pragma once -- cgit v1.2.3