summaryrefslogtreecommitdiffstats
path: root/include/framework
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2024-02-09 12:30:21 -0500
committerDouglas Rumbaugh <dbr4@psu.edu>2024-02-09 12:30:21 -0500
commit402fc269c0aaa671d84a6d15918735ad4b90e6b2 (patch)
tree145b1658f31a005eda33c9231b2b8ee7bab7915f /include/framework
parent711769574e647839677739192698e400529efe75 (diff)
downloaddynamic-extension-402fc269c0aaa671d84a6d15918735ad4b90e6b2.tar.gz
Comment updates/fixes
Diffstat (limited to 'include/framework')
-rw-r--r--include/framework/DynamicExtension.h73
-rw-r--r--include/framework/interface/Query.h5
-rw-r--r--include/framework/interface/Record.h7
-rw-r--r--include/framework/scheduling/Epoch.h7
-rw-r--r--include/framework/scheduling/FIFOScheduler.h9
-rw-r--r--include/framework/scheduling/Task.h5
-rw-r--r--include/framework/scheduling/statistics.h5
-rw-r--r--include/framework/structure/BufferView.h1
-rw-r--r--include/framework/structure/ExtensionStructure.h64
-rw-r--r--include/framework/structure/InternalLevel.h6
-rw-r--r--include/framework/util/Configuration.h37
11 files changed, 152 insertions, 67 deletions
diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h
index e7dd774..473592d 100644
--- a/include/framework/DynamicExtension.h
+++ b/include/framework/DynamicExtension.h
@@ -13,7 +13,6 @@
#include <cstdio>
#include <vector>
#include <set>
-#include <shared_mutex>
#include <mutex>
#include "framework/interface/Scheduler.h"
@@ -87,10 +86,34 @@ public:
}
}
+ /*
+ * Insert the record `rec` into the index. If the buffer is full and
+ * the framework is blocking on an epoch transition, this call may fail
+ * and return 0. In this case, retry the call again later. If
+ * successful, 1 will be returned. The record will be immediately
+ * visible in the buffer upon the successful return of this function.
+ */
int insert(const R &rec) {
return internal_append(rec, false);
}
+ /*
+ * Erase the record `rec` from the index. It is assumed that `rec`
+ * currently exists--no special checks are made for correctness here.
+ * The behavior if this function will differ depending on if tombstone
+ * or tagged deletes are used.
+ *
+ * Tombstone deletes - inserts a tombstone record for `rec`. This *may*
+ * return 0 and fail if the buffer is full and the framework is
+ * blocking on an epoch transition. In this case, repeat the call
+ * later. 1 will be returned when the tombstone is successfully
+ * inserted.
+ *
+ * Tagging deletes - Does a point lookup for the record across the
+ * entire structure, and sets its delete bit when found. Returns 1 if
+ * the record is found and marked, and 0 if it was not (i.e., if it
+ * isn't present in the index).
+ */
int erase(const R &rec) {
// FIXME: delete tagging will require a lot of extra work to get
// operating "correctly" in a concurrent environment.
@@ -121,10 +144,23 @@ public:
return internal_append(rec, true);
}
+ /*
+ * Execute the query with parameters `parms` and return a future. This
+ * future can be used to access a vector containing the results of the
+ * query.
+ *
+ * The behavior of this function is undefined if `parms` is not a
+ * pointer to a valid query parameter object for the query type used as
+ * a template parameter to construct the framework.
+ */
std::future<std::vector<R>> query(void *parms) {
return schedule_query(parms);
}
+ /*
+ * Returns the number of records (included tagged records and
+ * tombstones) currently within the framework.
+ */
size_t get_record_count() {
auto epoch = get_active_epoch();
auto t = epoch->get_buffer().get_record_count() + epoch->get_structure()->get_record_count();
@@ -133,6 +169,11 @@ public:
return t;
}
+ /*
+ * Returns the number of tombstone records currently within the
+ * framework. This function can be called when tagged deletes are used,
+ * but will always return 0 in that case.
+ */
size_t get_tombstone_count() {
auto epoch = get_active_epoch();
auto t = epoch->get_buffer().get_tombstone_count() + epoch->get_structure()->get_tombstone_count();
@@ -141,6 +182,12 @@ public:
return t;
}
+ /*
+ * Get the number of levels within the framework. This count will
+ * include any empty levels, but will not include the buffer. Note that
+ * this is *not* the same as the number of shards when tiering is used,
+ * as each level can contain multiple shards in that case.
+ */
size_t get_height() {
auto epoch = get_active_epoch();
auto t = epoch->get_structure()->get_height();
@@ -149,6 +196,13 @@ public:
return t;
}
+ /*
+ * Get the number of bytes of memory allocated across the framework for
+ * storing records and associated index information (i.e., internal
+ * ISAM tree nodes). This includes memory that is allocated but
+ * currently unused in the buffer, or in shards themselves
+ * (overallocation due to delete cancellation, etc.).
+ */
size_t get_memory_usage() {
auto epoch = get_active_epoch();
auto t= epoch->get_buffer().get_memory_usage() + epoch->get_structure()->get_memory_usage();
@@ -157,6 +211,11 @@ public:
return t;
}
+ /*
+ * Get the number of bytes of memory allocated across the framework for
+ * auxiliary structures. This can include bloom filters, aux
+ * hashtables, etc.
+ */
size_t get_aux_memory_usage() {
auto epoch = get_active_epoch();
auto t = epoch->get_buffer().get_aux_memory_usage() + epoch->get_structure()->get_aux_memory_usage();
@@ -165,10 +224,22 @@ public:
return t;
}
+ /*
+ * Returns the maximum physical capacity of the buffer, measured in
+ * records.
+ */
size_t get_buffer_capacity() {
return m_buffer->get_capacity();
}
+ /*
+ * Create a new single Shard object containing all of the records
+ * within the framework (buffer and shards). The optional parameter can
+ * be used to specify whether the Shard should be constructed with the
+ * currently active state of the framework (false), or if shard
+ * construction should wait until any ongoing reconstructions have
+ * finished and use that new version (true).
+ */
Shard *create_static_structure(bool await_reconstruction_completion=false) {
if (await_reconstruction_completion) {
await_next_epoch();
diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h
index 8cf9660..3d487f0 100644
--- a/include/framework/interface/Query.h
+++ b/include/framework/interface/Query.h
@@ -9,12 +9,9 @@
#pragma once
#include "framework/QueryRequirements.h"
-#include <concepts>
namespace de{
-// FIXME: The interface is not completely specified yet, as it is pending
-// determining a good way to handle additional template arguments
-// to get the Shard and Record types into play
+
template <typename Q, typename R, typename S>
concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv) {
{Q::get_query_state(sh, p)} -> std::convertible_to<void*>;
diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h
index 29df4b6..5b9f307 100644
--- a/include/framework/interface/Record.h
+++ b/include/framework/interface/Record.h
@@ -138,7 +138,7 @@ struct CosinePoint{
return true;
}
- // lexicographic order
+ /* lexicographic order */
inline bool operator<(const CosinePoint& other) const {
for (size_t i=0; i<D; i++) {
if (data[i] < other.data[i]) {
@@ -182,7 +182,7 @@ struct EuclidPoint{
return true;
}
- // lexicographic order
+ /* lexicographic order */
inline bool operator<(const EuclidPoint& other) const {
for (size_t i=0; i<D; i++) {
if (data[i] < other.data[i]) {
@@ -228,7 +228,4 @@ public:
private:
R *P;
};
-
-
-
}
diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h
index e58bd11..3ffa145 100644
--- a/include/framework/scheduling/Epoch.h
+++ b/include/framework/scheduling/Epoch.h
@@ -44,13 +44,6 @@ public:
}
~Epoch() {
- /* FIXME: this is needed to keep the destructor from sometimes locking
- * up here. But there *shouldn't* be any threads waiting on this signal
- * at object destruction, so something else is going on here that needs
- * looked into
- */
- // m_active_cv.notify_all();
-
if (m_structure) {
m_structure->release_reference();
}
diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h
index c6baf9b..3ed4f49 100644
--- a/include/framework/scheduling/FIFOScheduler.h
+++ b/include/framework/scheduling/FIFOScheduler.h
@@ -5,6 +5,15 @@
*
* Distributed under the Modified BSD License.
*
+ * This scheduler runs just concurrently, using a standard FIFO queue to
+ * determine which jobs to run next. If more jobs are scheduled than there
+ * are available threads, the excess will stall until a thread becomes
+ * available and then run in the order they were received by the scheduler.
+ *
+ * TODO: We need to set up a custom threadpool based on jthreads to support
+ * thread preemption for a later phase of this project. That will allow us
+ * to avoid blocking epoch transitions on long-running queries, or to pause
+ * reconstructions on demand.
*/
#pragma once
diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h
index 008f232..d5d4266 100644
--- a/include/framework/scheduling/Task.h
+++ b/include/framework/scheduling/Task.h
@@ -5,6 +5,11 @@
*
* Distributed under the Modified BSD License.
*
+ * An abstraction to represent a job to be scheduled. Currently the
+ * supported task types are queries and merges. Based on the current plan,
+ * simple buffer inserts will likely also be made into a task at some
+ * point.
+ *
*/
#pragma once
diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h
index 50ba196..6c479cd 100644
--- a/include/framework/scheduling/statistics.h
+++ b/include/framework/scheduling/statistics.h
@@ -5,6 +5,11 @@
*
* Distributed under the Modified BSD License.
*
+ * This is a stub for a statistics tracker to be used in scheduling. It
+ * currently only tracks simple aggregated statistics, but should be
+ * updated in the future for more fine-grained statistics. These will be
+ * used for making scheduling decisions and predicting the runtime of a
+ * given job.
*/
#pragma once
diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h
index 4e3de25..9e0872b 100644
--- a/include/framework/structure/BufferView.h
+++ b/include/framework/structure/BufferView.h
@@ -5,6 +5,7 @@
*
* Distributed under the Modified BSD License.
*
+ * TODO: This file is very poorly commented.
*/
#pragma once
diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h
index 373a1e2..4802bc1 100644
--- a/include/framework/structure/ExtensionStructure.h
+++ b/include/framework/structure/ExtensionStructure.h
@@ -37,19 +37,23 @@ public:
~ExtensionStructure() = default;
/*
- * Create a shallow copy of this extension structure. The copy will share references to the
- * same levels/shards as the original, but will have its own lists. As all of the shards are
- * immutable (with the exception of deletes), the copy can be restructured with reconstructions
- * and flushes without affecting the original. The copied structure will be returned with a reference
- * count of 0; generally you will want to immediately call take_reference() on it.
+ * Create a shallow copy of this extension structure. The copy will share
+ * references to the same levels/shards as the original, but will have its
+ * own lists. As all of the shards are immutable (with the exception of
+ * deletes), the copy can be restructured with reconstructions and flushes
+ * without affecting the original. The copied structure will be returned
+ * with a reference count of 0; generally you will want to immediately call
+ * take_reference() on it.
*
- * NOTE: When using tagged deletes, a delete of a record in the original structure will affect
- * the copy, so long as the copy retains a reference to the same shard as the original. This could
- * cause synchronization problems under tagging with concurrency. Any deletes in this context will
+ * NOTE: When using tagged deletes, a delete of a record in the original
+ * structure will affect the copy, so long as the copy retains a reference
+ * to the same shard as the original. This could cause synchronization
+ * problems under tagging with concurrency. Any deletes in this context will
* need to be forwarded to the appropriate structures manually.
*/
ExtensionStructure<R, S, Q, L> *copy() {
- auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, m_max_delete_prop);
+ auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor,
+ m_max_delete_prop);
for (size_t i=0; i<m_levels.size(); i++) {
new_struct->m_levels.push_back(m_levels[i]->clone());
}
@@ -64,9 +68,9 @@ public:
* setting the delete bit in its wrapped header. Returns 1 if a matching
* record was found and deleted, and 0 if a matching record was not found.
*
- * This function will stop after finding the first matching record. It is assumed
- * that no duplicate records exist. In the case of duplicates, this function will
- * still "work", but in the sense of "delete first match".
+ * This function will stop after finding the first matching record. It is
+ * assumed that no duplicate records exist. In the case of duplicates, this
+ * function will still "work", but in the sense of "delete first match".
*/
int tagged_delete(const R &rec) {
for (auto level : m_levels) {
@@ -77,7 +81,7 @@ public:
/*
* If the record to be erased wasn't found, return 0. The
- * DynamicExtension itself will then search the active
+ * DynamicExtension itself will then search the active
* Buffers.
*/
return 0;
@@ -164,21 +168,23 @@ public:
}
/*
- * Validate that no level in the structure exceeds its maximum tombstone capacity. This is
- * used to trigger preemptive compactions at the end of the reconstruction process.
+ * Validate that no level in the structure exceeds its maximum tombstone
+ * capacity. This is used to trigger preemptive compactions at the end of
+ * the reconstruction process.
*/
bool validate_tombstone_proportion() {
- long double ts_prop;
- for (size_t i=0; i<m_levels.size(); i++) {
- if (m_levels[i]) {
- ts_prop = (long double) m_levels[i]->get_tombstone_count() / (long double) calc_level_record_capacity(i);
- if (ts_prop > (long double) m_max_delete_prop) {
- return false;
- }
- }
+ long double ts_prop;
+ for (size_t i = 0; i < m_levels.size(); i++) {
+ if (m_levels[i]) {
+ ts_prop = (long double)m_levels[i]->get_tombstone_count() /
+ (long double)calc_level_record_capacity(i);
+ if (ts_prop > (long double)m_max_delete_prop) {
+ return false;
+ }
}
+ }
- return true;
+ return true;
}
bool validate_tombstone_proportion(level_index level) {
@@ -224,14 +230,14 @@ public:
/*
* The amount of storage required for the reconstruction accounts
* for the cost of storing the new records, along with the
- * cost of retaining the old records during the process
- * (hence the 2x multiplier).
+ * cost of retaining the old records during the process
+ * (hence the 2x multiplier).
*
- * FIXME: currently does not account for the *actual* size
- * of the shards, only the storage for the records
+ * FIXME: currently does not account for the *actual* size
+ * of the shards, only the storage for the records
* themselves.
*/
- size_t reccnt = m_levels[i-1]->get_record_count();
+ size_t reccnt = m_levels[i - 1]->get_record_count();
if constexpr (L == LayoutPolicy::LEVELING) {
if (can_reconstruct_with(i, reccnt)) {
reccnt += m_levels[i]->get_record_count();
diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h
index d586869..db38946 100644
--- a/include/framework/structure/InternalLevel.h
+++ b/include/framework/structure/InternalLevel.h
@@ -6,6 +6,12 @@
*
* Distributed under the Modified BSD License.
*
+ * The word `Internal` in this class's name refers to memory. The current
+ * model, inherited from the framework in Practical Dynamic Extension for
+ * Sampling Indexes, would use a different ExternalLevel for shards stored
+ * on external storage. This is a distinction that can probably be avoided
+ * with some more thought being put into interface design.
+ *
*/
#pragma once
diff --git a/include/framework/util/Configuration.h b/include/framework/util/Configuration.h
index 8e3d20f..65ca181 100644
--- a/include/framework/util/Configuration.h
+++ b/include/framework/util/Configuration.h
@@ -8,34 +8,29 @@
*/
#pragma once
-#include <atomic>
-#include <numeric>
-#include <cstdio>
-#include <vector>
-
-#include "psu-util/timer.h"
-#include "psu-ds/Alias.h"
+#include <cstdlib>
+#include <utility>
namespace de {
-thread_local size_t sampling_attempts = 0;
-thread_local size_t sampling_rejections = 0;
-thread_local size_t deletion_rejections = 0;
-thread_local size_t bounds_rejections = 0;
-thread_local size_t tombstone_rejections = 0;
-thread_local size_t buffer_rejections = 0;
+static thread_local size_t sampling_attempts = 0;
+static thread_local size_t sampling_rejections = 0;
+static thread_local size_t deletion_rejections = 0;
+static thread_local size_t bounds_rejections = 0;
+static thread_local size_t tombstone_rejections = 0;
+static thread_local size_t buffer_rejections = 0;
/*
* thread_local size_t various_sampling_times go here.
*/
-thread_local size_t sample_range_time = 0;
-thread_local size_t alias_time = 0;
-thread_local size_t alias_query_time = 0;
-thread_local size_t rejection_check_time = 0;
-thread_local size_t buffer_sample_time = 0;
-thread_local size_t memlevel_sample_time = 0;
-thread_local size_t disklevel_sample_time = 0;
-thread_local size_t sampling_bailouts = 0;
+static thread_local size_t sample_range_time = 0;
+static thread_local size_t alias_time = 0;
+static thread_local size_t alias_query_time = 0;
+static thread_local size_t rejection_check_time = 0;
+static thread_local size_t buffer_sample_time = 0;
+static thread_local size_t memlevel_sample_time = 0;
+static thread_local size_t disklevel_sample_time = 0;
+static thread_local size_t sampling_bailouts = 0;
enum class LayoutPolicy {