summaryrefslogtreecommitdiffstats
path: root/include/framework/interface
diff options
context:
space:
mode:
Diffstat (limited to 'include/framework/interface')
-rw-r--r--include/framework/interface/Query.h136
-rw-r--r--include/framework/interface/Record.h335
-rw-r--r--include/framework/interface/Scheduler.h15
-rw-r--r--include/framework/interface/Shard.h66
4 files changed, 332 insertions, 220 deletions
diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h
index 577d6cd..1b64646 100644
--- a/include/framework/interface/Query.h
+++ b/include/framework/interface/Query.h
@@ -1,7 +1,7 @@
/*
* include/framework/interface/Query.h
*
- * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu>
*
* Distributed under the Modified BSD License.
*
@@ -10,23 +10,127 @@
#include "framework/QueryRequirements.h"
-namespace de{
+namespace de {
-template <typename Q, typename R, typename S>
-concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv, std::vector<R> &resv) {
- {Q::get_query_state(sh, p)} -> std::convertible_to<void*>;
- {Q::get_buffer_query_state(bv, p)} -> std::convertible_to<void *>;
- {Q::process_query_states(p, s, p)};
- {Q::query(sh, p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>;
- {Q::buffer_query(p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>;
- {Q::merge(rv, p, resv)};
+/*
+ * FIXME: It would probably be best to absorb the std::vector into
+ * this type too; this would allow user-defined collections for
+ * intermediate results, which could allow for more merging
+ * optimizations. However, this would require an alternative
+ * approach to doing delete checks, so we'll leave it for now.
+ */
+template <typename R>
+concept LocalResultInterface = requires(R res) {
+ { res.is_deleted() } -> std::convertible_to<bool>;
+ { res.is_tombstone() } -> std::convertible_to<bool>;
+};
+
+/*
+ *
+ *
+ */
+template <typename QUERY, typename SHARD,
+ typename RESULT = typename QUERY::ResultType,
+ typename LOCAL_RESULT = typename QUERY::LocalResultType,
+ typename PARAMETERS = typename QUERY::Parameters,
+ typename LOCAL = typename QUERY::LocalQuery,
+ typename LOCAL_BUFFER = typename QUERY::LocalQueryBuffer>
+concept QueryInterface = LocalResultInterface<LOCAL_RESULT> &&
+ requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query,
+ SHARD *shard, std::vector<LOCAL *> &local_queries,
+ std::vector<std::vector<LOCAL_RESULT>> &local_results,
+ std::vector<RESULT> &result,
+ BufferView<typename SHARD::RECORD> *bv) {
+
+ /*
+ * Given a set of query parameters and a shard, return a local query
+ * object for that shard.
+ */
+ { QUERY::local_preproc(shard, parameters) } -> std::convertible_to<LOCAL *>;
+
+ /*
+ * Given a set of query parameters and a buffer view, return a local
+ * query object for the buffer.
+ * NOTE: for interface reasons, the pointer to the buffer view MUST be
+ * stored inside of the local query object. The future buffer
+ * query routine will access the buffer by way of this pointer.
+ */
+ {
+ QUERY::local_preproc_buffer(bv, parameters)
+ } -> std::convertible_to<LOCAL_BUFFER *>;
+
+ /*
+ * Given a full set of local queries, and the buffer query, make any
+ * necessary adjustments to the local queries in-place, to account for
+ * global information. If no additional processing is required, this
+ * function can be left empty.
+ */
+ {QUERY::distribute_query(parameters, local_queries, buffer_query)};
+
+ /*
+ * Answer the local query, defined by `local` against `shard` and return
+ * a vector of LOCAL_RESULT objects defining the query result.
+ */
+ {
+ QUERY::local_query(shard, local)
+ } -> std::convertible_to<std::vector<LOCAL_RESULT>>;
+
+ /*
+ * Answer the local query defined by `local` against the buffer (which
+ * should be accessed by a pointer inside of `local`) and return a vector
+ * of LOCAL_RESULT objects defining the query result.
+ */
+ {
+ QUERY::local_query_buffer(buffer_query)
+ } -> std::convertible_to<std::vector<LOCAL_RESULT>>;
+
+ /*
+ * Process the local results from the buffer and all of the shards,
+ * stored in `local_results`, and insert the associated ResultType
+ * objects into the `result` vector, which represents the final result
+ * of the query. Updates to this vector are done in-place.
+ */
+ {QUERY::combine(local_results, parameters, result)};
- {Q::delete_query_state(p)} -> std::same_as<void>;
- {Q::delete_buffer_query_state(p)} -> std::same_as<void>;
+ /*
+ * Process the post-combine `result` vector of ResultType objects,
+ * in the context of the global and local query parameters, to determine
+ * if the query should be repeated. If so, make any necessary adjustments
+ * to the local query objects and return True. Otherwise, return False.
+ *
+ * If no repetition is needed for a given problem type, simply return
+ * False immediately and the query will end.
+ */
+ {
+ QUERY::repeat(parameters, result, local_queries, buffer_query)
+ } -> std::same_as<bool>;
- {Q::repeat(p, resv, s, p)} -> std::same_as<bool>;
+ /*
+ * If this flag is True, then the query will immediately stop and return
+ * a result as soon as the first non-deleted LocalRecordType is found.
+ * Otherwise, every Shard and the buffer will be queried and the results
+ * merged, like normal.
+ *
+ * This is largely an optimization flag for use with point-lookup, or
+ * other single-record result queries
+ */
+ { QUERY::EARLY_ABORT } -> std::convertible_to<bool>;
- {Q::EARLY_ABORT} -> std::convertible_to<bool>;
- {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>;
+ /*
+ * If false, the built-in delete filtering that the framework can
+ * apply to the local results, prior to calling combine, will be skipped.
+ * This general filtering can be inefficient, particularly for tombstone
+ * -based deletes, and so if a more efficient manual filtering can be
+ * performed, it is worth setting this to True and doing that filtering
+ * in the combine step.
+ *
+ * If deletes are not a consideration for your problem, it's also best
+ * to turn this off, as it'll avoid the framework making an extra pass
+ * over the local results prior to combining them.
+ *
+ * TODO: Temporarily disabling this, as we've dropped framework-level
+ * delete filtering for the time being.
+ */
+ /* { QUERY::SKIP_DELETE_FILTER } -> std::convertible_to<bool>; */
};
-}
+} // namespace de
diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h
index 19ccadd..d3e77d8 100644
--- a/include/framework/interface/Record.h
+++ b/include/framework/interface/Record.h
@@ -1,272 +1,247 @@
/*
* include/framework/interface/Record.h
*
- * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu>
*
* Distributed under the Modified BSD License.
*
- * FIXME: the record implementations could probably be broken out into
+ * FIXME: the record implementations could probably be broken out into
* different files, leaving only the interface here
*/
#pragma once
-#include <cstring>
-#include <concepts>
#include <cmath>
+#include <concepts>
+#include <cstring>
#include "psu-util/hash.h"
namespace de {
-template<typename R>
+template <typename R>
concept RecordInterface = requires(R r, R s) {
- { r < s } ->std::convertible_to<bool>;
- { r == s } ->std::convertible_to<bool>;
+ { r < s } -> std::convertible_to<bool>;
+ { r == s } -> std::convertible_to<bool>;
};
-template<typename R>
+template <typename R>
concept WeightedRecordInterface = requires(R r) {
- {r.weight} -> std::convertible_to<double>;
+ { r.weight } -> std::convertible_to<double>;
};
-template<typename R>
+template <typename R>
concept NDRecordInterface = RecordInterface<R> && requires(R r, R s) {
- {r.calc_distance(s)} -> std::convertible_to<double>;
+ { r.calc_distance(s) } -> std::convertible_to<double>;
};
template <typename R>
concept KVPInterface = RecordInterface<R> && requires(R r) {
- r.key;
- r.value;
+ r.key;
+ r.value;
};
-template<typename R>
+template <typename R>
concept AlexInterface = KVPInterface<R> && requires(R r) {
- {r.key} -> std::convertible_to<size_t>;
- {r.value} -> std::convertible_to<size_t>;
+ { r.key } -> std::convertible_to<size_t>;
+ { r.value } -> std::convertible_to<size_t>;
};
-template<typename R>
-concept WrappedInterface = RecordInterface<R> && requires(R r, R s, bool b, int i) {
- {r.header} -> std::convertible_to<uint32_t>;
- r.rec;
- {r.set_delete()};
- {r.is_deleted()} -> std::convertible_to<bool>;
- {r.set_tombstone(b)};
- {r.is_tombstone()} -> std::convertible_to<bool>;
- {r.set_timestamp(i)};
- {r.get_timestamp()} -> std::convertible_to<uint32_t>;
- {r.clear_timestamp()};
- {r.is_visible()} -> std::convertible_to<bool>;
- {r.set_visible()};
- {r < s} -> std::convertible_to<bool>;
- {r == s} ->std::convertible_to<bool>;
+template <typename R>
+concept WrappedInterface = RecordInterface<R> &&
+ requires(R r, R s, bool b, int i) {
+ { r.header } -> std::convertible_to<uint32_t>;
+ r.rec;
+ {r.set_delete()};
+ { r.is_deleted() } -> std::convertible_to<bool>;
+ {r.set_tombstone(b)};
+ { r.is_tombstone() } -> std::convertible_to<bool>;
+ {r.set_timestamp(i)};
+ { r.get_timestamp() } -> std::convertible_to<uint32_t>;
+ {r.clear_timestamp()};
+ { r.is_visible() } -> std::convertible_to<bool>;
+ {r.set_visible()};
+ { r < s } -> std::convertible_to<bool>;
+ { r == s } -> std::convertible_to<bool>;
};
-template<RecordInterface R>
-struct Wrapped {
- uint32_t header;
- R rec;
+template <RecordInterface R> struct Wrapped {
+ uint32_t header;
+ R rec;
- inline void set_delete() {
- header |= 2;
- }
+ inline void set_delete() { header |= 2; }
- inline bool is_deleted() const {
- return header & 2;
- }
+ inline bool is_deleted() const { return header & 2; }
- inline void set_visible() {
- header |= 4;
- }
+ inline void set_visible() { header |= 4; }
- inline bool is_visible() const {
- return header & 4;
- }
+ inline bool is_visible() const { return header & 4; }
- inline void set_timestamp(int ts) {
- header |= (ts << 3);
- }
-
- inline int get_timestamp() const {
- return header >> 3;
- }
+ inline void set_timestamp(int ts) { header |= (ts << 3); }
- inline void clear_timestamp() {
- header &= 7;
- }
+ inline int get_timestamp() const { return header >> 3; }
- inline void set_tombstone(bool val=true) {
- if (val) {
- header |= 1;
- } else {
- header &= 0;
- }
- }
+ inline void clear_timestamp() { header &= 7; }
- inline bool is_tombstone() const {
- return header & 1;
+ inline void set_tombstone(bool val = true) {
+ if (val) {
+ header |= 1;
+ } else {
+ header &= 0;
}
+ }
- inline bool operator<(const Wrapped& other) const {
- return rec < other.rec || (rec == other.rec && header < other.header);
- }
+ inline bool is_tombstone() const { return header & 1; }
- inline bool operator==(const Wrapped& other) const {
- return rec == other.rec;
- }
+ inline bool operator<(const Wrapped &other) const {
+ return rec < other.rec || (rec == other.rec && header < other.header);
+ }
+ inline bool operator==(const Wrapped &other) const {
+ return rec == other.rec;
+ }
};
-template <typename K, typename V>
-struct Record {
- K key;
- V value;
+template <typename K, typename V> struct Record {
+ K key;
+ V value;
- inline bool operator<(const Record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
+ inline bool operator<(const Record &other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
- inline bool operator==(const Record& other) const {
- return key == other.key && value == other.value;
- }
+ inline bool operator==(const Record &other) const {
+ return key == other.key && value == other.value;
+ }
};
-template<typename V>
-struct Record<const char*, V> {
- const char* key;
- V value;
- size_t len;
+template <typename V> struct Record<const char *, V> {
+ const char *key;
+ V value;
+ size_t len;
- inline bool operator<(const Record& other) const {
- size_t n = std::min(len, other.len) + 1;
- return strncmp(key, other.key, n) < 0;
- }
+ inline bool operator<(const Record &other) const {
+ size_t n = std::min(len, other.len) + 1;
+ return strncmp(key, other.key, n) < 0;
+ }
- inline bool operator==(const Record& other) const {
- size_t n = std::min(len, other.len) + 1;
- return strncmp(key, other.key, n) == 0;
- }
+ inline bool operator==(const Record &other) const {
+ size_t n = std::min(len, other.len) + 1;
+ return strncmp(key, other.key, n) == 0;
+ }
};
-template <typename K, typename V, typename W>
-struct WeightedRecord {
- K key;
- V value;
- W weight = 1;
+template <typename K, typename V, typename W> struct WeightedRecord {
+ K key;
+ V value;
+ W weight = 1;
- inline bool operator==(const WeightedRecord& other) const {
- return key == other.key && value == other.value;
- }
+ inline bool operator==(const WeightedRecord &other) const {
+ return key == other.key && value == other.value;
+ }
- inline bool operator<(const WeightedRecord& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
+ inline bool operator<(const WeightedRecord &other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
};
+template <typename V, size_t D = 2> struct CosinePoint {
+ V data[D];
-template <typename V, size_t D=2>
-struct CosinePoint{
- V data[D];
-
- inline bool operator==(const CosinePoint& other) const {
- for (size_t i=0; i<D; i++) {
- if (data[i] != other.data[i]) {
- return false;
- }
- }
-
- return true;
+ inline bool operator==(const CosinePoint &other) const {
+ for (size_t i = 0; i < D; i++) {
+ if (data[i] != other.data[i]) {
+ return false;
+ }
}
- /* lexicographic order */
- inline bool operator<(const CosinePoint& other) const {
- for (size_t i=0; i<D; i++) {
- if (data[i] < other.data[i]) {
- return true;
- } else if (data[i] > other.data[i]) {
- return false;
- }
- }
+ return true;
+ }
+ /* lexicographic order */
+ inline bool operator<(const CosinePoint &other) const {
+ for (size_t i = 0; i < D; i++) {
+ if (data[i] < other.data[i]) {
+ return true;
+ } else if (data[i] > other.data[i]) {
return false;
+ }
}
- inline double calc_distance(const CosinePoint& other) const {
+ return false;
+ }
- double prod = 0;
- double asquared = 0;
- double bsquared = 0;
+ inline double calc_distance(const CosinePoint &other) const {
- for (size_t i=0; i<D; i++) {
- prod += data[i] * other.data[i];
- asquared += data[i]*data[i];
- bsquared += other.data[i]*other.data[i];
- }
+ double prod = 0;
+ double asquared = 0;
+ double bsquared = 0;
- return prod / std::sqrt(asquared * bsquared);
+ for (size_t i = 0; i < D; i++) {
+ prod += data[i] * other.data[i];
+ asquared += data[i] * data[i];
+ bsquared += other.data[i] * other.data[i];
}
+
+ return prod / std::sqrt(asquared * bsquared);
+ }
};
+template <typename V, size_t D = 2> struct EuclidPoint {
+ V data[D];
-template <typename V, size_t D=2>
-struct EuclidPoint{
- V data[D];
+ inline bool operator==(const EuclidPoint &other) const {
+ for (size_t i = 0; i < D; i++) {
+ if (data[i] != other.data[i]) {
+ return false;
+ }
+ }
- inline bool operator==(const EuclidPoint& other) const {
- for (size_t i=0; i<D; i++) {
- if (data[i] != other.data[i]) {
- return false;
- }
- }
+ return true;
+ }
+ /* lexicographic order */
+ inline bool operator<(const EuclidPoint &other) const {
+ for (size_t i = 0; i < D; i++) {
+ if (data[i] < other.data[i]) {
return true;
+ } else if (data[i] > other.data[i]) {
+ return false;
+ }
}
- /* lexicographic order */
- inline bool operator<(const EuclidPoint& other) const {
- for (size_t i=0; i<D; i++) {
- if (data[i] < other.data[i]) {
- return true;
- } else if (data[i] > other.data[i]) {
- return false;
- }
- }
+ return false;
+ }
- return false;
+ inline double calc_distance(const EuclidPoint &other) const {
+ double dist = 0;
+ for (size_t i = 0; i < D; i++) {
+ dist += (data[i] - other.data[i]) * (data[i] - other.data[i]);
}
- inline double calc_distance(const EuclidPoint& other) const {
- double dist = 0;
- for (size_t i=0; i<D; i++) {
- dist += (data[i] - other.data[i]) * (data[i] - other.data[i]);
- }
-
- return std::sqrt(dist);
- }
+ return std::sqrt(dist);
+ }
};
-template<RecordInterface R>
-struct RecordHash {
- size_t operator()(R const &rec) const {
- return psudb::hash_bytes((std::byte *) &rec, sizeof(R));
- }
+template <RecordInterface R> struct RecordHash {
+ size_t operator()(R const &rec) const {
+ return psudb::hash_bytes((std::byte *)&rec, sizeof(R));
+ }
};
-template <typename R>
-class DistCmpMax {
+template <typename R> class DistCmpMax {
public:
- DistCmpMax(R *baseline) : P(baseline) {}
+ DistCmpMax(R *baseline) : P(baseline) {}
- inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> {
- return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec);
- }
+ inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> {
+ return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec);
+ }
- inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){
- return a->calc_distance(*P) > b->calc_distance(*P);
- }
+ inline bool operator()(const R *a,
+ const R *b) requires(!WrappedInterface<R>) {
+ return a->calc_distance(*P) > b->calc_distance(*P);
+ }
private:
- R *P;
+ R *P;
};
-}
+} // namespace de
diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h
index 451ddd2..d76a6c8 100644
--- a/include/framework/interface/Scheduler.h
+++ b/include/framework/interface/Scheduler.h
@@ -1,7 +1,7 @@
/*
* include/framework/interface/Scheduler.h
*
- * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu>
*
* Distributed under the Modified BSD License.
*
@@ -10,10 +10,11 @@
#include "framework/scheduling/Task.h"
-template <typename S>
-concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) {
- {S(i, i)};
- {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>;
- {s.shutdown()};
- {s.print_statistics()};
+template <typename SchedType>
+concept SchedulerInterface = requires(SchedType s, size_t i, void *vp,
+ de::Job j) {
+ {SchedType(i, i)};
+ {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>;
+ {s.shutdown()};
+ {s.print_statistics()};
};
diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h
index c4a9180..bd980c0 100644
--- a/include/framework/interface/Shard.h
+++ b/include/framework/interface/Shard.h
@@ -1,7 +1,7 @@
/*
* include/framework/interface/Shard.h
*
- * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu>
*
* Distributed under the Modified BSD License.
*
@@ -12,25 +12,57 @@
namespace de {
-template <typename S, typename R>
-concept ShardInterface = RecordInterface<R> && requires(S s, std::vector<S*> spp, void *p, bool b, size_t i, BufferView<R> bv, R r) {
- {S(spp)};
- {S(std::move(bv))};
+template <typename SHARD>
+concept ShardInterface = RecordInterface<typename SHARD::RECORD> &&
+ requires(SHARD shard, const std::vector<SHARD *> &shard_vector, bool b,
+ BufferView<typename SHARD::RECORD> bv,
+ typename SHARD::RECORD rec) {
+ /* construct a shard from a vector of shards of the same type */
+ {SHARD(shard_vector)};
- {s.point_lookup(r, b) } -> std::same_as<Wrapped<R>*>;
- {s.get_data()} -> std::same_as<Wrapped<R>*>;
+ /* construct a shard from a buffer view (i.e., unsorted array of records) */
+ {SHARD(std::move(bv))};
+
+ /* perform a lookup for a record matching rec and return a pointer to it */
+ {
+ shard.point_lookup(rec, b)
+ } -> std::same_as<Wrapped<typename SHARD::RECORD> *>;
+
+ /*
+ * return the number of records in the shard -- used to determine when
+ * reconstructions occur
+ */
+ { shard.get_record_count() } -> std::convertible_to<size_t>;
+
+ /*
+ * return the number of tombstones in the shard -- can simply return
+ * 0 if tombstones are not in use.
+ */
+ { shard.get_tombstone_count() } -> std::convertible_to<size_t>;
+
+ /*
+ * return the number of bytes of memory used by the main data structure
+ * within the shard -- informational use only at the moment
+ */
+ { shard.get_memory_usage() } -> std::convertible_to<size_t>;
+
+ /*
+ * return the number of bytes of memory used by auxilliary data
+ * structures (bloom filters, etc.) within the shard -- informational
+ * use only at the moment
+ */
+ { shard.get_aux_memory_usage() } -> std::convertible_to<size_t>;
- {s.get_record_count()} -> std::convertible_to<size_t>;
- {s.get_tombstone_count()} -> std::convertible_to<size_t>;
- {s.get_memory_usage()} -> std::convertible_to<size_t>;
- {s.get_aux_memory_usage()} -> std::convertible_to<size_t>;
};
-template <typename S, typename R>
-concept SortedShardInterface = ShardInterface<S, R> && requires(S s, R r, R *rp, size_t i) {
- {s.lower_bound(r)} -> std::convertible_to<size_t>;
- {s.upper_bound(r)} -> std::convertible_to<size_t>;
- {s.get_record_at(i)} -> std::same_as<Wrapped<R>*>;
+template <typename SHARD>
+concept SortedShardInterface = ShardInterface<SHARD> &&
+ requires(SHARD shard, typename SHARD::RECORD rec, size_t index) {
+ { shard.lower_bound(rec) } -> std::convertible_to<size_t>;
+ { shard.upper_bound(rec) } -> std::convertible_to<size_t>;
+ {
+ shard.get_record_at(index)
+ } -> std::same_as<Wrapped<typename SHARD::RECORD> *>;
};
-}
+} // namespace de