diff options
Diffstat (limited to 'include/framework/interface')
| -rw-r--r-- | include/framework/interface/Query.h | 136 | ||||
| -rw-r--r-- | include/framework/interface/Record.h | 335 | ||||
| -rw-r--r-- | include/framework/interface/Scheduler.h | 15 | ||||
| -rw-r--r-- | include/framework/interface/Shard.h | 66 |
4 files changed, 332 insertions, 220 deletions
diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h index 577d6cd..1b64646 100644 --- a/include/framework/interface/Query.h +++ b/include/framework/interface/Query.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Query.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,23 +10,127 @@ #include "framework/QueryRequirements.h" -namespace de{ +namespace de { -template <typename Q, typename R, typename S> -concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv, std::vector<R> &resv) { - {Q::get_query_state(sh, p)} -> std::convertible_to<void*>; - {Q::get_buffer_query_state(bv, p)} -> std::convertible_to<void *>; - {Q::process_query_states(p, s, p)}; - {Q::query(sh, p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::buffer_query(p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::merge(rv, p, resv)}; +/* + * FIXME: It would probably be best to absorb the std::vector into + * this type too; this would allow user-defined collections for + * intermediate results, which could allow for more merging + * optimizations. However, this would require an alternative + * approach to doing delete checks, so we'll leave it for now. + */ +template <typename R> +concept LocalResultInterface = requires(R res) { + { res.is_deleted() } -> std::convertible_to<bool>; + { res.is_tombstone() } -> std::convertible_to<bool>; +}; + +/* + * + * + */ +template <typename QUERY, typename SHARD, + typename RESULT = typename QUERY::ResultType, + typename LOCAL_RESULT = typename QUERY::LocalResultType, + typename PARAMETERS = typename QUERY::Parameters, + typename LOCAL = typename QUERY::LocalQuery, + typename LOCAL_BUFFER = typename QUERY::LocalQueryBuffer> +concept QueryInterface = LocalResultInterface<LOCAL_RESULT> && + requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query, + SHARD *shard, std::vector<LOCAL *> &local_queries, + std::vector<std::vector<LOCAL_RESULT>> &local_results, + std::vector<RESULT> &result, + BufferView<typename SHARD::RECORD> *bv) { + + /* + * Given a set of query parameters and a shard, return a local query + * object for that shard. + */ + { QUERY::local_preproc(shard, parameters) } -> std::convertible_to<LOCAL *>; + + /* + * Given a set of query parameters and a buffer view, return a local + * query object for the buffer. + * NOTE: for interface reasons, the pointer to the buffer view MUST be + * stored inside of the local query object. The future buffer + * query routine will access the buffer by way of this pointer. + */ + { + QUERY::local_preproc_buffer(bv, parameters) + } -> std::convertible_to<LOCAL_BUFFER *>; + + /* + * Given a full set of local queries, and the buffer query, make any + * necessary adjustments to the local queries in-place, to account for + * global information. If no additional processing is required, this + * function can be left empty. + */ + {QUERY::distribute_query(parameters, local_queries, buffer_query)}; + + /* + * Answer the local query, defined by `local` against `shard` and return + * a vector of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query(shard, local) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Answer the local query defined by `local` against the buffer (which + * should be accessed by a pointer inside of `local`) and return a vector + * of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query_buffer(buffer_query) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Process the local results from the buffer and all of the shards, + * stored in `local_results`, and insert the associated ResultType + * objects into the `result` vector, which represents the final result + * of the query. Updates to this vector are done in-place. + */ + {QUERY::combine(local_results, parameters, result)}; - {Q::delete_query_state(p)} -> std::same_as<void>; - {Q::delete_buffer_query_state(p)} -> std::same_as<void>; + /* + * Process the post-combine `result` vector of ResultType objects, + * in the context of the global and local query parameters, to determine + * if the query should be repeated. If so, make any necessary adjustments + * to the local query objects and return True. Otherwise, return False. + * + * If no repetition is needed for a given problem type, simply return + * False immediately and the query will end. + */ + { + QUERY::repeat(parameters, result, local_queries, buffer_query) + } -> std::same_as<bool>; - {Q::repeat(p, resv, s, p)} -> std::same_as<bool>; + /* + * If this flag is True, then the query will immediately stop and return + * a result as soon as the first non-deleted LocalRecordType is found. + * Otherwise, every Shard and the buffer will be queried and the results + * merged, like normal. + * + * This is largely an optimization flag for use with point-lookup, or + * other single-record result queries + */ + { QUERY::EARLY_ABORT } -> std::convertible_to<bool>; - {Q::EARLY_ABORT} -> std::convertible_to<bool>; - {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>; + /* + * If false, the built-in delete filtering that the framework can + * apply to the local results, prior to calling combine, will be skipped. + * This general filtering can be inefficient, particularly for tombstone + * -based deletes, and so if a more efficient manual filtering can be + * performed, it is worth setting this to True and doing that filtering + * in the combine step. + * + * If deletes are not a consideration for your problem, it's also best + * to turn this off, as it'll avoid the framework making an extra pass + * over the local results prior to combining them. + * + * TODO: Temporarily disabling this, as we've dropped framework-level + * delete filtering for the time being. + */ + /* { QUERY::SKIP_DELETE_FILTER } -> std::convertible_to<bool>; */ }; -} +} // namespace de diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h index 19ccadd..d3e77d8 100644 --- a/include/framework/interface/Record.h +++ b/include/framework/interface/Record.h @@ -1,272 +1,247 @@ /* * include/framework/interface/Record.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * - * FIXME: the record implementations could probably be broken out into + * FIXME: the record implementations could probably be broken out into * different files, leaving only the interface here */ #pragma once -#include <cstring> -#include <concepts> #include <cmath> +#include <concepts> +#include <cstring> #include "psu-util/hash.h" namespace de { -template<typename R> +template <typename R> concept RecordInterface = requires(R r, R s) { - { r < s } ->std::convertible_to<bool>; - { r == s } ->std::convertible_to<bool>; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<typename R> +template <typename R> concept WeightedRecordInterface = requires(R r) { - {r.weight} -> std::convertible_to<double>; + { r.weight } -> std::convertible_to<double>; }; -template<typename R> +template <typename R> concept NDRecordInterface = RecordInterface<R> && requires(R r, R s) { - {r.calc_distance(s)} -> std::convertible_to<double>; + { r.calc_distance(s) } -> std::convertible_to<double>; }; template <typename R> concept KVPInterface = RecordInterface<R> && requires(R r) { - r.key; - r.value; + r.key; + r.value; }; -template<typename R> +template <typename R> concept AlexInterface = KVPInterface<R> && requires(R r) { - {r.key} -> std::convertible_to<size_t>; - {r.value} -> std::convertible_to<size_t>; + { r.key } -> std::convertible_to<size_t>; + { r.value } -> std::convertible_to<size_t>; }; -template<typename R> -concept WrappedInterface = RecordInterface<R> && requires(R r, R s, bool b, int i) { - {r.header} -> std::convertible_to<uint32_t>; - r.rec; - {r.set_delete()}; - {r.is_deleted()} -> std::convertible_to<bool>; - {r.set_tombstone(b)}; - {r.is_tombstone()} -> std::convertible_to<bool>; - {r.set_timestamp(i)}; - {r.get_timestamp()} -> std::convertible_to<uint32_t>; - {r.clear_timestamp()}; - {r.is_visible()} -> std::convertible_to<bool>; - {r.set_visible()}; - {r < s} -> std::convertible_to<bool>; - {r == s} ->std::convertible_to<bool>; +template <typename R> +concept WrappedInterface = RecordInterface<R> && + requires(R r, R s, bool b, int i) { + { r.header } -> std::convertible_to<uint32_t>; + r.rec; + {r.set_delete()}; + { r.is_deleted() } -> std::convertible_to<bool>; + {r.set_tombstone(b)}; + { r.is_tombstone() } -> std::convertible_to<bool>; + {r.set_timestamp(i)}; + { r.get_timestamp() } -> std::convertible_to<uint32_t>; + {r.clear_timestamp()}; + { r.is_visible() } -> std::convertible_to<bool>; + {r.set_visible()}; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<RecordInterface R> -struct Wrapped { - uint32_t header; - R rec; +template <RecordInterface R> struct Wrapped { + uint32_t header; + R rec; - inline void set_delete() { - header |= 2; - } + inline void set_delete() { header |= 2; } - inline bool is_deleted() const { - return header & 2; - } + inline bool is_deleted() const { return header & 2; } - inline void set_visible() { - header |= 4; - } + inline void set_visible() { header |= 4; } - inline bool is_visible() const { - return header & 4; - } + inline bool is_visible() const { return header & 4; } - inline void set_timestamp(int ts) { - header |= (ts << 3); - } - - inline int get_timestamp() const { - return header >> 3; - } + inline void set_timestamp(int ts) { header |= (ts << 3); } - inline void clear_timestamp() { - header &= 7; - } + inline int get_timestamp() const { return header >> 3; } - inline void set_tombstone(bool val=true) { - if (val) { - header |= 1; - } else { - header &= 0; - } - } + inline void clear_timestamp() { header &= 7; } - inline bool is_tombstone() const { - return header & 1; + inline void set_tombstone(bool val = true) { + if (val) { + header |= 1; + } else { + header &= 0; } + } - inline bool operator<(const Wrapped& other) const { - return rec < other.rec || (rec == other.rec && header < other.header); - } + inline bool is_tombstone() const { return header & 1; } - inline bool operator==(const Wrapped& other) const { - return rec == other.rec; - } + inline bool operator<(const Wrapped &other) const { + return rec < other.rec || (rec == other.rec && header < other.header); + } + inline bool operator==(const Wrapped &other) const { + return rec == other.rec; + } }; -template <typename K, typename V> -struct Record { - K key; - V value; +template <typename K, typename V> struct Record { + K key; + V value; - inline bool operator<(const Record& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const Record &other) const { + return key < other.key || (key == other.key && value < other.value); + } - inline bool operator==(const Record& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const Record &other) const { + return key == other.key && value == other.value; + } }; -template<typename V> -struct Record<const char*, V> { - const char* key; - V value; - size_t len; +template <typename V> struct Record<const char *, V> { + const char *key; + V value; + size_t len; - inline bool operator<(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) < 0; - } + inline bool operator<(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) < 0; + } - inline bool operator==(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) == 0; - } + inline bool operator==(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) == 0; + } }; -template <typename K, typename V, typename W> -struct WeightedRecord { - K key; - V value; - W weight = 1; +template <typename K, typename V, typename W> struct WeightedRecord { + K key; + V value; + W weight = 1; - inline bool operator==(const WeightedRecord& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const WeightedRecord &other) const { + return key == other.key && value == other.value; + } - inline bool operator<(const WeightedRecord& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const WeightedRecord &other) const { + return key < other.key || (key == other.key && value < other.value); + } }; +template <typename V, size_t D = 2> struct CosinePoint { + V data[D]; -template <typename V, size_t D=2> -struct CosinePoint{ - V data[D]; - - inline bool operator==(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } - - return true; + inline bool operator==(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return true; + } + /* lexicographic order */ + inline bool operator<(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { + return true; + } else if (data[i] > other.data[i]) { return false; + } } - inline double calc_distance(const CosinePoint& other) const { + return false; + } - double prod = 0; - double asquared = 0; - double bsquared = 0; + inline double calc_distance(const CosinePoint &other) const { - for (size_t i=0; i<D; i++) { - prod += data[i] * other.data[i]; - asquared += data[i]*data[i]; - bsquared += other.data[i]*other.data[i]; - } + double prod = 0; + double asquared = 0; + double bsquared = 0; - return prod / std::sqrt(asquared * bsquared); + for (size_t i = 0; i < D; i++) { + prod += data[i] * other.data[i]; + asquared += data[i] * data[i]; + bsquared += other.data[i] * other.data[i]; } + + return prod / std::sqrt(asquared * bsquared); + } }; +template <typename V, size_t D = 2> struct EuclidPoint { + V data[D]; -template <typename V, size_t D=2> -struct EuclidPoint{ - V data[D]; + inline bool operator==(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } + } - inline bool operator==(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } + return true; + } + /* lexicographic order */ + inline bool operator<(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { return true; + } else if (data[i] > other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return false; + } - return false; + inline double calc_distance(const EuclidPoint &other) const { + double dist = 0; + for (size_t i = 0; i < D; i++) { + dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); } - inline double calc_distance(const EuclidPoint& other) const { - double dist = 0; - for (size_t i=0; i<D; i++) { - dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); - } - - return std::sqrt(dist); - } + return std::sqrt(dist); + } }; -template<RecordInterface R> -struct RecordHash { - size_t operator()(R const &rec) const { - return psudb::hash_bytes((std::byte *) &rec, sizeof(R)); - } +template <RecordInterface R> struct RecordHash { + size_t operator()(R const &rec) const { + return psudb::hash_bytes((std::byte *)&rec, sizeof(R)); + } }; -template <typename R> -class DistCmpMax { +template <typename R> class DistCmpMax { public: - DistCmpMax(R *baseline) : P(baseline) {} + DistCmpMax(R *baseline) : P(baseline) {} - inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } + inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { + return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); + } - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ - return a->calc_distance(*P) > b->calc_distance(*P); - } + inline bool operator()(const R *a, + const R *b) requires(!WrappedInterface<R>) { + return a->calc_distance(*P) > b->calc_distance(*P); + } private: - R *P; + R *P; }; -} +} // namespace de diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h index 451ddd2..d76a6c8 100644 --- a/include/framework/interface/Scheduler.h +++ b/include/framework/interface/Scheduler.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Scheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,10 +10,11 @@ #include "framework/scheduling/Task.h" -template <typename S> -concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) { - {S(i, i)}; - {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; - {s.shutdown()}; - {s.print_statistics()}; +template <typename SchedType> +concept SchedulerInterface = requires(SchedType s, size_t i, void *vp, + de::Job j) { + {SchedType(i, i)}; + {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; + {s.shutdown()}; + {s.print_statistics()}; }; diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h index c4a9180..bd980c0 100644 --- a/include/framework/interface/Shard.h +++ b/include/framework/interface/Shard.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Shard.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -12,25 +12,57 @@ namespace de { -template <typename S, typename R> -concept ShardInterface = RecordInterface<R> && requires(S s, std::vector<S*> spp, void *p, bool b, size_t i, BufferView<R> bv, R r) { - {S(spp)}; - {S(std::move(bv))}; +template <typename SHARD> +concept ShardInterface = RecordInterface<typename SHARD::RECORD> && + requires(SHARD shard, const std::vector<SHARD *> &shard_vector, bool b, + BufferView<typename SHARD::RECORD> bv, + typename SHARD::RECORD rec) { + /* construct a shard from a vector of shards of the same type */ + {SHARD(shard_vector)}; - {s.point_lookup(r, b) } -> std::same_as<Wrapped<R>*>; - {s.get_data()} -> std::same_as<Wrapped<R>*>; + /* construct a shard from a buffer view (i.e., unsorted array of records) */ + {SHARD(std::move(bv))}; + + /* perform a lookup for a record matching rec and return a pointer to it */ + { + shard.point_lookup(rec, b) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; + + /* + * return the number of records in the shard -- used to determine when + * reconstructions occur + */ + { shard.get_record_count() } -> std::convertible_to<size_t>; + + /* + * return the number of tombstones in the shard -- can simply return + * 0 if tombstones are not in use. + */ + { shard.get_tombstone_count() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by the main data structure + * within the shard -- informational use only at the moment + */ + { shard.get_memory_usage() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by auxilliary data + * structures (bloom filters, etc.) within the shard -- informational + * use only at the moment + */ + { shard.get_aux_memory_usage() } -> std::convertible_to<size_t>; - {s.get_record_count()} -> std::convertible_to<size_t>; - {s.get_tombstone_count()} -> std::convertible_to<size_t>; - {s.get_memory_usage()} -> std::convertible_to<size_t>; - {s.get_aux_memory_usage()} -> std::convertible_to<size_t>; }; -template <typename S, typename R> -concept SortedShardInterface = ShardInterface<S, R> && requires(S s, R r, R *rp, size_t i) { - {s.lower_bound(r)} -> std::convertible_to<size_t>; - {s.upper_bound(r)} -> std::convertible_to<size_t>; - {s.get_record_at(i)} -> std::same_as<Wrapped<R>*>; +template <typename SHARD> +concept SortedShardInterface = ShardInterface<SHARD> && + requires(SHARD shard, typename SHARD::RECORD rec, size_t index) { + { shard.lower_bound(rec) } -> std::convertible_to<size_t>; + { shard.upper_bound(rec) } -> std::convertible_to<size_t>; + { + shard.get_record_at(index) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; }; -} +} // namespace de |