From 9fe305c7d28e993e55c55427f377ae7e3251ea4f Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Fri, 6 Dec 2024 13:13:51 -0500 Subject: Interface update (#5) * Query Interface Adjustments/Refactoring Began the process of adjusting the query interface (and also the shard interface, to a lesser degree) to better accommodate the user. In particular the following changes have been made, 1. The number of necessary template arguments for the query type has been drastically reduced, while also removing the void pointers and manual delete functions from the interface. This was accomplished by requiring many of the sub-types associated with a query (parameters, etc.) to be nested inside the main query class, and by forcing the SHARD type to expose its associated record type. 2. User-defined query return types are now supported. Queries no longer are required to return strictly sets of records. Instead, the query now has LocalResultType and ResultType template parameters (which can be defaulted using a typedef in the Query type itself), allowing much more flexibility. Note that, at least for the short term, the LocalResultType must still expose the same is_deleted/is_tombstone interface as a Wrapped used to, as this is currently needed for delete filtering. A better approach to this is, hopefully, forthcoming. 3. Updated the ISAMTree.h shard and rangequery.h query to use the new interfaces, and adjusted the associated unit tests as well. 4. Dropped the unnecessary "get_data()" function from the ShardInterface concept. 5. Dropped the need to specify a record type in the ShardInterface concept. This is now handled using a required Shard::RECORD member of the Shard class itself, which should expose the name of the record type. * Updates to framework to support new Query/Shard interfaces Pretty extensive adjustments to the framework, particularly to the templates themselves, along with some type-renaming work, to support the new query and shard interfaces. Adjusted the external query interface to take an rvalue reference, rather than a pointer, to the query parameters. * Removed framework-level delete filtering This was causing some issues with the new query interface, and should probably be reworked anyway, so I'm temporarily (TM) removing the feature. * Updated benchmarks + remaining code for new interface --- include/framework/interface/Query.h | 136 +++++++++++-- include/framework/interface/Record.h | 335 +++++++++++++++----------------- include/framework/interface/Scheduler.h | 15 +- include/framework/interface/Shard.h | 66 +++++-- 4 files changed, 332 insertions(+), 220 deletions(-) (limited to 'include/framework/interface') diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h index 577d6cd..1b64646 100644 --- a/include/framework/interface/Query.h +++ b/include/framework/interface/Query.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Query.h * - * Copyright (C) 2023 Douglas B. Rumbaugh + * Copyright (C) 2023-2024 Douglas B. Rumbaugh * * Distributed under the Modified BSD License. * @@ -10,23 +10,127 @@ #include "framework/QueryRequirements.h" -namespace de{ +namespace de { -template -concept QueryInterface = requires(void *p, S *sh, std::vector &s, std::vector>> &rv, BufferView *bv, std::vector &resv) { - {Q::get_query_state(sh, p)} -> std::convertible_to; - {Q::get_buffer_query_state(bv, p)} -> std::convertible_to; - {Q::process_query_states(p, s, p)}; - {Q::query(sh, p, p)} -> std::convertible_to>>; - {Q::buffer_query(p, p)} -> std::convertible_to>>; - {Q::merge(rv, p, resv)}; +/* + * FIXME: It would probably be best to absorb the std::vector into + * this type too; this would allow user-defined collections for + * intermediate results, which could allow for more merging + * optimizations. However, this would require an alternative + * approach to doing delete checks, so we'll leave it for now. + */ +template +concept LocalResultInterface = requires(R res) { + { res.is_deleted() } -> std::convertible_to; + { res.is_tombstone() } -> std::convertible_to; +}; + +/* + * + * + */ +template +concept QueryInterface = LocalResultInterface && + requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query, + SHARD *shard, std::vector &local_queries, + std::vector> &local_results, + std::vector &result, + BufferView *bv) { + + /* + * Given a set of query parameters and a shard, return a local query + * object for that shard. + */ + { QUERY::local_preproc(shard, parameters) } -> std::convertible_to; + + /* + * Given a set of query parameters and a buffer view, return a local + * query object for the buffer. + * NOTE: for interface reasons, the pointer to the buffer view MUST be + * stored inside of the local query object. The future buffer + * query routine will access the buffer by way of this pointer. + */ + { + QUERY::local_preproc_buffer(bv, parameters) + } -> std::convertible_to; + + /* + * Given a full set of local queries, and the buffer query, make any + * necessary adjustments to the local queries in-place, to account for + * global information. If no additional processing is required, this + * function can be left empty. + */ + {QUERY::distribute_query(parameters, local_queries, buffer_query)}; + + /* + * Answer the local query, defined by `local` against `shard` and return + * a vector of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query(shard, local) + } -> std::convertible_to>; + + /* + * Answer the local query defined by `local` against the buffer (which + * should be accessed by a pointer inside of `local`) and return a vector + * of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query_buffer(buffer_query) + } -> std::convertible_to>; + + /* + * Process the local results from the buffer and all of the shards, + * stored in `local_results`, and insert the associated ResultType + * objects into the `result` vector, which represents the final result + * of the query. Updates to this vector are done in-place. + */ + {QUERY::combine(local_results, parameters, result)}; - {Q::delete_query_state(p)} -> std::same_as; - {Q::delete_buffer_query_state(p)} -> std::same_as; + /* + * Process the post-combine `result` vector of ResultType objects, + * in the context of the global and local query parameters, to determine + * if the query should be repeated. If so, make any necessary adjustments + * to the local query objects and return True. Otherwise, return False. + * + * If no repetition is needed for a given problem type, simply return + * False immediately and the query will end. + */ + { + QUERY::repeat(parameters, result, local_queries, buffer_query) + } -> std::same_as; - {Q::repeat(p, resv, s, p)} -> std::same_as; + /* + * If this flag is True, then the query will immediately stop and return + * a result as soon as the first non-deleted LocalRecordType is found. + * Otherwise, every Shard and the buffer will be queried and the results + * merged, like normal. + * + * This is largely an optimization flag for use with point-lookup, or + * other single-record result queries + */ + { QUERY::EARLY_ABORT } -> std::convertible_to; - {Q::EARLY_ABORT} -> std::convertible_to; - {Q::SKIP_DELETE_FILTER} -> std::convertible_to; + /* + * If false, the built-in delete filtering that the framework can + * apply to the local results, prior to calling combine, will be skipped. + * This general filtering can be inefficient, particularly for tombstone + * -based deletes, and so if a more efficient manual filtering can be + * performed, it is worth setting this to True and doing that filtering + * in the combine step. + * + * If deletes are not a consideration for your problem, it's also best + * to turn this off, as it'll avoid the framework making an extra pass + * over the local results prior to combining them. + * + * TODO: Temporarily disabling this, as we've dropped framework-level + * delete filtering for the time being. + */ + /* { QUERY::SKIP_DELETE_FILTER } -> std::convertible_to; */ }; -} +} // namespace de diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h index 19ccadd..d3e77d8 100644 --- a/include/framework/interface/Record.h +++ b/include/framework/interface/Record.h @@ -1,272 +1,247 @@ /* * include/framework/interface/Record.h * - * Copyright (C) 2023 Douglas Rumbaugh + * Copyright (C) 2023-2024 Douglas Rumbaugh * * Distributed under the Modified BSD License. * - * FIXME: the record implementations could probably be broken out into + * FIXME: the record implementations could probably be broken out into * different files, leaving only the interface here */ #pragma once -#include -#include #include +#include +#include #include "psu-util/hash.h" namespace de { -template +template concept RecordInterface = requires(R r, R s) { - { r < s } ->std::convertible_to; - { r == s } ->std::convertible_to; + { r < s } -> std::convertible_to; + { r == s } -> std::convertible_to; }; -template +template concept WeightedRecordInterface = requires(R r) { - {r.weight} -> std::convertible_to; + { r.weight } -> std::convertible_to; }; -template +template concept NDRecordInterface = RecordInterface && requires(R r, R s) { - {r.calc_distance(s)} -> std::convertible_to; + { r.calc_distance(s) } -> std::convertible_to; }; template concept KVPInterface = RecordInterface && requires(R r) { - r.key; - r.value; + r.key; + r.value; }; -template +template concept AlexInterface = KVPInterface && requires(R r) { - {r.key} -> std::convertible_to; - {r.value} -> std::convertible_to; + { r.key } -> std::convertible_to; + { r.value } -> std::convertible_to; }; -template -concept WrappedInterface = RecordInterface && requires(R r, R s, bool b, int i) { - {r.header} -> std::convertible_to; - r.rec; - {r.set_delete()}; - {r.is_deleted()} -> std::convertible_to; - {r.set_tombstone(b)}; - {r.is_tombstone()} -> std::convertible_to; - {r.set_timestamp(i)}; - {r.get_timestamp()} -> std::convertible_to; - {r.clear_timestamp()}; - {r.is_visible()} -> std::convertible_to; - {r.set_visible()}; - {r < s} -> std::convertible_to; - {r == s} ->std::convertible_to; +template +concept WrappedInterface = RecordInterface && + requires(R r, R s, bool b, int i) { + { r.header } -> std::convertible_to; + r.rec; + {r.set_delete()}; + { r.is_deleted() } -> std::convertible_to; + {r.set_tombstone(b)}; + { r.is_tombstone() } -> std::convertible_to; + {r.set_timestamp(i)}; + { r.get_timestamp() } -> std::convertible_to; + {r.clear_timestamp()}; + { r.is_visible() } -> std::convertible_to; + {r.set_visible()}; + { r < s } -> std::convertible_to; + { r == s } -> std::convertible_to; }; -template -struct Wrapped { - uint32_t header; - R rec; +template struct Wrapped { + uint32_t header; + R rec; - inline void set_delete() { - header |= 2; - } + inline void set_delete() { header |= 2; } - inline bool is_deleted() const { - return header & 2; - } + inline bool is_deleted() const { return header & 2; } - inline void set_visible() { - header |= 4; - } + inline void set_visible() { header |= 4; } - inline bool is_visible() const { - return header & 4; - } + inline bool is_visible() const { return header & 4; } - inline void set_timestamp(int ts) { - header |= (ts << 3); - } - - inline int get_timestamp() const { - return header >> 3; - } + inline void set_timestamp(int ts) { header |= (ts << 3); } - inline void clear_timestamp() { - header &= 7; - } + inline int get_timestamp() const { return header >> 3; } - inline void set_tombstone(bool val=true) { - if (val) { - header |= 1; - } else { - header &= 0; - } - } + inline void clear_timestamp() { header &= 7; } - inline bool is_tombstone() const { - return header & 1; + inline void set_tombstone(bool val = true) { + if (val) { + header |= 1; + } else { + header &= 0; } + } - inline bool operator<(const Wrapped& other) const { - return rec < other.rec || (rec == other.rec && header < other.header); - } + inline bool is_tombstone() const { return header & 1; } - inline bool operator==(const Wrapped& other) const { - return rec == other.rec; - } + inline bool operator<(const Wrapped &other) const { + return rec < other.rec || (rec == other.rec && header < other.header); + } + inline bool operator==(const Wrapped &other) const { + return rec == other.rec; + } }; -template -struct Record { - K key; - V value; +template struct Record { + K key; + V value; - inline bool operator<(const Record& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const Record &other) const { + return key < other.key || (key == other.key && value < other.value); + } - inline bool operator==(const Record& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const Record &other) const { + return key == other.key && value == other.value; + } }; -template -struct Record { - const char* key; - V value; - size_t len; +template struct Record { + const char *key; + V value; + size_t len; - inline bool operator<(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) < 0; - } + inline bool operator<(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) < 0; + } - inline bool operator==(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) == 0; - } + inline bool operator==(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) == 0; + } }; -template -struct WeightedRecord { - K key; - V value; - W weight = 1; +template struct WeightedRecord { + K key; + V value; + W weight = 1; - inline bool operator==(const WeightedRecord& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const WeightedRecord &other) const { + return key == other.key && value == other.value; + } - inline bool operator<(const WeightedRecord& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const WeightedRecord &other) const { + return key < other.key || (key == other.key && value < other.value); + } }; +template struct CosinePoint { + V data[D]; -template -struct CosinePoint{ - V data[D]; - - inline bool operator==(const CosinePoint& other) const { - for (size_t i=0; i other.data[i]) { - return false; - } - } + return true; + } + /* lexicographic order */ + inline bool operator<(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { + return true; + } else if (data[i] > other.data[i]) { return false; + } } - inline double calc_distance(const CosinePoint& other) const { + return false; + } - double prod = 0; - double asquared = 0; - double bsquared = 0; + inline double calc_distance(const CosinePoint &other) const { - for (size_t i=0; i struct EuclidPoint { + V data[D]; -template -struct EuclidPoint{ - V data[D]; + inline bool operator==(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } + } - inline bool operator==(const EuclidPoint& other) const { - for (size_t i=0; i other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const EuclidPoint& other) const { - for (size_t i=0; i other.data[i]) { - return false; - } - } + return false; + } - return false; + inline double calc_distance(const EuclidPoint &other) const { + double dist = 0; + for (size_t i = 0; i < D; i++) { + dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); } - inline double calc_distance(const EuclidPoint& other) const { - double dist = 0; - for (size_t i=0; i -struct RecordHash { - size_t operator()(R const &rec) const { - return psudb::hash_bytes((std::byte *) &rec, sizeof(R)); - } +template struct RecordHash { + size_t operator()(R const &rec) const { + return psudb::hash_bytes((std::byte *)&rec, sizeof(R)); + } }; -template -class DistCmpMax { +template class DistCmpMax { public: - DistCmpMax(R *baseline) : P(baseline) {} + DistCmpMax(R *baseline) : P(baseline) {} - inline bool operator()(const R *a, const R *b) requires WrappedInterface { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } + inline bool operator()(const R *a, const R *b) requires WrappedInterface { + return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); + } - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface){ - return a->calc_distance(*P) > b->calc_distance(*P); - } + inline bool operator()(const R *a, + const R *b) requires(!WrappedInterface) { + return a->calc_distance(*P) > b->calc_distance(*P); + } private: - R *P; + R *P; }; -} +} // namespace de diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h index 451ddd2..d76a6c8 100644 --- a/include/framework/interface/Scheduler.h +++ b/include/framework/interface/Scheduler.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Scheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh + * Copyright (C) 2023-2024 Douglas B. Rumbaugh * * Distributed under the Modified BSD License. * @@ -10,10 +10,11 @@ #include "framework/scheduling/Task.h" -template -concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) { - {S(i, i)}; - {s.schedule_job(j, i, vp, i)} -> std::convertible_to; - {s.shutdown()}; - {s.print_statistics()}; +template +concept SchedulerInterface = requires(SchedType s, size_t i, void *vp, + de::Job j) { + {SchedType(i, i)}; + {s.schedule_job(j, i, vp, i)} -> std::convertible_to; + {s.shutdown()}; + {s.print_statistics()}; }; diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h index c4a9180..bd980c0 100644 --- a/include/framework/interface/Shard.h +++ b/include/framework/interface/Shard.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Shard.h * - * Copyright (C) 2023 Douglas B. Rumbaugh + * Copyright (C) 2023-2024 Douglas B. Rumbaugh * * Distributed under the Modified BSD License. * @@ -12,25 +12,57 @@ namespace de { -template -concept ShardInterface = RecordInterface && requires(S s, std::vector spp, void *p, bool b, size_t i, BufferView bv, R r) { - {S(spp)}; - {S(std::move(bv))}; +template +concept ShardInterface = RecordInterface && + requires(SHARD shard, const std::vector &shard_vector, bool b, + BufferView bv, + typename SHARD::RECORD rec) { + /* construct a shard from a vector of shards of the same type */ + {SHARD(shard_vector)}; - {s.point_lookup(r, b) } -> std::same_as*>; - {s.get_data()} -> std::same_as*>; + /* construct a shard from a buffer view (i.e., unsorted array of records) */ + {SHARD(std::move(bv))}; + + /* perform a lookup for a record matching rec and return a pointer to it */ + { + shard.point_lookup(rec, b) + } -> std::same_as *>; + + /* + * return the number of records in the shard -- used to determine when + * reconstructions occur + */ + { shard.get_record_count() } -> std::convertible_to; + + /* + * return the number of tombstones in the shard -- can simply return + * 0 if tombstones are not in use. + */ + { shard.get_tombstone_count() } -> std::convertible_to; + + /* + * return the number of bytes of memory used by the main data structure + * within the shard -- informational use only at the moment + */ + { shard.get_memory_usage() } -> std::convertible_to; + + /* + * return the number of bytes of memory used by auxilliary data + * structures (bloom filters, etc.) within the shard -- informational + * use only at the moment + */ + { shard.get_aux_memory_usage() } -> std::convertible_to; - {s.get_record_count()} -> std::convertible_to; - {s.get_tombstone_count()} -> std::convertible_to; - {s.get_memory_usage()} -> std::convertible_to; - {s.get_aux_memory_usage()} -> std::convertible_to; }; -template -concept SortedShardInterface = ShardInterface && requires(S s, R r, R *rp, size_t i) { - {s.lower_bound(r)} -> std::convertible_to; - {s.upper_bound(r)} -> std::convertible_to; - {s.get_record_at(i)} -> std::same_as*>; +template +concept SortedShardInterface = ShardInterface && + requires(SHARD shard, typename SHARD::RECORD rec, size_t index) { + { shard.lower_bound(rec) } -> std::convertible_to; + { shard.upper_bound(rec) } -> std::convertible_to; + { + shard.get_record_at(index) + } -> std::same_as *>; }; -} +} // namespace de -- cgit v1.2.3