From fb4312a883dd0e382ecbcfe1119479e6f44d32a6 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 22 Mar 2024 15:35:14 -0400 Subject: PointLookup: added a point lookup query for unique indexes, and some tests --- include/query/pointlookup.h | 117 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 include/query/pointlookup.h (limited to 'include/query') diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h new file mode 100644 index 0000000..caaa320 --- /dev/null +++ b/include/query/pointlookup.h @@ -0,0 +1,117 @@ +/* + * include/query/pointlookup.h + * + * Copyright (C) 2024 Douglas B. Rumbaugh + * + * Distributed under the Modified BSD License. + * + * A query class for point lookup operations. + * + * TODO: Currently, this only supports point lookups for unique keys (which + * is the case for the trie that we're building this to use). It would be + * pretty straightforward to extend it to return *all* records that match + * the search_key (including tombstone cancellation--it's invertible) to + * support non-unique indexes, or at least those implementing + * lower_bound(). + */ +#pragma once + +#include "framework/QueryRequirements.h" + +namespace de { namespace pl { + +template +struct Parms { + decltype(R::key) search_key; +}; + +template +struct State { +}; + +template +struct BufferState { + BufferView *buffer; + + BufferState(BufferView *buffer) + : buffer(buffer) {} +}; + +template S> +class Query { +public: + constexpr static bool EARLY_ABORT=true; + constexpr static bool SKIP_DELETE_FILTER=true; + + static void *get_query_state(S *shard, void *parms) { + return nullptr; + } + + static void* get_buffer_query_state(BufferView *buffer, void *parms) { + auto res = new BufferState(buffer); + + return res; + } + + static void process_query_states(void *query_parms, std::vector &shard_states, void* buffer_state) { + return; + } + + static std::vector> query(S *shard, void *q_state, void *parms) { + auto p = (Parms *) parms; + auto s = (State *) q_state; + + std::vector> result; + + auto r = shard->point_lookup({p->search_key, 0}); + + if (r) { + result.push_back(*r); + } + + return result; + } + + static std::vector> buffer_query(void *state, void *parms) { + auto p = (Parms *) parms; + auto s = (BufferState *) state; + + std::vector> records; + for (size_t i=0; ibuffer->get_record_count(); i++) { + auto rec = s->buffer->get(i); + + if (rec->rec.key == p->search_key) { + records.push_back(*rec); + return records; + } + } + + return records; + } + + static std::vector merge(std::vector>> &results, void *parms) { + std::vector output; + for (auto r : results) { + if (r.size() > 0) { + if (r[0].is_deleted() || r[0].is_tombstone()) { + return output; + } + + output.append(r[0].rec); + return output; + } + } + } + + static void delete_query_state(void *state) { + auto s = (State *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState *) state; + delete s; + } +}; + +}} -- cgit v1.2.3 From b1b5ab106122e6917f6b34452be95e617506f05d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 25 Mar 2024 12:54:17 -0400 Subject: Updates for build on OpenBSD Necessary updates to get the codebase building under OpenBSD 7.5 with clang. This is a minimal set of changes to get building to work, which includes disabling several things that aren't directly compatable. More work will be necessary to get full functionality. In particular, Triespline, PGM, and the reference M-tree do not currently build on OpenBSD with clang due to GNU dependencies or other gcc specific features. --- include/query/rangequery.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/query') diff --git a/include/query/rangequery.h b/include/query/rangequery.h index 24b38ec..e6ab581 100644 --- a/include/query/rangequery.h +++ b/include/query/rangequery.h @@ -121,7 +121,7 @@ public: for (size_t i = 0; i < tmp_n; ++i) if (results[i].size() > 0){ auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); + cursors.emplace_back(Cursor>{base, base + results[i].size(), 0, results[i].size()}); assert(i == cursors.size() - 1); total += results[i].size(); pq.push(cursors[i].ptr, tmp_n - i - 1); -- cgit v1.2.3 From 7e7fd9f7339eee2f1ae974c662a447532dfb1b1a Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Tue, 26 Mar 2024 16:35:12 -0400 Subject: Updated FSTrie benchmark and some minor fixes --- include/query/pointlookup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/query') diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h index caaa320..35d38e3 100644 --- a/include/query/pointlookup.h +++ b/include/query/pointlookup.h @@ -97,10 +97,12 @@ public: return output; } - output.append(r[0].rec); + output.push_back(r[0].rec); return output; } } + + return output; } static void delete_query_state(void *state) { -- cgit v1.2.3 From 8479f3ce863dfb6d3b20ff4678fa6fe92ee86b52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 16:50:18 -0400 Subject: Fixed some benchmarking bugs --- include/query/irs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/query') diff --git a/include/query/irs.h b/include/query/irs.h index e2d9325..51eb4e2 100644 --- a/include/query/irs.h +++ b/include/query/irs.h @@ -103,7 +103,7 @@ public: weights.push_back((bs) ? bs->records.size() : 0); } - size_t total_weight = 0; + size_t total_weight = weights[0]; for (auto &s : shard_states) { auto state = (State *) s; total_weight += state->total_weight; -- cgit v1.2.3 From 438feac7e56fee425d9c6f1a43298ff9dc5b71d1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 17:38:16 -0400 Subject: Properly implemented support for iteratively decomposable problems --- include/query/irs.h | 85 +++++++++++++++++++++++++++++---------------- include/query/knn.h | 7 ++-- include/query/pointlookup.h | 8 +++-- include/query/rangecount.h | 8 +++-- include/query/rangequery.h | 7 ++-- include/query/wirs.h | 13 +++++-- include/query/wss.h | 13 +++++-- 7 files changed, 97 insertions(+), 44 deletions(-) (limited to 'include/query') diff --git a/include/query/irs.h b/include/query/irs.h index 51eb4e2..879d070 100644 --- a/include/query/irs.h +++ b/include/query/irs.h @@ -40,7 +40,12 @@ struct BufferState { size_t sample_size; BufferView *buffer; + psudb::Alias *alias; + BufferState(BufferView *buffer) : buffer(buffer) {} + ~BufferState() { + delete alias; + } }; template S, bool Rejection=true> @@ -72,6 +77,7 @@ public: res->cutoff = res->buffer->get_record_count(); res->sample_size = 0; + res->alias = nullptr; if constexpr (Rejection) { return res; @@ -96,39 +102,51 @@ public: std::vector shard_sample_sizes(shard_states.size()+1, 0); size_t buffer_sz = 0; - std::vector weights; - if constexpr (Rejection) { - weights.push_back((bs) ? bs->cutoff : 0); - } else { - weights.push_back((bs) ? bs->records.size() : 0); + /* for simplicity of static structure testing */ + if (!bs) { + assert(shard_states.size() == 1); + auto state = (State *) shard_states[0]; + state->sample_size = p->sample_size; + return; } - size_t total_weight = weights[0]; - for (auto &s : shard_states) { - auto state = (State *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } + /* we only need to build the shard alias on the first call */ + if (bs->alias == nullptr) { + std::vector weights; + if constexpr (Rejection) { + weights.push_back((bs) ? bs->cutoff : 0); + } else { + weights.push_back((bs) ? bs->records.size() : 0); + } - // if no valid records fall within the query range, just - // set all of the sample sizes to 0 and bail out. - if (total_weight == 0) { - for (size_t i=0; i *) shard_states[i]; - state->sample_size = 0; + size_t total_weight = weights[0]; + for (auto &s : shard_states) { + auto state = (State *) s; + total_weight += state->total_weight; + weights.push_back(state->total_weight); } - return; - } + // if no valid records fall within the query range, just + // set all of the sample sizes to 0 and bail out. + if (total_weight == 0) { + for (size_t i=0; i *) shard_states[i]; + state->sample_size = 0; + } - std::vector normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); + return; + } + + std::vector normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double) w / (double) total_weight); + } + + bs->alias = new psudb::Alias(normalized_weights); } - auto shard_alias = psudb::Alias(normalized_weights); for (size_t i=0; isample_size; i++) { - auto idx = shard_alias.get(p->rng); + auto idx = bs->alias->get(p->rng); if (idx == 0) { buffer_sz++; } else { @@ -198,16 +216,12 @@ public: return result; } - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - + static void merge(std::vector>> &results, void *parms, std::vector &output) { for (size_t i=0; i *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + auto p = (Parms *) parms; + + if (results.size() < p->sample_size) { + auto q = *p; + q.sample_size -= results.size(); + process_query_states(&q, states, buffer_state); + return true; + } + + return false; + } }; }} diff --git a/include/query/knn.h b/include/query/knn.h index 19dcf5c..c856a74 100644 --- a/include/query/knn.h +++ b/include/query/knn.h @@ -114,7 +114,7 @@ public: return results; } - static std::vector merge(std::vector>> &results, void *parms) { + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { Parms *p = (Parms *) parms; R rec = p->point; size_t k = p->k; @@ -136,7 +136,6 @@ public: } } - std::vector output; while (pq.size() > 0) { output.emplace_back(*pq.peek().data); pq.pop(); @@ -154,6 +153,10 @@ public: auto s = (BufferState *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + return false; + } }; }} diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h index 35d38e3..94c2bce 100644 --- a/include/query/pointlookup.h +++ b/include/query/pointlookup.h @@ -89,8 +89,7 @@ public: return records; } - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { for (auto r : results) { if (r.size() > 0) { if (r[0].is_deleted() || r[0].is_tombstone()) { @@ -114,6 +113,11 @@ public: auto s = (BufferState *) state; delete s; } + + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + return false; + } }; }} diff --git a/include/query/rangecount.h b/include/query/rangecount.h index 6c57809..c20feaa 100644 --- a/include/query/rangecount.h +++ b/include/query/rangecount.h @@ -134,12 +134,10 @@ public: return records; } - static std::vector merge(std::vector>> &results, void *parms) { - + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { R res; res.key = 0; res.value = 0; - std::vector output; output.emplace_back(res); for (size_t i=0; i *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + return false; + } }; }} diff --git a/include/query/rangequery.h b/include/query/rangequery.h index e6ab581..e0690e6 100644 --- a/include/query/rangequery.h +++ b/include/query/rangequery.h @@ -109,7 +109,7 @@ public: return records; } - static std::vector merge(std::vector>> &results, void *parms) { + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { std::vector>> cursors; cursors.reserve(results.size()); @@ -133,7 +133,6 @@ public: return std::vector(); } - std::vector output; output.reserve(total); while (pq.size()) { @@ -169,6 +168,10 @@ public: auto s = (BufferState *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + return false; + } }; }} diff --git a/include/query/wirs.h b/include/query/wirs.h index ae82194..62b43f6 100644 --- a/include/query/wirs.h +++ b/include/query/wirs.h @@ -219,9 +219,7 @@ public: return result; } - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { for (size_t i=0; i *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + auto p = (Parms *) parms; + + if (results.size() < p->sample_size) { + return true; + } + return false; + } }; }} diff --git a/include/query/wss.h b/include/query/wss.h index 8797035..fb0b414 100644 --- a/include/query/wss.h +++ b/include/query/wss.h @@ -183,9 +183,7 @@ public: return result; } - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - + static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { for (size_t i=0; i *) state; delete s; } + + static bool repeat(void *parms, std::vector &results, std::vector states, void* buffer_state) { + auto p = (Parms *) parms; + + if (results.size() < p->sample_size) { + return true; + } + return false; + } }; }} -- cgit v1.2.3 From 5576c5524b48e43e4d6070c28de7c3c66582ed97 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:05:07 -0400 Subject: Query optimizations --- include/query/knn.h | 4 ++-- include/query/rangecount.h | 42 ++++++++++++++++++++---------------------- 2 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include/query') diff --git a/include/query/knn.h b/include/query/knn.h index c856a74..a227293 100644 --- a/include/query/knn.h +++ b/include/query/knn.h @@ -111,7 +111,7 @@ public: pq.pop(); } - return results; + return std::move(results); } static std::vector merge(std::vector>> &results, void *parms, std::vector &output) { @@ -141,7 +141,7 @@ public: pq.pop(); } - return output; + return std::move(output); } static void delete_query_state(void *state) { diff --git a/include/query/rangecount.h b/include/query/rangecount.h index c20feaa..5a18ed4 100644 --- a/include/query/rangecount.h +++ b/include/query/rangecount.h @@ -42,13 +42,7 @@ public: constexpr static bool SKIP_DELETE_FILTER=true; static void *get_query_state(S *shard, void *parms) { - auto res = new State(); - auto p = (Parms *) parms; - - res->start_idx = shard->get_lower_bound(p->lower_bound); - res->stop_idx = shard->get_record_count(); - - return res; + return nullptr; } static void* get_buffer_query_state(BufferView *buffer, void *parms) { @@ -74,37 +68,43 @@ public: res.rec.value = 0; // tombstones records.emplace_back(res); + + auto start_idx = shard->get_lower_bound(p->lower_bound); + auto stop_idx = shard->get_lower_bound(p->upper_bound); + /* * if the returned index is one past the end of the * records for the PGM, then there are not records * in the index falling into the specified range. */ - if (s->start_idx == shard->get_record_count()) { + if (start_idx == shard->get_record_count()) { return records; } - auto ptr = shard->get_record_at(s->start_idx); /* * roll the pointer forward to the first record that is * greater than or equal to the lower bound. */ - while(ptr < shard->get_data() + s->stop_idx && ptr->rec.key < p->lower_bound) { - ptr++; + auto recs = shard->get_data(); + while(start_idx < stop_idx && recs[start_idx].rec.key < p->lower_bound) { + start_idx++; } - while (ptr < shard->get_data() + s->stop_idx && ptr->rec.key <= p->upper_bound) { - if (!ptr->is_deleted()) { - if (ptr->is_tombstone()) { - records[0].rec.value++; - } else { - records[0].rec.key++; - } - } + while (stop_idx < shard->get_record_count() && recs[stop_idx].rec.key <= p->upper_bound) { + stop_idx++; + } + size_t idx = start_idx; + size_t ts_cnt = 0; - ptr++; + while (idx < stop_idx) { + ts_cnt += recs[idx].is_tombstone() * 2 + recs[idx].is_deleted(); + idx++; } + records[0].rec.key = idx - start_idx; + records[0].rec.value = ts_cnt; + return records; } @@ -150,8 +150,6 @@ public: } static void delete_query_state(void *state) { - auto s = (State *) state; - delete s; } static void delete_buffer_query_state(void *state) { -- cgit v1.2.3 From a23bc3341923509be9b2f587ece8cd5a650f6386 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 8 May 2024 13:20:44 -0400 Subject: TSParmsweep: enabled forcing a full buffer scan --- include/query/rangecount.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/query') diff --git a/include/query/rangecount.h b/include/query/rangecount.h index 5a18ed4..5b95cdd 100644 --- a/include/query/rangecount.h +++ b/include/query/rangecount.h @@ -35,7 +35,7 @@ struct BufferState { : buffer(buffer) {} }; -template S> +template S, bool FORCE_SCAN=false> class Query { public: constexpr static bool EARLY_ABORT=false; @@ -119,8 +119,16 @@ public: res.rec.value = 0; // tombstones records.emplace_back(res); + size_t stop_idx; + if constexpr (FORCE_SCAN) { + stop_idx = s->buffer->get_capacity() / 2; + } else { + stop_idx = s->buffer->get_record_count(); + } + for (size_t i=0; ibuffer->get_record_count(); i++) { auto rec = s->buffer->get(i); + if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound && !rec->is_deleted()) { if (rec->is_tombstone()) { -- cgit v1.2.3