| field | value | date |
|---|---|---|
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-07-27 18:21:26 -0400 |
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-07-27 18:21:26 -0400 |
| commit | d6e08e9d8d3ac9b356ac50cee22b41f828160247 (patch) | |
| tree | 2c1f62f6385072e1a824e557693634acc3136cdd /include/shard | |
| parent | f462921cbc52688fb478c6ac86a891e596fd6053 (diff) | |
| download | dynamic-extension-d6e08e9d8d3ac9b356ac50cee22b41f828160247.tar.gz | |
Expanded query interface
The query interface now allows delete processing to be skipped and lets query
processing stop as soon as the first match is found.
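
For readers of the diff below: the expanded interface shows up as two constexpr flags on each query class (`EARLY_ABORT` for stopping at the first match, `SKIP_DELETE_FILTER` for skipping delete processing) plus a `process_query_states` hook that now takes its shard-state vector by reference. The following is a minimal, hypothetical sketch of that shape; `ToyRecord`, `ToyShard`, and `toy_lookup_parms` are placeholder names invented for this example and are not types from this repository.

```cpp
// Hypothetical sketch of a query class under the expanded interface.
// ToyRecord, ToyShard, and toy_lookup_parms are placeholders for this
// example only; they are not part of the dynamic-extension codebase.
#include <cstddef>
#include <vector>

struct ToyRecord { int key; int value; };

struct ToyShard {
    std::vector<ToyRecord> data;   // stand-in for a real shard's storage
};

struct toy_lookup_parms { int target_key; };

class ToyPointLookup {
public:
    // New per-query-type knobs introduced by this commit:
    // EARLY_ABORT: the framework may stop querying further shards once a
    // match has been found; SKIP_DELETE_FILTER: the framework may skip its
    // delete (tombstone) processing for this query type.
    constexpr static bool EARLY_ABORT = true;
    constexpr static bool SKIP_DELETE_FILTER = false;

    static void *get_query_state(ToyShard *, void *) { return nullptr; }
    static void *get_buffer_query_state(void *, void *) { return nullptr; }

    // Matches the signature change in the diff: shard_states is now passed
    // by reference rather than copied by value.
    static void process_query_states(void *query_parms,
                                     std::vector<void *> &shard_states,
                                     void *buffer_state) {
        (void) query_parms; (void) shard_states; (void) buffer_state;
    }

    static std::vector<ToyRecord> query(ToyShard *shard, void *, void *parms) {
        auto p = static_cast<toy_lookup_parms *>(parms);
        std::vector<ToyRecord> out;
        for (const auto &rec : shard->data) {
            if (rec.key == p->target_key) {
                out.push_back(rec);   // a point lookup returns at most one record
                break;
            }
        }
        return out;
    }

    static void delete_query_state(void *) {}
    static void delete_buffer_query_state(void *) {}
};
```

In the actual diff, the range queries (ISAMRangeQuery, TrieSplineRangeQuery) set SKIP_DELETE_FILTER=true and instead cancel tombstones themselves in their rewritten merge step, while the sampling queries (WIRSQuery, WSSQuery) leave both flags false.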
Diffstat (limited to 'include/shard')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/shard/Alex.h | 2 |
| -rw-r--r-- | include/shard/MemISAM.h | 57 |
| -rw-r--r-- | include/shard/PGM.h | 128 |
| -rw-r--r-- | include/shard/TrieSpline.h | 48 |
| -rw-r--r-- | include/shard/VPTree.h | 13 |
| -rw-r--r-- | include/shard/WIRS.h | 10 |
| -rw-r--r-- | include/shard/WSS.h | 10 |
7 files changed, 226 insertions, 42 deletions
diff --git a/include/shard/Alex.h b/include/shard/Alex.h
index 9ba9666..be5222c 100644
--- a/include/shard/Alex.h
+++ b/include/shard/Alex.h
@@ -271,7 +271,7 @@ public:
         return res;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         return;
     }
 
diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h
index 3e3215f..3000a6a 100644
--- a/include/shard/MemISAM.h
+++ b/include/shard/MemISAM.h
@@ -378,6 +378,9 @@ template <RecordInterface R, bool Rejection=true>
 class IRSQuery {
 public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
     static void *get_query_state(MemISAM<R> *isam, void *parms) {
         auto res = new IRSState<R>();
         decltype(R::key) lower_key = ((irs_query_parms<R> *) parms)->lower_bound;
@@ -418,7 +421,7 @@ public:
         return res;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         auto p = (irs_query_parms<R> *) query_parms;
         auto bs = (buff_state) ? (IRSBufferState<R> *) buff_state : nullptr;
 
@@ -527,12 +530,12 @@ public:
         return result;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
         std::vector<R> output;
         for (size_t i=0; i<results.size(); i++) {
             for (size_t j=0; j<results[i].size(); j++) {
-                output.emplace_back(results[i][j]);
+                output.emplace_back(results[i][j].rec);
             }
         }
 
@@ -554,6 +557,10 @@ public:
 template <RecordInterface R>
 class ISAMRangeQuery {
 public:
+
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=true;
+
     static void *get_query_state(MemISAM<R> *ts, void *parms) {
         auto res = new ISAMRangeQueryState<R>();
         auto p = (ISAMRangeQueryParms<R> *) parms;
@@ -571,7 +578,7 @@ public:
         return res;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         return;
     }
 
@@ -618,11 +625,25 @@ public:
         return records;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<Cursor<Wrapped<R>>> cursors;
+        cursors.reserve(results.size());
+
+        PriorityQueue<Wrapped<R>> pq(results.size());
         size_t total = 0;
-        for (size_t i=0; i<results.size(); i++) {
-            total += results[i].size();
-        }
+        size_t tmp_n = results.size();
+
+
+        for (size_t i = 0; i < tmp_n; ++i)
+            if (results[i].size() > 0){
+                auto base = results[i].data();
+                cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+                assert(i == cursors.size() - 1);
+                total += results[i].size();
+                pq.push(cursors[i].ptr, tmp_n - i - 1);
+            } else {
+                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+            }
 
         if (total == 0) {
             return std::vector<R>();
@@ -631,8 +652,24 @@ public:
         std::vector<R> output;
         output.reserve(total);
 
-        for (size_t i=0; i<results.size(); i++) {
-            std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+        while (pq.size()) {
+            auto now = pq.peek();
+            auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+            if (!now.data->is_tombstone() && next.data != nullptr &&
+                now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+                pq.pop(); pq.pop();
+                auto& cursor1 = cursors[tmp_n - now.version - 1];
+                auto& cursor2 = cursors[tmp_n - next.version - 1];
+                if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+                if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+            } else {
+                auto& cursor = cursors[tmp_n - now.version - 1];
+                if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+                pq.pop();
+
+                if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+            }
         }
 
         return output;
diff --git a/include/shard/PGM.h b/include/shard/PGM.h
index 65d548e..aba5227 100644
--- a/include/shard/PGM.h
+++ b/include/shard/PGM.h
@@ -34,9 +34,17 @@ struct pgm_range_query_parms {
 };
 
 template <RecordInterface R>
+struct PGMPointLookupParms {
+    decltype(R::key) target_key;
+};
+
+template <RecordInterface R>
 class PGMRangeQuery;
 
 template <RecordInterface R>
+class PGMPointLookup;
+
+template <RecordInterface R>
 struct PGMState {
     size_t start_idx;
     size_t stop_idx;
@@ -45,12 +53,6 @@ struct PGMState {
 template <RecordInterface R>
 struct PGMBufferState {
     size_t cutoff;
-    Alias* alias;
-
-    ~PGMBufferState() {
-        delete alias;
-    }
-
 };
 
 template <RecordInterface R, size_t epsilon=128>
@@ -64,6 +66,7 @@ public:
 
     // FIXME: there has to be a better way to do this
     friend class PGMRangeQuery<R>;
+    friend class PGMPointLookup<R>;
 
     PGM(MutableBuffer<R>* buffer)
     : m_reccnt(0), m_tombstone_cnt(0) {
@@ -274,11 +277,80 @@ private:
 
     pgm::PGMIndex<K, epsilon> m_pgm;
     BloomFilter<R> *m_bf;
 };
+template <RecordInterface R>
+class PGMPointLookup {
+public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
+    static void *get_query_state(PGM<R> *ts, void *parms) {
+        return nullptr;
+    }
+
+    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+        return nullptr;
+    }
+
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
+        return;
+    }
+
+    static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) {
+        std::vector<Wrapped<R>> records;
+        auto p = (PGMPointLookupParms<R> *) parms;
+        auto s = (PGMState<R> *) q_state;
+
+        size_t idx = ts->get_lower_bound(p->target_key);
+        if (ts->get_record_at(idx)->rec.key == p->target_key) {
+            records.emplace_back(*ts->get_record_at(idx));
+        }
+
+        return records;
+    }
+
+    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+        auto p = (PGMPointLookupParms<R> *) parms;
+        auto s = (PGMBufferState<R> *) state;
+
+        std::vector<Wrapped<R>> records;
+        for (size_t i=0; i<buffer->get_record_count(); i++) {
+            auto rec = buffer->get_data() + i;
+            if (rec->rec.key == p->target_key) {
+                records.emplace_back(*rec);
+                return records;
+            }
+        }
+
+        return records;
+    }
+
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<R> output;
+        for (size_t i=0 ;i<results.size(); i++) {
+            if (results[i].size() > 0) {
+                output.emplace_back(results[i][0].rec);
+                return output;
+            }
+        }
+
+        return output;
+    }
+
+    static void delete_query_state(void *state) {
+    }
+
+    static void delete_buffer_query_state(void *state) {
+    }
+};
+
 
 template <RecordInterface R>
 class PGMRangeQuery {
 public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
     static void *get_query_state(PGM<R> *ts, void *parms) {
         auto res = new PGMState<R>();
         auto p = (pgm_range_query_parms<R> *) parms;
@@ -296,7 +368,7 @@ public:
         return res;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         return;
     }
 
@@ -343,11 +415,25 @@ public:
         return records;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<Cursor<Wrapped<R>>> cursors;
+        cursors.reserve(results.size());
+
+        PriorityQueue<Wrapped<R>> pq(results.size());
         size_t total = 0;
-        for (size_t i=0; i<results.size(); i++) {
-            total += results[i].size();
-        }
+        size_t tmp_n = results.size();
+
+
+        for (size_t i = 0; i < tmp_n; ++i)
+            if (results[i].size() > 0){
+                auto base = results[i].data();
+                cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+                assert(i == cursors.size() - 1);
+                total += results[i].size();
+                pq.push(cursors[i].ptr, tmp_n - i - 1);
+            } else {
+                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+            }
 
         if (total == 0) {
            return std::vector<R>();
@@ -356,8 +442,24 @@ public:
         std::vector<R> output;
         output.reserve(total);
 
-        for (size_t i=0; i<results.size(); i++) {
-            std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+        while (pq.size()) {
+            auto now = pq.peek();
+            auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+            if (!now.data->is_tombstone() && next.data != nullptr &&
+                now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+                pq.pop(); pq.pop();
+                auto& cursor1 = cursors[tmp_n - now.version - 1];
+                auto& cursor2 = cursors[tmp_n - next.version - 1];
+                if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+                if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+            } else {
+                auto& cursor = cursors[tmp_n - now.version - 1];
+                if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+                pq.pop();
+
+                if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+            }
         }
 
         return output;
diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h
index f06756f..7d1a90d 100644
--- a/include/shard/TrieSpline.h
+++ b/include/shard/TrieSpline.h
@@ -300,6 +300,9 @@ private:
 template <RecordInterface R>
 class TrieSplineRangeQuery {
 public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=true;
+
     static void *get_query_state(TrieSpline<R> *ts, void *parms) {
         auto res = new TrieSplineState<R>();
         auto p = (ts_range_query_parms<R> *) parms;
@@ -317,7 +320,7 @@ public:
         return res;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         return;
     }
 
@@ -367,11 +370,25 @@ public:
         return records;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<Cursor<Wrapped<R>>> cursors;
+        cursors.reserve(results.size());
+
+        PriorityQueue<Wrapped<R>> pq(results.size());
         size_t total = 0;
-        for (size_t i=0; i<results.size(); i++) {
-            total += results[i].size();
-        }
+        size_t tmp_n = results.size();
+
+
+        for (size_t i = 0; i < tmp_n; ++i)
+            if (results[i].size() > 0){
+                auto base = results[i].data();
+                cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+                assert(i == cursors.size() - 1);
+                total += results[i].size();
+                pq.push(cursors[i].ptr, tmp_n - i - 1);
+            } else {
+                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+            }
 
         if (total == 0) {
             return std::vector<R>();
@@ -380,11 +397,28 @@ public:
         std::vector<R> output;
         output.reserve(total);
 
-        for (size_t i=0; i<results.size(); i++) {
-            std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+        while (pq.size()) {
+            auto now = pq.peek();
+            auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+            if (!now.data->is_tombstone() && next.data != nullptr &&
+                now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+                pq.pop(); pq.pop();
+                auto& cursor1 = cursors[tmp_n - now.version - 1];
+                auto& cursor2 = cursors[tmp_n - next.version - 1];
+                if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+                if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+            } else {
+                auto& cursor = cursors[tmp_n - now.version - 1];
+                if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+                pq.pop();
+
+                if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+            }
         }
 
         return output;
+    }
 
     static void delete_query_state(void *state) {
diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h
index 5f740dc..86f4ab7 100644
--- a/include/shard/VPTree.h
+++ b/include/shard/VPTree.h
@@ -424,6 +424,9 @@ private:
 template <NDRecordInterface R>
 class KNNQuery {
 public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=true;
+
     static void *get_query_state(VPTree<R> *wss, void *parms) {
         return nullptr;
     }
@@ -432,7 +435,7 @@ public:
         return nullptr;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         return;
     }
 
@@ -494,7 +497,7 @@ public:
         return results;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
         KNNQueryParms<R> *p = (KNNQueryParms<R> *) parms;
         R rec = p->point;
         size_t k = p->k;
@@ -503,14 +506,14 @@ public:
         for (size_t i=0; i<results.size(); i++) {
             for (size_t j=0; j<results[i].size(); j++) {
                 if (pq.size() < k) {
-                    pq.push(&results[i][j]);
+                    pq.push(&results[i][j].rec);
                 } else {
                     double head_dist = pq.peek().data->calc_distance(rec);
-                    double cur_dist = results[i][j].calc_distance(rec);
+                    double cur_dist = results[i][j].rec.calc_distance(rec);
                     if (cur_dist < head_dist) {
                         pq.pop();
-                        pq.push(&results[i][j]);
+                        pq.push(&results[i][j].rec);
                     }
                 }
             }
diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h
index 1a63092..fafdef0 100644
--- a/include/shard/WIRS.h
+++ b/include/shard/WIRS.h
@@ -370,6 +370,10 @@ private:
 template <WeightedRecordInterface R, bool Rejection=true>
 class WIRSQuery {
 public:
+
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
     static void *get_query_state(WIRS<R> *wirs, void *parms) {
         auto res = new WIRSState<R>();
         decltype(R::key) lower_key = ((wirs_query_parms<R> *) parms)->lower_bound;
@@ -440,7 +444,7 @@ public:
         return state;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         auto p = (wirs_query_parms<R> *) query_parms;
         auto bs = (WIRSBufferState<R> *) buff_state;
 
@@ -549,12 +553,12 @@ public:
         return result;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
         std::vector<R> output;
         for (size_t i=0; i<results.size(); i++) {
             for (size_t j=0; j<results[i].size(); j++) {
-                output.emplace_back(results[i][j]);
+                output.emplace_back(results[i][j].rec);
             }
         }
 
diff --git a/include/shard/WSS.h b/include/shard/WSS.h
index 17e9eb9..435e799 100644
--- a/include/shard/WSS.h
+++ b/include/shard/WSS.h
@@ -286,6 +286,10 @@ private:
 template <WeightedRecordInterface R, bool Rejection=true>
 class WSSQuery {
 public:
+
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
     static void *get_query_state(WSS<R> *wss, void *parms) {
         auto res = new WSSState<R>();
         res->total_weight = wss->m_total_weight;
@@ -325,7 +329,7 @@ public:
         return state;
     }
 
-    static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
         auto p = (wss_query_parms<R> *) query_parms;
         auto bs = (WSSBufferState<R> *) buff_state;
 
@@ -415,12 +419,12 @@ public:
         return result;
     }
 
-    static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
         std::vector<R> output;
         for (size_t i=0; i<results.size(); i++) {
             for (size_t j=0; j<results[i].size(); j++) {
-                output.emplace_back(results[i][j]);
+                output.emplace_back(results[i][j].rec);
             }
         }