path: root/include/shard
author     Douglas Rumbaugh <dbr4@psu.edu>    2023-07-27 18:21:26 -0400
committer  Douglas Rumbaugh <dbr4@psu.edu>    2023-07-27 18:21:26 -0400
commit     d6e08e9d8d3ac9b356ac50cee22b41f828160247 (patch)
tree       2c1f62f6385072e1a824e557693634acc3136cdd /include/shard
parent     f462921cbc52688fb478c6ac86a891e596fd6053 (diff)
download   dynamic-extension-d6e08e9d8d3ac9b356ac50cee22b41f828160247.tar.gz
Expanded query interface
The query interface now supports skipping delete processing and stopping query processing once the first match is found.
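
The two features are exposed as per-query-class compile-time constants (EARLY_ABORT and SKIP_DELETE_FILTER in the diff below). The following is a minimal, self-contained sketch of how a framework could consult such constants with if constexpr; the stand-in query structs and the run_query() driver are illustrative assumptions, not code from this repository.

#include <cstdio>
#include <vector>

// Stand-in query policies carrying the two flags introduced by this commit.
struct EarlyAbortLookup {
    constexpr static bool EARLY_ABORT = true;         // stop at the first match
    constexpr static bool SKIP_DELETE_FILTER = false; // still run delete filtering
};
struct FullRangeScan {
    constexpr static bool EARLY_ABORT = false;
    constexpr static bool SKIP_DELETE_FILTER = true;  // the query handles deletes itself
};

// Hypothetical driver: query each shard in turn, honoring the flags.
template <typename Q>
std::vector<int> run_query(const std::vector<std::vector<int>> &shards) {
    std::vector<int> out;
    for (const auto &shard : shards) {
        for (int rec : shard) out.push_back(rec);
        if constexpr (Q::EARLY_ABORT) {
            if (!out.empty()) break;                   // first match found; stop
        }
    }
    if constexpr (!Q::SKIP_DELETE_FILTER) {
        /* tombstone/delete filtering of `out` would run here */
    }
    return out;
}

int main() {
    std::vector<std::vector<int>> shards = {{}, {7}, {8, 9}};
    std::printf("early abort: %zu record(s)\n", run_query<EarlyAbortLookup>(shards).size());
    std::printf("full scan:   %zu record(s)\n", run_query<FullRangeScan>(shards).size());
    return 0;
}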
Diffstat (limited to 'include/shard')
-rw-r--r--  include/shard/Alex.h        |   2
-rw-r--r--  include/shard/MemISAM.h     |  57
-rw-r--r--  include/shard/PGM.h         | 128
-rw-r--r--  include/shard/TrieSpline.h  |  48
-rw-r--r--  include/shard/VPTree.h      |  13
-rw-r--r--  include/shard/WIRS.h        |  10
-rw-r--r--  include/shard/WSS.h         |  10
7 files changed, 226 insertions, 42 deletions
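
Besides the signature change to process_query_states() (the shard-state vector is now passed by reference) and the switch of merge() inputs to std::vector<std::vector<Wrapped<R>>>, the range-query merge() routines below are rewritten as a cursor-and-priority-queue merge that cancels records against matching tombstones. The following self-contained sketch shows the cancellation idea using std::priority_queue in place of the project's PriorityQueue and Cursor helpers; the Rec/Entry types and the tie-breaking rule are illustrative simplifications, not the repository's code.

#include <cstdio>
#include <queue>
#include <vector>

// Simplified stand-in for Wrapped<R>: a key plus a tombstone flag.
struct Rec { int key; bool tombstone; };

// Per-shard cursor position held in the priority queue.
struct Entry { Rec rec; size_t shard; size_t idx; };

// Merge sorted per-shard result vectors while cancelling each record against a
// matching tombstone, mirroring the shape of the new merge() routines below.
std::vector<int> merge_with_cancellation(const std::vector<std::vector<Rec>> &results) {
    auto cmp = [](const Entry &a, const Entry &b) {
        if (a.rec.key != b.rec.key) return a.rec.key > b.rec.key;   // min-heap on key
        return a.rec.tombstone && !b.rec.tombstone;                 // record before its tombstone
    };
    std::priority_queue<Entry, std::vector<Entry>, decltype(cmp)> pq(cmp);

    for (size_t i = 0; i < results.size(); i++)
        if (!results[i].empty()) pq.push({results[i][0], i, 0});

    auto advance = [&](size_t shard, size_t idx) {
        if (idx + 1 < results[shard].size()) pq.push({results[shard][idx + 1], shard, idx + 1});
    };

    std::vector<int> output;
    while (!pq.empty()) {
        Entry now = pq.top(); pq.pop();
        if (!now.rec.tombstone && !pq.empty() && pq.top().rec.key == now.rec.key
              && pq.top().rec.tombstone) {
            Entry next = pq.top(); pq.pop();       // record cancelled by its tombstone
            advance(next.shard, next.idx);
        } else if (!now.rec.tombstone) {
            output.push_back(now.rec.key);         // surviving record
        }
        advance(now.shard, now.idx);
    }
    return output;
}

int main() {
    std::vector<std::vector<Rec>> results = {
        {{1, false}, {3, false}},          // results from an older shard
        {{3, true},  {5, false}},          // newer shard holds a tombstone for key 3
    };
    for (int k : merge_with_cancellation(results)) std::printf("%d ", k);
    std::printf("\n");                     // prints: 1 5
    return 0;
}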
diff --git a/include/shard/Alex.h b/include/shard/Alex.h
index 9ba9666..be5222c 100644
--- a/include/shard/Alex.h
+++ b/include/shard/Alex.h
@@ -271,7 +271,7 @@ public:
return res;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
return;
}
diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h
index 3e3215f..3000a6a 100644
--- a/include/shard/MemISAM.h
+++ b/include/shard/MemISAM.h
@@ -378,6 +378,9 @@ template <RecordInterface R, bool Rejection=true>
class IRSQuery {
public:
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=false;
+
static void *get_query_state(MemISAM<R> *isam, void *parms) {
auto res = new IRSState<R>();
decltype(R::key) lower_key = ((irs_query_parms<R> *) parms)->lower_bound;
@@ -418,7 +421,7 @@ public:
return res;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
auto p = (irs_query_parms<R> *) query_parms;
auto bs = (buff_state) ? (IRSBufferState<R> *) buff_state : nullptr;
@@ -527,12 +530,12 @@ public:
return result;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
std::vector<R> output;
for (size_t i=0; i<results.size(); i++) {
for (size_t j=0; j<results[i].size(); j++) {
- output.emplace_back(results[i][j]);
+ output.emplace_back(results[i][j].rec);
}
}
@@ -554,6 +557,10 @@ public:
template <RecordInterface R>
class ISAMRangeQuery {
public:
+
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=true;
+
static void *get_query_state(MemISAM<R> *ts, void *parms) {
auto res = new ISAMRangeQueryState<R>();
auto p = (ISAMRangeQueryParms<R> *) parms;
@@ -571,7 +578,7 @@ public:
return res;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
return;
}
@@ -618,11 +625,25 @@ public:
return records;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+ std::vector<Cursor<Wrapped<R>>> cursors;
+ cursors.reserve(results.size());
+
+ PriorityQueue<Wrapped<R>> pq(results.size());
size_t total = 0;
- for (size_t i=0; i<results.size(); i++) {
- total += results[i].size();
- }
+ size_t tmp_n = results.size();
+
+
+ for (size_t i = 0; i < tmp_n; ++i)
+ if (results[i].size() > 0){
+ auto base = results[i].data();
+ cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+ assert(i == cursors.size() - 1);
+ total += results[i].size();
+ pq.push(cursors[i].ptr, tmp_n - i - 1);
+ } else {
+ cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+ }
if (total == 0) {
return std::vector<R>();
@@ -631,8 +652,24 @@ public:
std::vector<R> output;
output.reserve(total);
- for (size_t i=0; i<results.size(); i++) {
- std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+ while (pq.size()) {
+ auto now = pq.peek();
+ auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+ if (!now.data->is_tombstone() && next.data != nullptr &&
+ now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+ pq.pop(); pq.pop();
+ auto& cursor1 = cursors[tmp_n - now.version - 1];
+ auto& cursor2 = cursors[tmp_n - next.version - 1];
+ if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+ if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+ } else {
+ auto& cursor = cursors[tmp_n - now.version - 1];
+ if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+ pq.pop();
+
+ if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+ }
}
return output;
diff --git a/include/shard/PGM.h b/include/shard/PGM.h
index 65d548e..aba5227 100644
--- a/include/shard/PGM.h
+++ b/include/shard/PGM.h
@@ -34,9 +34,17 @@ struct pgm_range_query_parms {
};
template <RecordInterface R>
+struct PGMPointLookupParms {
+ decltype(R::key) target_key;
+};
+
+template <RecordInterface R>
class PGMRangeQuery;
template <RecordInterface R>
+class PGMPointLookup;
+
+template <RecordInterface R>
struct PGMState {
size_t start_idx;
size_t stop_idx;
@@ -45,12 +53,6 @@ struct PGMState {
template <RecordInterface R>
struct PGMBufferState {
size_t cutoff;
- Alias* alias;
-
- ~PGMBufferState() {
- delete alias;
- }
-
};
template <RecordInterface R, size_t epsilon=128>
@@ -64,6 +66,7 @@ public:
// FIXME: there has to be a better way to do this
friend class PGMRangeQuery<R>;
+ friend class PGMPointLookup<R>;
PGM(MutableBuffer<R>* buffer)
: m_reccnt(0), m_tombstone_cnt(0) {
@@ -274,11 +277,80 @@ private:
pgm::PGMIndex<K, epsilon> m_pgm;
BloomFilter<R> *m_bf;
};
+template <RecordInterface R>
+class PGMPointLookup {
+public:
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=false;
+
+ static void *get_query_state(PGM<R> *ts, void *parms) {
+ return nullptr;
+ }
+
+ static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+ return nullptr;
+ }
+
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
+ return;
+ }
+
+ static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) {
+ std::vector<Wrapped<R>> records;
+ auto p = (PGMPointLookupParms<R> *) parms;
+ auto s = (PGMState<R> *) q_state;
+
+ size_t idx = ts->get_lower_bound(p->target_key);
+ if (ts->get_record_at(idx)->rec.key == p->target_key) {
+ records.emplace_back(*ts->get_record_at(idx));
+ }
+
+ return records;
+ }
+
+ static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+ auto p = (PGMPointLookupParms<R> *) parms;
+ auto s = (PGMBufferState<R> *) state;
+
+ std::vector<Wrapped<R>> records;
+ for (size_t i=0; i<buffer->get_record_count(); i++) {
+ auto rec = buffer->get_data() + i;
+ if (rec->rec.key == p->target_key) {
+ records.emplace_back(*rec);
+ return records;
+ }
+ }
+
+ return records;
+ }
+
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+ std::vector<R> output;
+ for (size_t i=0 ;i<results.size(); i++) {
+ if (results[i].size() > 0) {
+ output.emplace_back(results[i][0].rec);
+ return output;
+ }
+ }
+
+ return output;
+ }
+
+ static void delete_query_state(void *state) {
+ }
+
+ static void delete_buffer_query_state(void *state) {
+ }
+};
+
template <RecordInterface R>
class PGMRangeQuery {
public:
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=false;
+
static void *get_query_state(PGM<R> *ts, void *parms) {
auto res = new PGMState<R>();
auto p = (pgm_range_query_parms<R> *) parms;
@@ -296,7 +368,7 @@ public:
return res;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
return;
}
@@ -343,11 +415,25 @@ public:
return records;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+ std::vector<Cursor<Wrapped<R>>> cursors;
+ cursors.reserve(results.size());
+
+ PriorityQueue<Wrapped<R>> pq(results.size());
size_t total = 0;
- for (size_t i=0; i<results.size(); i++) {
- total += results[i].size();
- }
+ size_t tmp_n = results.size();
+
+
+ for (size_t i = 0; i < tmp_n; ++i)
+ if (results[i].size() > 0){
+ auto base = results[i].data();
+ cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+ assert(i == cursors.size() - 1);
+ total += results[i].size();
+ pq.push(cursors[i].ptr, tmp_n - i - 1);
+ } else {
+ cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+ }
if (total == 0) {
return std::vector<R>();
@@ -356,8 +442,24 @@ public:
std::vector<R> output;
output.reserve(total);
- for (size_t i=0; i<results.size(); i++) {
- std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+ while (pq.size()) {
+ auto now = pq.peek();
+ auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+ if (!now.data->is_tombstone() && next.data != nullptr &&
+ now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+ pq.pop(); pq.pop();
+ auto& cursor1 = cursors[tmp_n - now.version - 1];
+ auto& cursor2 = cursors[tmp_n - next.version - 1];
+ if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+ if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+ } else {
+ auto& cursor = cursors[tmp_n - now.version - 1];
+ if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+ pq.pop();
+
+ if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+ }
}
return output;
diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h
index f06756f..7d1a90d 100644
--- a/include/shard/TrieSpline.h
+++ b/include/shard/TrieSpline.h
@@ -300,6 +300,9 @@ private:
template <RecordInterface R>
class TrieSplineRangeQuery {
public:
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=true;
+
static void *get_query_state(TrieSpline<R> *ts, void *parms) {
auto res = new TrieSplineState<R>();
auto p = (ts_range_query_parms<R> *) parms;
@@ -317,7 +320,7 @@ public:
return res;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
return;
}
@@ -367,11 +370,25 @@ public:
return records;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+ std::vector<Cursor<Wrapped<R>>> cursors;
+ cursors.reserve(results.size());
+
+ PriorityQueue<Wrapped<R>> pq(results.size());
size_t total = 0;
- for (size_t i=0; i<results.size(); i++) {
- total += results[i].size();
- }
+ size_t tmp_n = results.size();
+
+
+ for (size_t i = 0; i < tmp_n; ++i)
+ if (results[i].size() > 0){
+ auto base = results[i].data();
+ cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+ assert(i == cursors.size() - 1);
+ total += results[i].size();
+ pq.push(cursors[i].ptr, tmp_n - i - 1);
+ } else {
+ cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+ }
if (total == 0) {
return std::vector<R>();
@@ -380,11 +397,28 @@ public:
std::vector<R> output;
output.reserve(total);
- for (size_t i=0; i<results.size(); i++) {
- std::move(results[i].begin(), results[i].end(), std::back_inserter(output));
+ while (pq.size()) {
+ auto now = pq.peek();
+ auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+ if (!now.data->is_tombstone() && next.data != nullptr &&
+ now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+ pq.pop(); pq.pop();
+ auto& cursor1 = cursors[tmp_n - now.version - 1];
+ auto& cursor2 = cursors[tmp_n - next.version - 1];
+ if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+ if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+ } else {
+ auto& cursor = cursors[tmp_n - now.version - 1];
+ if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+ pq.pop();
+
+ if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+ }
}
return output;
+
}
static void delete_query_state(void *state) {
diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h
index 5f740dc..86f4ab7 100644
--- a/include/shard/VPTree.h
+++ b/include/shard/VPTree.h
@@ -424,6 +424,9 @@ private:
template <NDRecordInterface R>
class KNNQuery {
public:
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=true;
+
static void *get_query_state(VPTree<R> *wss, void *parms) {
return nullptr;
}
@@ -432,7 +435,7 @@ public:
return nullptr;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
return;
}
@@ -494,7 +497,7 @@ public:
return results;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
KNNQueryParms<R> *p = (KNNQueryParms<R> *) parms;
R rec = p->point;
size_t k = p->k;
@@ -503,14 +506,14 @@ public:
for (size_t i=0; i<results.size(); i++) {
for (size_t j=0; j<results[i].size(); j++) {
if (pq.size() < k) {
- pq.push(&results[i][j]);
+ pq.push(&results[i][j].rec);
} else {
double head_dist = pq.peek().data->calc_distance(rec);
- double cur_dist = results[i][j].calc_distance(rec);
+ double cur_dist = results[i][j].rec.calc_distance(rec);
if (cur_dist < head_dist) {
pq.pop();
- pq.push(&results[i][j]);
+ pq.push(&results[i][j].rec);
}
}
}
diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h
index 1a63092..fafdef0 100644
--- a/include/shard/WIRS.h
+++ b/include/shard/WIRS.h
@@ -370,6 +370,10 @@ private:
template <WeightedRecordInterface R, bool Rejection=true>
class WIRSQuery {
public:
+
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=false;
+
static void *get_query_state(WIRS<R> *wirs, void *parms) {
auto res = new WIRSState<R>();
decltype(R::key) lower_key = ((wirs_query_parms<R> *) parms)->lower_bound;
@@ -440,7 +444,7 @@ public:
return state;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
auto p = (wirs_query_parms<R> *) query_parms;
auto bs = (WIRSBufferState<R> *) buff_state;
@@ -549,12 +553,12 @@ public:
return result;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
std::vector<R> output;
for (size_t i=0; i<results.size(); i++) {
for (size_t j=0; j<results[i].size(); j++) {
- output.emplace_back(results[i][j]);
+ output.emplace_back(results[i][j].rec);
}
}
diff --git a/include/shard/WSS.h b/include/shard/WSS.h
index 17e9eb9..435e799 100644
--- a/include/shard/WSS.h
+++ b/include/shard/WSS.h
@@ -286,6 +286,10 @@ private:
template <WeightedRecordInterface R, bool Rejection=true>
class WSSQuery {
public:
+
+ constexpr static bool EARLY_ABORT=false;
+ constexpr static bool SKIP_DELETE_FILTER=false;
+
static void *get_query_state(WSS<R> *wss, void *parms) {
auto res = new WSSState<R>();
res->total_weight = wss->m_total_weight;
@@ -325,7 +329,7 @@ public:
return state;
}
- static void process_query_states(void *query_parms, std::vector<void*> shard_states, void *buff_state) {
+ static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
auto p = (wss_query_parms<R> *) query_parms;
auto bs = (WSSBufferState<R> *) buff_state;
@@ -415,12 +419,12 @@ public:
return result;
}
- static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) {
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
std::vector<R> output;
for (size_t i=0; i<results.size(); i++) {
for (size_t j=0; j<results[i].size(); j++) {
- output.emplace_back(results[i][j]);
+ output.emplace_back(results[i][j].rec);
}
}