summaryrefslogtreecommitdiffstats
path: root/benchmarks
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2024-04-30 14:18:08 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2024-04-30 14:18:08 -0400
commit47a386b50d904d3f1b7ce3cfc13c29ea96dd1e43 (patch)
treee37b21e18dd9977e4ba90af46605ad3b5c928b45 /benchmarks
parentc61164545f4c113fb17eb993e393bbf97373cfb3 (diff)
downloaddynamic-extension-47a386b50d904d3f1b7ce3cfc13c29ea96dd1e43.tar.gz
Added VPTree BSM benchmark
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/include/standard_benchmarks.h39
-rw-r--r--benchmarks/include/triespline_bsm.h3
-rw-r--r--benchmarks/include/vptree_bsm.h317
-rw-r--r--benchmarks/vldb/mtree_bench.cpp2
-rw-r--r--benchmarks/vldb/vptree_bench.cpp6
5 files changed, 362 insertions, 5 deletions
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
index aaef679..76b5f7c 100644
--- a/benchmarks/include/standard_benchmarks.h
+++ b/benchmarks/include/standard_benchmarks.h
@@ -34,6 +34,14 @@ static void run_queries(DE *extension, std::vector<QP> &queries) {
result.emplace_back(itr->data);
itr++;
}
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (auto &r : result) {
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ #endif
} else if constexpr (std::is_same_v<PGM, DE>) {
size_t tot = 0;
auto ptr = extension->find(queries[i].lower_bound);
@@ -44,7 +52,26 @@ static void run_queries(DE *extension, std::vector<QP> &queries) {
} else {
auto res = extension->query(&queries[i]);
if constexpr (!BSM) {
- auto r = res.get();
+ auto result = res.get();
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=result.size()-1; i>=0; i--) {
+ auto &r = result[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ } else {
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
}
}
}
@@ -73,6 +100,16 @@ static void run_static_queries(S *shard, std::vector<QP> &queries) {
Q::process_query_states(q, states, nullptr);
auto res = Q::query(shard, state, q);
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i].rec;
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
}
}
diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h
index 4276074..dfddc43 100644
--- a/benchmarks/include/triespline_bsm.h
+++ b/benchmarks/include/triespline_bsm.h
@@ -2,6 +2,7 @@
#include <cstdlib>
#include <vector>
+#include <algorithm>
#include "ts/builder.h"
template <typename K, typename V, size_t E=1024>
@@ -52,7 +53,7 @@ public:
return std::move(rs);
}
- std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb) {
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void *parms) {
rsa[0].first += rsb[0].first;
return std::move(rsa);
}
diff --git a/benchmarks/include/vptree_bsm.h b/benchmarks/include/vptree_bsm.h
new file mode 100644
index 0000000..2f12fcb
--- /dev/null
+++ b/benchmarks/include/vptree_bsm.h
@@ -0,0 +1,317 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <limits>
+#include <gsl/gsl_rng.h>
+
+#include "psu-ds/PriorityQueue.h"
+#include "framework/interface/Record.h"
+
+template <typename R, size_t LEAFSZ=100>
+class BSMVPTree {
+public:
+ struct KNNQueryParms {
+ R point;
+ size_t k;
+ };
+
+public:
+ static BSMVPTree *build(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ static BSMVPTree *build_presorted(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ return rs;
+ }
+
+ auto parms = (BSMVPTree::KNNQueryParms*) q;
+ auto pq = psudb::PriorityQueue<R, de::DistCmpMax<R>>(parms->k, &parms->point);
+
+ if (parms->k >= m_data.size()) {
+ for (size_t i=0; i<m_data.size(); i++) {
+ if (m_ptrs[i].ptr != nullptr) {
+ pq.push(m_ptrs[i].ptr);
+ }
+ }
+ } else {
+ double farthest = std::numeric_limits<double>::max();
+ internal_search(m_root, parms->point, parms->k, pq, &farthest);
+ }
+
+ size_t i=0;
+ while (pq.size() > 0) {
+ rs.push_back(*pq.peek().data);
+ pq.pop();
+ }
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void* parms) {
+ KNNQueryParms *p = (KNNQueryParms *) parms;
+ R rec = p->point;
+ size_t k = p->k;
+
+ std::vector<R> output;
+
+ psudb::PriorityQueue<R, de::DistCmpMax<R>> pq(k, &rec);
+
+ for (size_t i=0; i<rsa.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsa[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsa[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsa[i]);
+ }
+ }
+ }
+
+ for (size_t i=0; i<rsb.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsb[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsb[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsb[i]);
+ }
+ }
+ }
+
+ while (pq.size() > 0) {
+ output.emplace_back(*pq.peek().data);
+ pq.pop();
+ }
+
+ return std::move(output);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+ ~BSMVPTree() {
+ delete m_root;
+ }
+
+private:
+
+ struct vp_ptr {
+ R *ptr;
+ double dist;
+ };
+
+ struct vpnode {
+ size_t start;
+ size_t stop;
+ bool leaf;
+
+ double radius;
+ vpnode *inside;
+ vpnode *outside;
+
+ vpnode() : start(0), stop(0), leaf(false), radius(0.0), inside(nullptr), outside(nullptr) {}
+
+ ~vpnode() {
+ delete inside;
+ delete outside;
+ }
+ };
+
+ std::vector<R> m_data;
+ std::vector<vp_ptr> m_ptrs;
+ vpnode *m_root;
+
+ size_t m_node_cnt;
+
+ BSMVPTree(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_node_cnt = 0;
+
+ for (size_t i=0; i<m_data.size(); i++) {
+ m_ptrs.push_back({&m_data[i], 0});
+ }
+
+ m_root = build_vptree();
+ }
+
+ vpnode *build_vptree() {
+ if (m_data.size() == 0) {
+ return nullptr;
+ }
+
+ size_t lower = 0;
+ size_t upper = m_data.size() - 1;
+
+ auto rng = gsl_rng_alloc(gsl_rng_mt19937);
+ auto root = build_subtree(lower, upper, rng);
+ gsl_rng_free(rng);
+ return root;
+ }
+
+ vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) {
+ /*
+ * base-case: sometimes happens (probably because of the +1 and -1
+ * in the first recursive call)
+ */
+ if (start > stop) {
+ return nullptr;
+ }
+
+ /* base-case: create a leaf node */
+ if (stop - start <= LEAFSZ) {
+ vpnode *node = new vpnode();
+ node->start = start;
+ node->stop = stop;
+ node->leaf = true;
+
+ m_node_cnt++;
+ return node;
+ }
+
+ /*
+ * select a random element to be the root of the
+ * subtree
+ */
+ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1);
+ swap(start, i);
+
+ /* for efficiency, we'll pre-calculate the distances between each point and the root */
+ for (size_t i=start+1; i<=stop; i++) {
+ m_ptrs[i].dist = m_ptrs[start].ptr->calc_distance(*m_ptrs[i].ptr);
+ }
+
+ /*
+ * partition elements based on their distance from the start,
+ * with those elements with distance falling below the median
+ * distance going into the left sub-array and those above
+ * the median in the right. This is easily done using QuickSelect.
+ */
+ auto mid = (start + 1 + stop) / 2;
+ quickselect(start + 1, stop, mid, m_ptrs[start].ptr, rng);
+
+ /* Create a new node based on this partitioning */
+ vpnode *node = new vpnode();
+ node->start = start;
+
+ /* store the radius of the circle used for partitioning the node. */
+ node->radius = m_ptrs[start].ptr->calc_distance(*m_ptrs[mid].ptr);
+ m_ptrs[start].dist = node->radius;
+
+ /* recursively construct the left and right subtrees */
+ node->inside = build_subtree(start + 1, mid-1, rng);
+ node->outside = build_subtree(mid, stop, rng);
+
+ m_node_cnt++;
+
+ return node;
+ }
+
+ void quickselect(size_t start, size_t stop, size_t k, R *p, gsl_rng *rng) {
+ if (start == stop) return;
+
+ auto pivot = partition(start, stop, p, rng);
+
+ if (k < pivot) {
+ quickselect(start, pivot - 1, k, p, rng);
+ } else if (k > pivot) {
+ quickselect(pivot + 1, stop, k, p, rng);
+ }
+ }
+
+ size_t partition(size_t start, size_t stop, R *p, gsl_rng *rng) {
+ auto pivot = start + gsl_rng_uniform_int(rng, stop - start);
+
+ swap(pivot, stop);
+
+ size_t j = start;
+ for (size_t i=start; i<stop; i++) {
+ if (m_ptrs[i].dist < m_ptrs[stop].dist) {
+ swap(j++, i);
+ }
+ }
+
+ swap(j, stop);
+ return j;
+ }
+
+ void swap(size_t idx1, size_t idx2) {
+ auto tmp = m_ptrs[idx1];
+ m_ptrs[idx1] = m_ptrs[idx2];
+ m_ptrs[idx2] = tmp;
+ }
+
+ void internal_search(vpnode *node, const R &point, size_t k, psudb::PriorityQueue<R,
+ de::DistCmpMax<R>> &pq, double *farthest) {
+
+ if (node == nullptr) return;
+
+ if (node->leaf) {
+ for (size_t i=node->start; i<=node->stop; i++) {
+ double d = point.calc_distance(*m_ptrs[i].ptr);
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ pq.pop();
+ }
+
+ pq.push(m_ptrs[i].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+ }
+
+ return;
+ }
+
+ double d = point.calc_distance(*m_ptrs[node->start].ptr);
+
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ auto t = pq.peek().data;
+ pq.pop();
+ }
+ pq.push(m_ptrs[node->start].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+
+ if (d < node->radius) {
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+ } else {
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+ }
+ }
+};
diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp
index 35f56be..60425da 100644
--- a/benchmarks/vldb/mtree_bench.cpp
+++ b/benchmarks/vldb/mtree_bench.cpp
@@ -46,7 +46,7 @@ int main(int argc, char **argv) {
}
}
fprintf(stderr, "[I] Reading Queries\n");
- auto queries = read_knn_queries<QP>(q_fname, 10);
+ auto queries = read_knn_queries<QP>(q_fname, 1000);
fprintf(stderr, "[I] Warming up structure...\n");
/* warmup structure w/ 10% of records */
diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp
index b17a57b..0b98a52 100644
--- a/benchmarks/vldb/vptree_bench.cpp
+++ b/benchmarks/vldb/vptree_bench.cpp
@@ -38,7 +38,7 @@ int main(int argc, char **argv) {
std::string d_fname = std::string(argv[2]);
std::string q_fname = std::string(argv[3]);
- auto extension = new Ext(100, 1000, 8, 0, 64);
+ auto extension = new Ext(1400, 1400, 8, 0, 64);
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
fprintf(stderr, "[I] Reading data file...\n");
@@ -53,7 +53,7 @@ int main(int argc, char **argv) {
}
}
fprintf(stderr, "[I] Reading Queries\n");
- auto queries = read_knn_queries<QP>(q_fname, 10);
+ auto queries = read_knn_queries<QP>(q_fname, 1000);
fprintf(stderr, "[I] Warming up structure...\n");
/* warmup structure w/ 10% of records */
@@ -82,6 +82,7 @@ int main(int argc, char **argv) {
auto shard = extension->create_static_structure();
+ fprintf(stderr, "Running Static query tests\n\n");
TIMER_START();
run_static_queries<Shard, QP, Q>(shard, queries);
TIMER_STOP();
@@ -96,5 +97,6 @@ int main(int argc, char **argv) {
gsl_rng_free(rng);
delete extension;
fflush(stderr);
+ fflush(stdout);
}