summaryrefslogtreecommitdiffstats
path: root/benchmarks/include
diff options
context:
space:
mode:
authorDouglas B. Rumbaugh <dbr4@psu.edu>2024-05-14 16:31:05 -0400
committerGitHub <noreply@github.com>2024-05-14 16:31:05 -0400
commit47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc (patch)
treeee5613ce182b2c9caa228d3abeb65dc27fef2db3 /benchmarks/include
parent4a834497d5f82c817d634925250158d85ca825c2 (diff)
parent8643fe194dec05b4e3f3ea31e162ac0b2b00e162 (diff)
downloaddynamic-extension-47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc.tar.gz
Merge pull request #4 from dbrumbaugh/master
Updates for VLDB revision
Diffstat (limited to 'benchmarks/include')
-rw-r--r--benchmarks/include/benchmark_types.h68
-rw-r--r--benchmarks/include/btree-util.h27
-rw-r--r--benchmarks/include/data-proc.h258
-rw-r--r--benchmarks/include/file_util.h294
-rw-r--r--benchmarks/include/standard_benchmarks.h380
-rw-r--r--benchmarks/include/triespline_bsm.h156
-rw-r--r--benchmarks/include/vptree_bsm.h317
7 files changed, 1215 insertions, 285 deletions
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
new file mode 100644
index 0000000..51fc52d
--- /dev/null
+++ b/benchmarks/include/benchmark_types.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <cstdlib>
+#include "psu-ds/BTree.h"
+#include "framework/interface/Record.h"
+#include "pgm/pgm_index_dynamic.hpp"
+
+/* BTree definitions*/
+template <typename K, typename V>
+struct btree_record {
+ K key;
+ V value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+template <typename K, typename V>
+struct btree_key_extract {
+ static const K &get(const btree_record<K, V> &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract<int64_t, int64_t>> BenchBTree;
+
+
+/*MTree Definitions*/
+
+const size_t W2V_SIZE = 300;
+typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
+
+const size_t ANNSize = 128;
+typedef de::EuclidPoint<uint64_t, ANNSize> ANNRec;
+
+struct euclidean_distance {
+ double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<W2V_SIZE; i++) {
+ dist += (first.data[i] - second.data[i]) * (first.data[i] - second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
+
+ double operator()(const ANNRec &first, const ANNRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<ANNSize; i++) {
+ dist += ((double) first.data[i] - (double) second.data[i]) * ((double) first.data[i] - (double) second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
+};
+
+#ifdef _GNU_SOURCE
+#include "mtree.h"
+typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
+typedef mt::mtree<ANNRec, euclidean_distance> MTree_alt;
+#endif
+
+typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM;
+
diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h
deleted file mode 100644
index 571c073..0000000
--- a/benchmarks/include/btree-util.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include <cstdlib>
-#include "psu-ds/BTree.h"
-
-struct btree_record {
- int64_t key;
- int64_t value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const int64_t &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-
diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h
deleted file mode 100644
index 444cb94..0000000
--- a/benchmarks/include/data-proc.h
+++ /dev/null
@@ -1,258 +0,0 @@
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <gsl/gsl_rng.h>
-#include <cstring>
-#include <vector>
-
-#include "psu-ds/BTree.h"
-
-#pragma once
-
-typedef int64_t key_type;
-typedef int64_t value_type;
-typedef uint64_t weight_type;
-
-static gsl_rng *g_rng;
-static bool g_osm_data;
-
-struct btree_record {
- key_type key;
- value_type value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const key_type &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-static key_type g_min_key = UINT64_MAX;
-static key_type g_max_key = 0;
-
-static size_t g_max_record_cnt = 0;
-static size_t g_reccnt = 0;
-
-static constexpr unsigned int DEFAULT_SEED = 0;
-
-static unsigned int get_random_seed()
-{
- unsigned int seed = 0;
- std::fstream urandom;
- urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
- urandom.read((char *) &seed, sizeof(seed));
- urandom.close();
-
- return seed;
-}
-
-static key_type osm_to_key(const char *key_field) {
- double tmp_key = (atof(key_field) + 180) * 10e6;
- return (key_type) tmp_key;
-}
-
-static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
-{
- g_rng = gsl_rng_alloc(type);
- gsl_rng_set(g_rng, seed);
-}
-
-static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
-{
- unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
- init_bench_rng(seed, gsl_rng_mt19937);
- g_osm_data = osm_correction;
- g_max_record_cnt = max_reccnt;
- g_reccnt = 0;
-}
-
-static void delete_bench_env()
-{
- gsl_rng_free(g_rng);
-}
-
-
-template <typename QP>
-static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.target_key = start;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.lower_bound = start;
- q.upper_bound = stop;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- char *line = NULL;
- size_t len = 0;
-
- while (getline(&line, &len, qf) > 0) {
- char *token;
- QP query;
- size_t idx = 0;
-
- token = strtok(line, " ");
- do {
- query.point.data[idx++] = atof(token);
- } while ((token = strtok(NULL, " ")));
-
- query.k = k;
- queries.emplace_back(query);
- }
-
- free(line);
- fclose(qf);
-
- return queries;
-}
-
-/*
- * NOTE: The QP type must have lower_bound and upper_bound attributes, which
- * this function will initialize. Any other query parameter attributes must
- * be manually initialized after the call.
- */
-template <typename R>
-static bool next_vector_record(std::fstream &file, R &record, bool binary=false) {
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- for (size_t i=0; i<300; i++) {
- std::string dimension;
-
- std::getline(line_stream, dimension, ' ');
- record.data[i] = atof(dimension.c_str());
- }
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-
-}
-
-template <typename R>
-static bool next_record(std::fstream &file, R &record, bool binary=false)
-{
- static value_type value = 1;
- if (g_reccnt >= g_max_record_cnt) return false;
-
- if (binary) {
- if (file.good()) {
- decltype(R::key) key;
-
- file.read((char*) &key, sizeof(key));
- record.key = key;
- record.value = value;
- value++;
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- return true;
- }
-
- return false;
- }
-
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- std::string key_field;
- std::string value_field;
- std::string weight_field;
-
- std::getline(line_stream, value_field, '\t');
- std::getline(line_stream, key_field, '\t');
- std::getline(line_stream, weight_field, '\t');
-
- record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
- record.value = atol(value_field.c_str());
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-}
-
-template <typename R>
-static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
- vec.clear();
-
- size_t cnt = 0;
- while (cnt < n) {
- if (to_delete.size() == 0) {
- return false;
- }
-
- auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
- vec.emplace_back(to_delete[i]);
- to_delete.erase(to_delete.begin() + i);
- }
-td:
- return true;
-}
-
-static std::vector<int64_t> read_sosd_file(std::string &fname, size_t n) {
- std::fstream file;
- file.open(fname, std::ios::in | std::ios::binary);
-
- std::vector<int64_t> records(n);
- for (size_t i=0; i<n; i++) {
- file.read((char*) &(records[i]), sizeof(int64_t));
- }
-
- return records;
-}
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
new file mode 100644
index 0000000..41eb18c
--- /dev/null
+++ b/benchmarks/include/file_util.h
@@ -0,0 +1,294 @@
+#pragma once
+
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <gsl/gsl_rng.h>
+#include <cstring>
+#include <vector>
+#include <memory>
+
+#include "psu-util/progress.h"
+
+
+template <typename QP>
+static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.target_key = start;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) {
+ std::vector<QP> queries;
+
+ for (size_t i=0; i<cnt; i++) {
+ auto idx = gsl_rng_uniform_int(rng, strings.size());
+ QP q;
+ q.search_key = strings[idx].get();
+ queries.push_back(q);
+ }
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.00001) {
+ QP q;
+ q.lower_bound = start;
+ q.upper_bound = stop;
+
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+
+template <typename QP>
+static std::vector<QP> read_binary_knn_queries(std::string fname, size_t k, size_t n) {
+ std::vector<QP> queries;
+ queries.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ for (size_t i=0; i<n; i++) {
+ QP query;
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ query.point.data[j] = val;
+ }
+ query.k = k;
+ queries.push_back(query);
+ }
+
+ return queries;
+}
+
+
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+template<typename R>
+static std::vector<R> read_sosd_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records(n);
+ for (size_t i=0; i<n; i++) {
+ decltype(R::key) k;
+ file.read((char*) &(k), sizeof(R::key));
+ records[i].key = k;
+ records[i].value = i;
+ }
+
+ return records;
+}
+
+template<typename K, typename V>
+static std::vector<std::pair<K, V>> read_sosd_file_pair(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<std::pair<K,V>> records(n);
+ for (size_t i=0; i<n; i++) {
+ K k;
+ file.read((char*) &(k), sizeof(K));
+ records[i].first = k;
+ records[i].second = i;
+ }
+
+ return records;
+}
+
+/*
+ * This function expects a plaintext file with each vector on its own line.
+ * There should be D dimensions (or more) for each record, separated by
+ * whitespace. The function will generate a vector of n records, each
+ * record built from the first D dimensions of the data in the file.
+ */
+template <typename R, size_t D>
+static std::vector<R> read_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ for (size_t i=0; i<n; i++) {
+ std::string line;
+ if (!std::getline(file, line, '\n')) break;
+
+ std::stringstream line_stream(line);
+ R rec;
+ for (size_t j=0; j<D; j++) {
+ std::string dim;
+ if (!std::getline(line_stream, dim, ' ')) break;
+
+ rec.data[j] = atof(dim.c_str());
+ }
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
+
+template <typename R>
+static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ R rec;
+ for (size_t i=0; i<n; i++) {
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ rec.data[j] = val;
+ }
+
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
+
+static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<std::unique_ptr<char[]>> strings;
+ strings.reserve(n);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(std::unique_ptr<char[]>(strdup(line.c_str())));
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+
+ return strings;
+}
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
new file mode 100644
index 0000000..b805c08
--- /dev/null
+++ b/benchmarks/include/standard_benchmarks.h
@@ -0,0 +1,380 @@
+/*
+ * benchmarks/include/bench.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <gsl/gsl_rng.h>
+
+#include "framework/DynamicExtension.h"
+#include "framework/interface/Query.h"
+#include "query/irs.h"
+#include "psu-util/progress.h"
+#include "benchmark_types.h"
+#include "psu-util/bentley-saxe.h"
+
+static size_t g_deleted_records = 0;
+static double delete_proportion = 0.05;
+
+static volatile size_t total = 0;
+
+template<typename DE, typename QP, typename R>
+static void run_queries(DE *extension, DE *ghost, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ std::vector<R> res = extension->query(&queries[i]);
+ std::vector<R> negres = ghost->query(&queries[i]);
+ auto result = res[0].first - negres[0].first;
+ total = result;
+ }
+}
+
+
+template<typename DE, typename QP, bool BSM=false>
+static void run_queries(DE *extension, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ if constexpr (std::is_same_v<MTree, DE>) {
+ std::vector<Word2VecRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (auto &r : result) {
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ #endif
+ } else if constexpr (std::is_same_v<MTree_alt, DE>) {
+ std::vector<ANNRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+ }else if constexpr (std::is_same_v<PGM, DE>) {
+ size_t tot = 0;
+ auto ptr = extension->find(queries[i].lower_bound);
+ while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) {
+ tot++;
+ ++ptr;
+ }
+ } else {
+ auto res = extension->query(&queries[i]);
+ if constexpr (!BSM) {
+ auto result = res.get();
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=result.size()-1; i>=0; i--) {
+ auto &r = result[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ } else {
+ total = res.size();
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ }
+ }
+ }
+}
+
+template <typename R>
+static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) {
+ std::vector<int64_t> sample_set;
+ sample_set.reserve(queries[0].sample_size);
+
+ for (size_t i=0; i<queries.size(); i++) {
+ btree->range_sample(queries[i].lower_bound, queries[i].upper_bound, queries[i].sample_size, sample_set, queries[i].rng);
+ }
+}
+
+
+template<typename S, typename QP, typename Q>
+static void run_static_queries(S *shard, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ auto q = &queries[i];
+
+ auto state = Q::get_query_state(shard, q);
+
+ std::vector<void*> shards = {shard};
+ std::vector<void*> states = {state};
+
+ Q::process_query_states(q, states, nullptr);
+ auto res = Q::query(shard, state, q);
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i].rec;
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ }
+}
+
+
+/*
+ * Insert records into a standard Bentley-Saxe extension. Deletes are not
+ * supported.
+ */
+template<typename DS, typename R, bool MDSP=false>
+static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
+ size_t start, size_t stop, std::vector<R> &records) {
+
+ psudb::progress_update(0, "Insert Progress");
+ for (size_t i=start; i<stop; i++) {
+ extension->insert(records[i]);
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+
+template<typename DS, typename R, bool MDSP=false>
+static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
+ psudb::bsm::BentleySaxe<R, DS, MDSP> *ghost,
+ size_t start, size_t stop, std::vector<R> &records,
+ std::vector<size_t> &to_delete, size_t &delete_idx,
+ gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+
+ extension->insert(records[i]);
+
+ if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
+ ghost->insert(records[to_delete[delete_idx]]);
+ delete_idx++;
+ g_deleted_records++;
+ }
+
+ }
+
+}
+
+
+template<typename DE, typename R>
+static void insert_records(DE *structure, size_t start, size_t stop,
+ std::vector<R> &records, std::vector<size_t> &to_delete,
+ size_t &delete_idx, bool delete_records, gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->insert(records[i]);
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ structure->add(records[i]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->insert_or_assign(records[i].key, records[i].value);
+ } else {
+ while (!structure->insert(records[i])) {
+ psudb::progress_update((double) i / (double)(stop - start), "Insert Progress");
+ usleep(1);
+ }
+ }
+
+ if (delete_records && gsl_rng_uniform(rng) <=
+ delete_proportion && to_delete[delete_idx] <= i) {
+
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->erase_one(records[to_delete[delete_idx]].key);
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ structure->remove(records[to_delete[delete_idx]]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->erase(records[to_delete[delete_idx]].key);
+ } else {
+ while (!structure->erase(records[to_delete[delete_idx]])) {
+ usleep(1);
+ }
+ }
+ delete_idx++;
+ g_deleted_records++;
+ }
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
+static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
+ double delete_prop, gsl_rng *rng, std::vector<R> &to_delete, bool binary=false) {
+
+ size_t delete_cnt = insert_cnt * delete_prop;
+
+ size_t applied_deletes = 0;
+ size_t applied_inserts = 0;
+
+ std::vector<R> insert_vec;
+ std::vector<R> delete_vec;
+ insert_vec.reserve(BATCH);
+ delete_vec.reserve(BATCH*delete_prop);
+
+ size_t delete_idx = 0;
+
+ bool continue_benchmark = true;
+
+ size_t total_time = 0;
+
+ while (applied_inserts < insert_cnt && continue_benchmark) {
+ continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
+ if (applied_deletes < delete_cnt) {
+ build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+ delete_idx = 0;
+ }
+
+ if (insert_vec.size() == 0) {
+ break;
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+ }
+
+ auto insert_start = std::chrono::high_resolution_clock::now();
+ for (size_t i=0; i<insert_vec.size(); i++) {
+ // process a delete if necessary
+ if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) {
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ de_index.erase_one(delete_vec[delete_idx++].key);
+ #ifdef _GNU_SOURCE
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ de_index.remove(delete_vec[delete_idx++]);
+ #endif
+ } else {
+ de_index.erase(delete_vec[delete_idx++]);
+ }
+ applied_deletes++;
+ }
+
+ // insert the record;
+ #ifdef _GNU_SOURCE
+ if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ de_index.add(insert_vec[i]);
+ } else {
+ de_index.insert(insert_vec[i]);
+ }
+ #else
+ de_index.insert(insert_vec[i]);
+ #endif
+
+ applied_inserts++;
+ }
+ auto insert_stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update(1.0, "inserting:");
+ }
+
+ size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+
+ fprintf(stdout, "%ld\t", throughput);
+
+ return continue_benchmark;
+}
+
+template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true>
+static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ auto res = de_index.query(&queries[j]);
+
+ total_results += res.size();
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
+
+
+template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true>
+static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ std::vector<void *> states(1);
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ states[0] = Q::get_query_state(shard, &queries[j]);
+ Q::process_query_states(&queries[j], states, nullptr);
+ auto res = Q::query(shard, states[0], &queries[j]);
+ total_results += res.size();
+ Q::delete_query_state(states[0]);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h
new file mode 100644
index 0000000..eaf1079
--- /dev/null
+++ b/benchmarks/include/triespline_bsm.h
@@ -0,0 +1,156 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include "ts/builder.h"
+
+template <typename K, typename V, size_t E=1024>
+class BSMTrieSpline {
+private:
+ typedef std::pair<K, V> R;
+
+public:
+ struct RangeQueryParameters {
+ K lower_bound;
+ K upper_bound;
+ };
+
+public:
+ static BSMTrieSpline *build(std::vector<R> &records) {
+ if (records.size() == 0) {
+ return nullptr;
+ }
+
+ std::sort(records.begin(), records.end());
+ return new BSMTrieSpline(records);
+ }
+
+ static BSMTrieSpline *build_presorted(std::vector<R> &records) {
+ if (records.size() == 0) {
+ return nullptr;
+ }
+
+ return new BSMTrieSpline(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ rs.push_back({0, 0});
+ return rs;
+ }
+
+ auto parms = (BSMTrieSpline::RangeQueryParameters*) q;
+
+ size_t idx = lower_bound(parms->lower_bound);
+
+ size_t cnt = 0;
+ while (idx < m_data.size() && m_data[idx].first <= parms->upper_bound) {
+ cnt++;
+ idx++;
+ }
+
+ rs.push_back({cnt, 0});
+
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void *parms) {
+ /* initialize rsa on the first merge */
+ if (rsa.size() == 0) {
+ rsa.push_back({0, 0});
+ }
+ rsa[0].first += rsb[0].first;
+ return std::move(rsa);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+ ~BSMTrieSpline() = default;
+
+private:
+ std::vector<R> m_data;
+ K m_max_key;
+ K m_min_key;
+ ts::TrieSpline<K> m_ts;
+
+ BSMTrieSpline(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_min_key = m_data[0].first;
+ m_max_key = m_data[m_data.size() - 1].first;
+
+ if (m_data.size() < 50) {
+ return;
+ }
+
+ auto bldr = ts::Builder<K>(m_min_key, m_max_key, E);
+ for (size_t i=0; i<m_data.size(); i++) {
+ bldr.AddKey(m_data[i].first);
+ }
+
+ if (m_data.size() > 1) {
+ m_ts = bldr.Finalize();
+ }
+ }
+
+ size_t lower_bound(K key) {
+ if (m_data.size() == 0) {
+ return 1;
+ } else if (m_data.size() < 50) {
+ for (size_t i=0; i<m_data.size(); i++) {
+ if (m_data[i].first >= key) {
+ return i;
+ }
+ }
+
+ return m_data.size();
+ }
+
+ auto bound = m_ts.GetSearchBound(key);
+ size_t idx = bound.begin;
+
+ if (idx >= m_data.size()) {
+ return m_data.size();
+ }
+
+ // If the region to search is less than some pre-specified
+ // amount, perform a linear scan to locate the record.
+ if (bound.end - bound.begin < 256) {
+ while (idx < bound.end && m_data[idx].first < key) {
+ idx++;
+ }
+ } else {
+ // Otherwise, perform a binary search
+ idx = bound.begin;
+ size_t max = bound.end;
+
+ while (idx < max) {
+ size_t mid = (idx + max) / 2;
+ if (key > m_data[mid].first) {
+ idx = mid + 1;
+ } else {
+ max = mid;
+ }
+ }
+ }
+
+ if (idx == m_data.size()) {
+ return m_data.size();
+ }
+
+ if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) {
+ return idx-1;
+ }
+
+ return idx;
+ }
+};
diff --git a/benchmarks/include/vptree_bsm.h b/benchmarks/include/vptree_bsm.h
new file mode 100644
index 0000000..2f12fcb
--- /dev/null
+++ b/benchmarks/include/vptree_bsm.h
@@ -0,0 +1,317 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <limits>
+#include <gsl/gsl_rng.h>
+
+#include "psu-ds/PriorityQueue.h"
+#include "framework/interface/Record.h"
+
+template <typename R, size_t LEAFSZ=100>
+class BSMVPTree {
+public:
+ struct KNNQueryParms {
+ R point;
+ size_t k;
+ };
+
+public:
+ static BSMVPTree *build(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ static BSMVPTree *build_presorted(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ return rs;
+ }
+
+ auto parms = (BSMVPTree::KNNQueryParms*) q;
+ auto pq = psudb::PriorityQueue<R, de::DistCmpMax<R>>(parms->k, &parms->point);
+
+ if (parms->k >= m_data.size()) {
+ for (size_t i=0; i<m_data.size(); i++) {
+ if (m_ptrs[i].ptr != nullptr) {
+ pq.push(m_ptrs[i].ptr);
+ }
+ }
+ } else {
+ double farthest = std::numeric_limits<double>::max();
+ internal_search(m_root, parms->point, parms->k, pq, &farthest);
+ }
+
+ size_t i=0;
+ while (pq.size() > 0) {
+ rs.push_back(*pq.peek().data);
+ pq.pop();
+ }
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void* parms) {
+ KNNQueryParms *p = (KNNQueryParms *) parms;
+ R rec = p->point;
+ size_t k = p->k;
+
+ std::vector<R> output;
+
+ psudb::PriorityQueue<R, de::DistCmpMax<R>> pq(k, &rec);
+
+ for (size_t i=0; i<rsa.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsa[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsa[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsa[i]);
+ }
+ }
+ }
+
+ for (size_t i=0; i<rsb.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsb[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsb[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsb[i]);
+ }
+ }
+ }
+
+ while (pq.size() > 0) {
+ output.emplace_back(*pq.peek().data);
+ pq.pop();
+ }
+
+ return std::move(output);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+ ~BSMVPTree() {
+ delete m_root;
+ }
+
+private:
+
+ struct vp_ptr {
+ R *ptr;
+ double dist;
+ };
+
+ struct vpnode {
+ size_t start;
+ size_t stop;
+ bool leaf;
+
+ double radius;
+ vpnode *inside;
+ vpnode *outside;
+
+ vpnode() : start(0), stop(0), leaf(false), radius(0.0), inside(nullptr), outside(nullptr) {}
+
+ ~vpnode() {
+ delete inside;
+ delete outside;
+ }
+ };
+
+ std::vector<R> m_data;
+ std::vector<vp_ptr> m_ptrs;
+ vpnode *m_root;
+
+ size_t m_node_cnt;
+
+ BSMVPTree(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_node_cnt = 0;
+
+ for (size_t i=0; i<m_data.size(); i++) {
+ m_ptrs.push_back({&m_data[i], 0});
+ }
+
+ m_root = build_vptree();
+ }
+
+ vpnode *build_vptree() {
+ if (m_data.size() == 0) {
+ return nullptr;
+ }
+
+ size_t lower = 0;
+ size_t upper = m_data.size() - 1;
+
+ auto rng = gsl_rng_alloc(gsl_rng_mt19937);
+ auto root = build_subtree(lower, upper, rng);
+ gsl_rng_free(rng);
+ return root;
+ }
+
+ vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) {
+ /*
+ * base-case: sometimes happens (probably because of the +1 and -1
+ * in the first recursive call)
+ */
+ if (start > stop) {
+ return nullptr;
+ }
+
+ /* base-case: create a leaf node */
+ if (stop - start <= LEAFSZ) {
+ vpnode *node = new vpnode();
+ node->start = start;
+ node->stop = stop;
+ node->leaf = true;
+
+ m_node_cnt++;
+ return node;
+ }
+
+ /*
+ * select a random element to be the root of the
+ * subtree
+ */
+ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1);
+ swap(start, i);
+
+ /* for efficiency, we'll pre-calculate the distances between each point and the root */
+ for (size_t i=start+1; i<=stop; i++) {
+ m_ptrs[i].dist = m_ptrs[start].ptr->calc_distance(*m_ptrs[i].ptr);
+ }
+
+ /*
+ * partition elements based on their distance from the start,
+ * with those elements with distance falling below the median
+ * distance going into the left sub-array and those above
+ * the median in the right. This is easily done using QuickSelect.
+ */
+ auto mid = (start + 1 + stop) / 2;
+ quickselect(start + 1, stop, mid, m_ptrs[start].ptr, rng);
+
+ /* Create a new node based on this partitioning */
+ vpnode *node = new vpnode();
+ node->start = start;
+
+ /* store the radius of the circle used for partitioning the node. */
+ node->radius = m_ptrs[start].ptr->calc_distance(*m_ptrs[mid].ptr);
+ m_ptrs[start].dist = node->radius;
+
+ /* recursively construct the left and right subtrees */
+ node->inside = build_subtree(start + 1, mid-1, rng);
+ node->outside = build_subtree(mid, stop, rng);
+
+ m_node_cnt++;
+
+ return node;
+ }
+
+ void quickselect(size_t start, size_t stop, size_t k, R *p, gsl_rng *rng) {
+ if (start == stop) return;
+
+ auto pivot = partition(start, stop, p, rng);
+
+ if (k < pivot) {
+ quickselect(start, pivot - 1, k, p, rng);
+ } else if (k > pivot) {
+ quickselect(pivot + 1, stop, k, p, rng);
+ }
+ }
+
+ size_t partition(size_t start, size_t stop, R *p, gsl_rng *rng) {
+ auto pivot = start + gsl_rng_uniform_int(rng, stop - start);
+
+ swap(pivot, stop);
+
+ size_t j = start;
+ for (size_t i=start; i<stop; i++) {
+ if (m_ptrs[i].dist < m_ptrs[stop].dist) {
+ swap(j++, i);
+ }
+ }
+
+ swap(j, stop);
+ return j;
+ }
+
+ void swap(size_t idx1, size_t idx2) {
+ auto tmp = m_ptrs[idx1];
+ m_ptrs[idx1] = m_ptrs[idx2];
+ m_ptrs[idx2] = tmp;
+ }
+
+ void internal_search(vpnode *node, const R &point, size_t k, psudb::PriorityQueue<R,
+ de::DistCmpMax<R>> &pq, double *farthest) {
+
+ if (node == nullptr) return;
+
+ if (node->leaf) {
+ for (size_t i=node->start; i<=node->stop; i++) {
+ double d = point.calc_distance(*m_ptrs[i].ptr);
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ pq.pop();
+ }
+
+ pq.push(m_ptrs[i].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+ }
+
+ return;
+ }
+
+ double d = point.calc_distance(*m_ptrs[node->start].ptr);
+
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ auto t = pq.peek().data;
+ pq.pop();
+ }
+ pq.push(m_ptrs[node->start].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+
+ if (d < node->radius) {
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+ } else {
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+ }
+ }
+};