summaryrefslogtreecommitdiffstats
path: root/benchmarks
diff options
context:
space:
mode:
authorDouglas B. Rumbaugh <dbr4@psu.edu>2024-05-14 16:31:05 -0400
committerGitHub <noreply@github.com>2024-05-14 16:31:05 -0400
commit47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc (patch)
treeee5613ce182b2c9caa228d3abeb65dc27fef2db3 /benchmarks
parent4a834497d5f82c817d634925250158d85ca825c2 (diff)
parent8643fe194dec05b4e3f3ea31e162ac0b2b00e162 (diff)
downloaddynamic-extension-47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc.tar.gz
Merge pull request #4 from dbrumbaugh/master
Updates for VLDB revision
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/bigann_sample.cpp55
-rw-r--r--benchmarks/cedar_trie.cpp97
-rw-r--r--benchmarks/hat_trie.cpp98
-rw-r--r--benchmarks/include/benchmark_types.h68
-rw-r--r--benchmarks/include/btree-util.h27
-rw-r--r--benchmarks/include/data-proc.h258
-rw-r--r--benchmarks/include/file_util.h294
-rw-r--r--benchmarks/include/standard_benchmarks.h380
-rw-r--r--benchmarks/include/triespline_bsm.h156
-rw-r--r--benchmarks/include/vptree_bsm.h317
-rw-r--r--benchmarks/irs_bench.cpp125
-rw-r--r--benchmarks/old-bench/include/bench_utility.h2
-rw-r--r--benchmarks/poplar_trie.cpp102
-rw-r--r--benchmarks/string_insertion_tput.cpp114
-rw-r--r--benchmarks/vldb/alex_bench.cpp144
-rw-r--r--benchmarks/vldb/btree_bench.cpp90
-rw-r--r--benchmarks/vldb/btree_thread_scaling_bench.cpp (renamed from benchmarks/btree_insert_query_tput.cpp)20
-rw-r--r--benchmarks/vldb/dynamic_pgm_bench.cpp77
-rw-r--r--benchmarks/vldb/fst_bench.cpp100
-rw-r--r--benchmarks/vldb/fst_bsm_bench.cpp100
-rw-r--r--benchmarks/vldb/irs_bench.cpp97
-rw-r--r--benchmarks/vldb/mtree_bench.cpp82
-rw-r--r--benchmarks/vldb/mtree_bench_alt.cpp82
-rw-r--r--benchmarks/vldb/pgm_bench.cpp94
-rw-r--r--benchmarks/vldb/thread_scaling_bench.cpp (renamed from benchmarks/insert_query_tput.cpp)59
-rw-r--r--benchmarks/vldb/ts_bench.cpp95
-rw-r--r--benchmarks/vldb/ts_bsm_bench.cpp95
-rw-r--r--benchmarks/vldb/ts_mdsp_bench.cpp70
-rw-r--r--benchmarks/vldb/ts_parmsweep.cpp124
-rw-r--r--benchmarks/vldb/vptree_bench.cpp102
-rw-r--r--benchmarks/vldb/vptree_bench_alt.cpp102
-rw-r--r--benchmarks/vldb/vptree_bsm_bench.cpp102
-rw-r--r--benchmarks/vldb/vptree_bsm_bench_alt.cpp92
-rw-r--r--benchmarks/vldb/vptree_parmsweep.cpp129
-rw-r--r--benchmarks/watermark_testing.cpp25
-rw-r--r--benchmarks/watermark_testing_knn.cpp61
36 files changed, 3581 insertions, 454 deletions
diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp
new file mode 100644
index 0000000..aa12f91
--- /dev/null
+++ b/benchmarks/bigann_sample.cpp
@@ -0,0 +1,55 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+typedef ANNRec Rec;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile sampcnt\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ size_t m = atol(argv[3]);
+
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ std::vector<size_t> to_delete(m);
+
+ std::unordered_map<Rec, size_t, de::RecordHash<Rec>> filter;
+ double ratio = (double) data.size() / (double) m;
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= ratio && filter.find(data[i]) == filter.end()) {
+ to_delete[j++] = i;
+ filter.insert({data[i], i});
+ }
+ }
+
+ for (size_t i=0; i<to_delete.size(); i++) {
+ for (size_t j=0; j<ANNSize; j++ ) {
+ fprintf(stdout, "%ld ", data[to_delete[i]].data[j]);
+ }
+ fprintf(stdout, "\n");
+ }
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/cedar_trie.cpp b/benchmarks/cedar_trie.cpp
new file mode 100644
index 0000000..7499ce7
--- /dev/null
+++ b/benchmarks/cedar_trie.cpp
@@ -0,0 +1,97 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "cedar.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+std::vector<std::string> strings;
+
+typedef cedar::da<int> Trie;
+
+void insert_thread(int64_t start, int64_t end, Trie * trie) {
+ for (uint64_t i=start; i<end; i++) {
+ auto res = trie->update(strings[i].c_str(), strings[i].size(), i+1);
+ }
+}
+
+void read_data(std::string fname, size_t n=10000000) {
+ strings.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(line);
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+}
+
+void usage(char *name) {
+ fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ std::string fname = std::string(argv[1]);
+ size_t n = atol(argv[2]);
+
+ read_data(fname, n);
+
+ if (strings.size() == 0) {
+ fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+ } else {
+ fprintf(stderr, "Finished reading from file.\n");
+ }
+
+ auto trie = new Trie();
+
+ TIMER_INIT();
+ TIMER_START();
+ insert_thread(0, strings.size(), trie);
+ TIMER_STOP();
+
+ auto total_time = TIMER_RESULT();
+
+ size_t m = 100;
+ TIMER_START();
+ for (size_t i=0; i<m; i++) {
+ size_t j = rand() % strings.size();
+
+ auto res = trie->exactMatchSearch<int>(strings[j].c_str());
+ //assert(*(res)+1 == j);
+ }
+ TIMER_STOP();
+
+ auto query_time = TIMER_RESULT();
+
+
+ double i_tput = (double) n / (double) total_time * 1e9;
+ size_t q_lat = query_time / m;
+
+ fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(),
+ i_tput, q_lat);
+
+ fprintf(stdout, "%ld\n", trie->total_size());
+
+ delete trie;
+
+ fflush(stderr);
+}
+
diff --git a/benchmarks/hat_trie.cpp b/benchmarks/hat_trie.cpp
new file mode 100644
index 0000000..3b4c7d3
--- /dev/null
+++ b/benchmarks/hat_trie.cpp
@@ -0,0 +1,98 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <fstream>
+#include <sstream>
+
+#include "htrie_map.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+std::vector<std::string> strings;
+
+typedef tsl::htrie_map<char, size_t> Trie;
+
+void insert_thread(int64_t start, int64_t end, Trie * trie) {
+ for (uint64_t i=start; i<end; i++) {
+ auto res = trie->insert(strings[i].c_str(), i+1);
+ }
+}
+
+void read_data(std::string fname, size_t n=10000000) {
+ strings.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(line);
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+}
+
+void usage(char *name) {
+ fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ std::string fname = std::string(argv[1]);
+ size_t n = atol(argv[2]);
+
+ read_data(fname, n);
+
+ if (strings.size() == 0) {
+ fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+ } else {
+ fprintf(stderr, "Finished reading from file.\n");
+ }
+
+ auto trie = new Trie();
+
+ TIMER_INIT();
+ TIMER_START();
+ insert_thread(0, strings.size(), trie);
+ TIMER_STOP();
+
+ auto total_time = TIMER_RESULT();
+
+ size_t m = 100;
+ TIMER_START();
+ for (size_t i=0; i<m; i++) {
+ size_t j = rand() % strings.size();
+
+ auto res = trie->find(strings[j]);
+ if (*res != (j+1)) {
+ fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str());
+ }
+ //assert(*(res)+1 == j);
+ }
+ TIMER_STOP();
+
+ auto query_time = TIMER_RESULT();
+
+
+ double i_tput = (double) n / (double) total_time * 1e9;
+ size_t q_lat = query_time / m;
+
+ fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(),
+ i_tput, q_lat);
+
+
+ delete trie;
+
+ fflush(stderr);
+}
+
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
new file mode 100644
index 0000000..51fc52d
--- /dev/null
+++ b/benchmarks/include/benchmark_types.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <cstdlib>
+#include "psu-ds/BTree.h"
+#include "framework/interface/Record.h"
+#include "pgm/pgm_index_dynamic.hpp"
+
+/* BTree definitions*/
+template <typename K, typename V>
+struct btree_record {
+ K key;
+ V value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+template <typename K, typename V>
+struct btree_key_extract {
+ static const K &get(const btree_record<K, V> &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract<int64_t, int64_t>> BenchBTree;
+
+
+/*MTree Definitions*/
+
+const size_t W2V_SIZE = 300;
+typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
+
+const size_t ANNSize = 128;
+typedef de::EuclidPoint<uint64_t, ANNSize> ANNRec;
+
+struct euclidean_distance {
+ double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<W2V_SIZE; i++) {
+ dist += (first.data[i] - second.data[i]) * (first.data[i] - second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
+
+ double operator()(const ANNRec &first, const ANNRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<ANNSize; i++) {
+ dist += ((double) first.data[i] - (double) second.data[i]) * ((double) first.data[i] - (double) second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
+};
+
+#ifdef _GNU_SOURCE
+#include "mtree.h"
+typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
+typedef mt::mtree<ANNRec, euclidean_distance> MTree_alt;
+#endif
+
+typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM;
+
diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h
deleted file mode 100644
index 571c073..0000000
--- a/benchmarks/include/btree-util.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include <cstdlib>
-#include "psu-ds/BTree.h"
-
-struct btree_record {
- int64_t key;
- int64_t value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const int64_t &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-
diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h
deleted file mode 100644
index 444cb94..0000000
--- a/benchmarks/include/data-proc.h
+++ /dev/null
@@ -1,258 +0,0 @@
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <gsl/gsl_rng.h>
-#include <cstring>
-#include <vector>
-
-#include "psu-ds/BTree.h"
-
-#pragma once
-
-typedef int64_t key_type;
-typedef int64_t value_type;
-typedef uint64_t weight_type;
-
-static gsl_rng *g_rng;
-static bool g_osm_data;
-
-struct btree_record {
- key_type key;
- value_type value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const key_type &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-static key_type g_min_key = UINT64_MAX;
-static key_type g_max_key = 0;
-
-static size_t g_max_record_cnt = 0;
-static size_t g_reccnt = 0;
-
-static constexpr unsigned int DEFAULT_SEED = 0;
-
-static unsigned int get_random_seed()
-{
- unsigned int seed = 0;
- std::fstream urandom;
- urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
- urandom.read((char *) &seed, sizeof(seed));
- urandom.close();
-
- return seed;
-}
-
-static key_type osm_to_key(const char *key_field) {
- double tmp_key = (atof(key_field) + 180) * 10e6;
- return (key_type) tmp_key;
-}
-
-static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
-{
- g_rng = gsl_rng_alloc(type);
- gsl_rng_set(g_rng, seed);
-}
-
-static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
-{
- unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
- init_bench_rng(seed, gsl_rng_mt19937);
- g_osm_data = osm_correction;
- g_max_record_cnt = max_reccnt;
- g_reccnt = 0;
-}
-
-static void delete_bench_env()
-{
- gsl_rng_free(g_rng);
-}
-
-
-template <typename QP>
-static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.target_key = start;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.lower_bound = start;
- q.upper_bound = stop;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- char *line = NULL;
- size_t len = 0;
-
- while (getline(&line, &len, qf) > 0) {
- char *token;
- QP query;
- size_t idx = 0;
-
- token = strtok(line, " ");
- do {
- query.point.data[idx++] = atof(token);
- } while ((token = strtok(NULL, " ")));
-
- query.k = k;
- queries.emplace_back(query);
- }
-
- free(line);
- fclose(qf);
-
- return queries;
-}
-
-/*
- * NOTE: The QP type must have lower_bound and upper_bound attributes, which
- * this function will initialize. Any other query parameter attributes must
- * be manually initialized after the call.
- */
-template <typename R>
-static bool next_vector_record(std::fstream &file, R &record, bool binary=false) {
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- for (size_t i=0; i<300; i++) {
- std::string dimension;
-
- std::getline(line_stream, dimension, ' ');
- record.data[i] = atof(dimension.c_str());
- }
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-
-}
-
-template <typename R>
-static bool next_record(std::fstream &file, R &record, bool binary=false)
-{
- static value_type value = 1;
- if (g_reccnt >= g_max_record_cnt) return false;
-
- if (binary) {
- if (file.good()) {
- decltype(R::key) key;
-
- file.read((char*) &key, sizeof(key));
- record.key = key;
- record.value = value;
- value++;
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- return true;
- }
-
- return false;
- }
-
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- std::string key_field;
- std::string value_field;
- std::string weight_field;
-
- std::getline(line_stream, value_field, '\t');
- std::getline(line_stream, key_field, '\t');
- std::getline(line_stream, weight_field, '\t');
-
- record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
- record.value = atol(value_field.c_str());
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-}
-
-template <typename R>
-static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
- vec.clear();
-
- size_t cnt = 0;
- while (cnt < n) {
- if (to_delete.size() == 0) {
- return false;
- }
-
- auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
- vec.emplace_back(to_delete[i]);
- to_delete.erase(to_delete.begin() + i);
- }
-td:
- return true;
-}
-
-static std::vector<int64_t> read_sosd_file(std::string &fname, size_t n) {
- std::fstream file;
- file.open(fname, std::ios::in | std::ios::binary);
-
- std::vector<int64_t> records(n);
- for (size_t i=0; i<n; i++) {
- file.read((char*) &(records[i]), sizeof(int64_t));
- }
-
- return records;
-}
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
new file mode 100644
index 0000000..41eb18c
--- /dev/null
+++ b/benchmarks/include/file_util.h
@@ -0,0 +1,294 @@
+#pragma once
+
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <gsl/gsl_rng.h>
+#include <cstring>
+#include <vector>
+#include <memory>
+
+#include "psu-util/progress.h"
+
+
+template <typename QP>
+static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.target_key = start;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) {
+ std::vector<QP> queries;
+
+ for (size_t i=0; i<cnt; i++) {
+ auto idx = gsl_rng_uniform_int(rng, strings.size());
+ QP q;
+ q.search_key = strings[idx].get();
+ queries.push_back(q);
+ }
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.00001) {
+ QP q;
+ q.lower_bound = start;
+ q.upper_bound = stop;
+
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+
+template <typename QP>
+static std::vector<QP> read_binary_knn_queries(std::string fname, size_t k, size_t n) {
+ std::vector<QP> queries;
+ queries.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ for (size_t i=0; i<n; i++) {
+ QP query;
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ query.point.data[j] = val;
+ }
+ query.k = k;
+ queries.push_back(query);
+ }
+
+ return queries;
+}
+
+
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ if (!qf) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+template<typename R>
+static std::vector<R> read_sosd_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records(n);
+ for (size_t i=0; i<n; i++) {
+ decltype(R::key) k;
+ file.read((char*) &(k), sizeof(R::key));
+ records[i].key = k;
+ records[i].value = i;
+ }
+
+ return records;
+}
+
+template<typename K, typename V>
+static std::vector<std::pair<K, V>> read_sosd_file_pair(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<std::pair<K,V>> records(n);
+ for (size_t i=0; i<n; i++) {
+ K k;
+ file.read((char*) &(k), sizeof(K));
+ records[i].first = k;
+ records[i].second = i;
+ }
+
+ return records;
+}
+
+/*
+ * This function expects a plaintext file with each vector on its own line.
+ * There should be D dimensions (or more) for each record, separated by
+ * whitespace. The function will generate a vector of n records, each
+ * record built from the first D dimensions of the data in the file.
+ */
+template <typename R, size_t D>
+static std::vector<R> read_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ for (size_t i=0; i<n; i++) {
+ std::string line;
+ if (!std::getline(file, line, '\n')) break;
+
+ std::stringstream line_stream(line);
+ R rec;
+ for (size_t j=0; j<D; j++) {
+ std::string dim;
+ if (!std::getline(line_stream, dim, ' ')) break;
+
+ rec.data[j] = atof(dim.c_str());
+ }
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
+
+template <typename R>
+static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ R rec;
+ for (size_t i=0; i<n; i++) {
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ rec.data[j] = val;
+ }
+
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
+
+static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<std::unique_ptr<char[]>> strings;
+ strings.reserve(n);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(std::unique_ptr<char[]>(strdup(line.c_str())));
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+
+ return strings;
+}
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
new file mode 100644
index 0000000..b805c08
--- /dev/null
+++ b/benchmarks/include/standard_benchmarks.h
@@ -0,0 +1,380 @@
+/*
+ * benchmarks/include/bench.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <gsl/gsl_rng.h>
+
+#include "framework/DynamicExtension.h"
+#include "framework/interface/Query.h"
+#include "query/irs.h"
+#include "psu-util/progress.h"
+#include "benchmark_types.h"
+#include "psu-util/bentley-saxe.h"
+
+static size_t g_deleted_records = 0;
+static double delete_proportion = 0.05;
+
+static volatile size_t total = 0;
+
+template<typename DE, typename QP, typename R>
+static void run_queries(DE *extension, DE *ghost, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ std::vector<R> res = extension->query(&queries[i]);
+ std::vector<R> negres = ghost->query(&queries[i]);
+ auto result = res[0].first - negres[0].first;
+ total = result;
+ }
+}
+
+
+template<typename DE, typename QP, bool BSM=false>
+static void run_queries(DE *extension, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ if constexpr (std::is_same_v<MTree, DE>) {
+ std::vector<Word2VecRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (auto &r : result) {
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ #endif
+ } else if constexpr (std::is_same_v<MTree_alt, DE>) {
+ std::vector<ANNRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+ }else if constexpr (std::is_same_v<PGM, DE>) {
+ size_t tot = 0;
+ auto ptr = extension->find(queries[i].lower_bound);
+ while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) {
+ tot++;
+ ++ptr;
+ }
+ } else {
+ auto res = extension->query(&queries[i]);
+ if constexpr (!BSM) {
+ auto result = res.get();
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=result.size()-1; i>=0; i--) {
+ auto &r = result[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ } else {
+ total = res.size();
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i];
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ }
+ }
+ }
+}
+
+template <typename R>
+static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) {
+ std::vector<int64_t> sample_set;
+ sample_set.reserve(queries[0].sample_size);
+
+ for (size_t i=0; i<queries.size(); i++) {
+ btree->range_sample(queries[i].lower_bound, queries[i].upper_bound, queries[i].sample_size, sample_set, queries[i].rng);
+ }
+}
+
+
+template<typename S, typename QP, typename Q>
+static void run_static_queries(S *shard, std::vector<QP> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ auto q = &queries[i];
+
+ auto state = Q::get_query_state(shard, q);
+
+ std::vector<void*> shards = {shard};
+ std::vector<void*> states = {state};
+
+ Q::process_query_states(q, states, nullptr);
+ auto res = Q::query(shard, state, q);
+
+ #ifdef BENCH_PRINT_RESULTS
+ fprintf(stdout, "\n\n");
+ for (int i=res.size()-1; i>=0; i--) {
+ auto &r = res[i].rec;
+ fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0],
+ r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
+ }
+ fflush(stdout);
+ #endif
+ }
+}
+
+
+/*
+ * Insert records into a standard Bentley-Saxe extension. Deletes are not
+ * supported.
+ */
+template<typename DS, typename R, bool MDSP=false>
+static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
+ size_t start, size_t stop, std::vector<R> &records) {
+
+ psudb::progress_update(0, "Insert Progress");
+ for (size_t i=start; i<stop; i++) {
+ extension->insert(records[i]);
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+
+template<typename DS, typename R, bool MDSP=false>
+static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
+ psudb::bsm::BentleySaxe<R, DS, MDSP> *ghost,
+ size_t start, size_t stop, std::vector<R> &records,
+ std::vector<size_t> &to_delete, size_t &delete_idx,
+ gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+
+ extension->insert(records[i]);
+
+ if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
+ ghost->insert(records[to_delete[delete_idx]]);
+ delete_idx++;
+ g_deleted_records++;
+ }
+
+ }
+
+}
+
+
+template<typename DE, typename R>
+static void insert_records(DE *structure, size_t start, size_t stop,
+ std::vector<R> &records, std::vector<size_t> &to_delete,
+ size_t &delete_idx, bool delete_records, gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->insert(records[i]);
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ structure->add(records[i]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->insert_or_assign(records[i].key, records[i].value);
+ } else {
+ while (!structure->insert(records[i])) {
+ psudb::progress_update((double) i / (double)(stop - start), "Insert Progress");
+ usleep(1);
+ }
+ }
+
+ if (delete_records && gsl_rng_uniform(rng) <=
+ delete_proportion && to_delete[delete_idx] <= i) {
+
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->erase_one(records[to_delete[delete_idx]].key);
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ structure->remove(records[to_delete[delete_idx]]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->erase(records[to_delete[delete_idx]].key);
+ } else {
+ while (!structure->erase(records[to_delete[delete_idx]])) {
+ usleep(1);
+ }
+ }
+ delete_idx++;
+ g_deleted_records++;
+ }
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
+static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
+ double delete_prop, gsl_rng *rng, std::vector<R> &to_delete, bool binary=false) {
+
+ size_t delete_cnt = insert_cnt * delete_prop;
+
+ size_t applied_deletes = 0;
+ size_t applied_inserts = 0;
+
+ std::vector<R> insert_vec;
+ std::vector<R> delete_vec;
+ insert_vec.reserve(BATCH);
+ delete_vec.reserve(BATCH*delete_prop);
+
+ size_t delete_idx = 0;
+
+ bool continue_benchmark = true;
+
+ size_t total_time = 0;
+
+ while (applied_inserts < insert_cnt && continue_benchmark) {
+ continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
+ if (applied_deletes < delete_cnt) {
+ build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+ delete_idx = 0;
+ }
+
+ if (insert_vec.size() == 0) {
+ break;
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+ }
+
+ auto insert_start = std::chrono::high_resolution_clock::now();
+ for (size_t i=0; i<insert_vec.size(); i++) {
+ // process a delete if necessary
+ if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) {
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ de_index.erase_one(delete_vec[delete_idx++].key);
+ #ifdef _GNU_SOURCE
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ de_index.remove(delete_vec[delete_idx++]);
+ #endif
+ } else {
+ de_index.erase(delete_vec[delete_idx++]);
+ }
+ applied_deletes++;
+ }
+
+ // insert the record;
+ #ifdef _GNU_SOURCE
+ if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
+ de_index.add(insert_vec[i]);
+ } else {
+ de_index.insert(insert_vec[i]);
+ }
+ #else
+ de_index.insert(insert_vec[i]);
+ #endif
+
+ applied_inserts++;
+ }
+ auto insert_stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update(1.0, "inserting:");
+ }
+
+ size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+
+ fprintf(stdout, "%ld\t", throughput);
+
+ return continue_benchmark;
+}
+
+template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true>
+static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ auto res = de_index.query(&queries[j]);
+
+ total_results += res.size();
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
+
+
+template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true>
+static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ std::vector<void *> states(1);
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ states[0] = Q::get_query_state(shard, &queries[j]);
+ Q::process_query_states(&queries[j], states, nullptr);
+ auto res = Q::query(shard, states[0], &queries[j]);
+ total_results += res.size();
+ Q::delete_query_state(states[0]);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h
new file mode 100644
index 0000000..eaf1079
--- /dev/null
+++ b/benchmarks/include/triespline_bsm.h
@@ -0,0 +1,156 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include "ts/builder.h"
+
+template <typename K, typename V, size_t E=1024>
+class BSMTrieSpline {
+private:
+ typedef std::pair<K, V> R;
+
+public:
+ struct RangeQueryParameters {
+ K lower_bound;
+ K upper_bound;
+ };
+
+public:
+ static BSMTrieSpline *build(std::vector<R> &records) {
+ if (records.size() == 0) {
+ return nullptr;
+ }
+
+ std::sort(records.begin(), records.end());
+ return new BSMTrieSpline(records);
+ }
+
+ static BSMTrieSpline *build_presorted(std::vector<R> &records) {
+ if (records.size() == 0) {
+ return nullptr;
+ }
+
+ return new BSMTrieSpline(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ rs.push_back({0, 0});
+ return rs;
+ }
+
+ auto parms = (BSMTrieSpline::RangeQueryParameters*) q;
+
+ size_t idx = lower_bound(parms->lower_bound);
+
+ size_t cnt = 0;
+ while (idx < m_data.size() && m_data[idx].first <= parms->upper_bound) {
+ cnt++;
+ idx++;
+ }
+
+ rs.push_back({cnt, 0});
+
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void *parms) {
+ /* initialize rsa on the first merge */
+ if (rsa.size() == 0) {
+ rsa.push_back({0, 0});
+ }
+ rsa[0].first += rsb[0].first;
+ return std::move(rsa);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+ ~BSMTrieSpline() = default;
+
+private:
+ std::vector<R> m_data;
+ K m_max_key;
+ K m_min_key;
+ ts::TrieSpline<K> m_ts;
+
+ BSMTrieSpline(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_min_key = m_data[0].first;
+ m_max_key = m_data[m_data.size() - 1].first;
+
+ if (m_data.size() < 50) {
+ return;
+ }
+
+ auto bldr = ts::Builder<K>(m_min_key, m_max_key, E);
+ for (size_t i=0; i<m_data.size(); i++) {
+ bldr.AddKey(m_data[i].first);
+ }
+
+ if (m_data.size() > 1) {
+ m_ts = bldr.Finalize();
+ }
+ }
+
+ size_t lower_bound(K key) {
+ if (m_data.size() == 0) {
+ return 1;
+ } else if (m_data.size() < 50) {
+ for (size_t i=0; i<m_data.size(); i++) {
+ if (m_data[i].first >= key) {
+ return i;
+ }
+ }
+
+ return m_data.size();
+ }
+
+ auto bound = m_ts.GetSearchBound(key);
+ size_t idx = bound.begin;
+
+ if (idx >= m_data.size()) {
+ return m_data.size();
+ }
+
+ // If the region to search is less than some pre-specified
+ // amount, perform a linear scan to locate the record.
+ if (bound.end - bound.begin < 256) {
+ while (idx < bound.end && m_data[idx].first < key) {
+ idx++;
+ }
+ } else {
+ // Otherwise, perform a binary search
+ idx = bound.begin;
+ size_t max = bound.end;
+
+ while (idx < max) {
+ size_t mid = (idx + max) / 2;
+ if (key > m_data[mid].first) {
+ idx = mid + 1;
+ } else {
+ max = mid;
+ }
+ }
+ }
+
+ if (idx == m_data.size()) {
+ return m_data.size();
+ }
+
+ if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) {
+ return idx-1;
+ }
+
+ return idx;
+ }
+};
diff --git a/benchmarks/include/vptree_bsm.h b/benchmarks/include/vptree_bsm.h
new file mode 100644
index 0000000..2f12fcb
--- /dev/null
+++ b/benchmarks/include/vptree_bsm.h
@@ -0,0 +1,317 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <limits>
+#include <gsl/gsl_rng.h>
+
+#include "psu-ds/PriorityQueue.h"
+#include "framework/interface/Record.h"
+
+template <typename R, size_t LEAFSZ=100>
+class BSMVPTree {
+public:
+ struct KNNQueryParms {
+ R point;
+ size_t k;
+ };
+
+public:
+ static BSMVPTree *build(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ static BSMVPTree *build_presorted(std::vector<R> &records) {
+ return new BSMVPTree(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ return rs;
+ }
+
+ auto parms = (BSMVPTree::KNNQueryParms*) q;
+ auto pq = psudb::PriorityQueue<R, de::DistCmpMax<R>>(parms->k, &parms->point);
+
+ if (parms->k >= m_data.size()) {
+ for (size_t i=0; i<m_data.size(); i++) {
+ if (m_ptrs[i].ptr != nullptr) {
+ pq.push(m_ptrs[i].ptr);
+ }
+ }
+ } else {
+ double farthest = std::numeric_limits<double>::max();
+ internal_search(m_root, parms->point, parms->k, pq, &farthest);
+ }
+
+ size_t i=0;
+ while (pq.size() > 0) {
+ rs.push_back(*pq.peek().data);
+ pq.pop();
+ }
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb, void* parms) {
+ KNNQueryParms *p = (KNNQueryParms *) parms;
+ R rec = p->point;
+ size_t k = p->k;
+
+ std::vector<R> output;
+
+ psudb::PriorityQueue<R, de::DistCmpMax<R>> pq(k, &rec);
+
+ for (size_t i=0; i<rsa.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsa[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsa[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsa[i]);
+ }
+ }
+ }
+
+ for (size_t i=0; i<rsb.size(); i++) {
+ if (pq.size() < k) {
+ pq.push(&rsb[i]);
+ } else {
+ double head_dist = pq.peek().data->calc_distance(rec);
+ double cur_dist = rsb[i].calc_distance(rec);
+
+ if (cur_dist < head_dist) {
+ pq.pop();
+ pq.push(&rsb[i]);
+ }
+ }
+ }
+
+ while (pq.size() > 0) {
+ output.emplace_back(*pq.peek().data);
+ pq.pop();
+ }
+
+ return std::move(output);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+ ~BSMVPTree() {
+ delete m_root;
+ }
+
+private:
+
+ struct vp_ptr {
+ R *ptr;
+ double dist;
+ };
+
+ struct vpnode {
+ size_t start;
+ size_t stop;
+ bool leaf;
+
+ double radius;
+ vpnode *inside;
+ vpnode *outside;
+
+ vpnode() : start(0), stop(0), leaf(false), radius(0.0), inside(nullptr), outside(nullptr) {}
+
+ ~vpnode() {
+ delete inside;
+ delete outside;
+ }
+ };
+
+ std::vector<R> m_data;
+ std::vector<vp_ptr> m_ptrs;
+ vpnode *m_root;
+
+ size_t m_node_cnt;
+
+ BSMVPTree(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_node_cnt = 0;
+
+ for (size_t i=0; i<m_data.size(); i++) {
+ m_ptrs.push_back({&m_data[i], 0});
+ }
+
+ m_root = build_vptree();
+ }
+
+ vpnode *build_vptree() {
+ if (m_data.size() == 0) {
+ return nullptr;
+ }
+
+ size_t lower = 0;
+ size_t upper = m_data.size() - 1;
+
+ auto rng = gsl_rng_alloc(gsl_rng_mt19937);
+ auto root = build_subtree(lower, upper, rng);
+ gsl_rng_free(rng);
+ return root;
+ }
+
+ vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) {
+ /*
+ * base-case: sometimes happens (probably because of the +1 and -1
+ * in the first recursive call)
+ */
+ if (start > stop) {
+ return nullptr;
+ }
+
+ /* base-case: create a leaf node */
+ if (stop - start <= LEAFSZ) {
+ vpnode *node = new vpnode();
+ node->start = start;
+ node->stop = stop;
+ node->leaf = true;
+
+ m_node_cnt++;
+ return node;
+ }
+
+ /*
+ * select a random element to be the root of the
+ * subtree
+ */
+ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1);
+ swap(start, i);
+
+ /* for efficiency, we'll pre-calculate the distances between each point and the root */
+ for (size_t i=start+1; i<=stop; i++) {
+ m_ptrs[i].dist = m_ptrs[start].ptr->calc_distance(*m_ptrs[i].ptr);
+ }
+
+ /*
+ * partition elements based on their distance from the start,
+ * with those elements with distance falling below the median
+ * distance going into the left sub-array and those above
+ * the median in the right. This is easily done using QuickSelect.
+ */
+ auto mid = (start + 1 + stop) / 2;
+ quickselect(start + 1, stop, mid, m_ptrs[start].ptr, rng);
+
+ /* Create a new node based on this partitioning */
+ vpnode *node = new vpnode();
+ node->start = start;
+
+ /* store the radius of the circle used for partitioning the node. */
+ node->radius = m_ptrs[start].ptr->calc_distance(*m_ptrs[mid].ptr);
+ m_ptrs[start].dist = node->radius;
+
+ /* recursively construct the left and right subtrees */
+ node->inside = build_subtree(start + 1, mid-1, rng);
+ node->outside = build_subtree(mid, stop, rng);
+
+ m_node_cnt++;
+
+ return node;
+ }
+
+ void quickselect(size_t start, size_t stop, size_t k, R *p, gsl_rng *rng) {
+ if (start == stop) return;
+
+ auto pivot = partition(start, stop, p, rng);
+
+ if (k < pivot) {
+ quickselect(start, pivot - 1, k, p, rng);
+ } else if (k > pivot) {
+ quickselect(pivot + 1, stop, k, p, rng);
+ }
+ }
+
+ size_t partition(size_t start, size_t stop, R *p, gsl_rng *rng) {
+ auto pivot = start + gsl_rng_uniform_int(rng, stop - start);
+
+ swap(pivot, stop);
+
+ size_t j = start;
+ for (size_t i=start; i<stop; i++) {
+ if (m_ptrs[i].dist < m_ptrs[stop].dist) {
+ swap(j++, i);
+ }
+ }
+
+ swap(j, stop);
+ return j;
+ }
+
+ void swap(size_t idx1, size_t idx2) {
+ auto tmp = m_ptrs[idx1];
+ m_ptrs[idx1] = m_ptrs[idx2];
+ m_ptrs[idx2] = tmp;
+ }
+
+ void internal_search(vpnode *node, const R &point, size_t k, psudb::PriorityQueue<R,
+ de::DistCmpMax<R>> &pq, double *farthest) {
+
+ if (node == nullptr) return;
+
+ if (node->leaf) {
+ for (size_t i=node->start; i<=node->stop; i++) {
+ double d = point.calc_distance(*m_ptrs[i].ptr);
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ pq.pop();
+ }
+
+ pq.push(m_ptrs[i].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+ }
+
+ return;
+ }
+
+ double d = point.calc_distance(*m_ptrs[node->start].ptr);
+
+ if (d < *farthest) {
+ if (pq.size() == k) {
+ auto t = pq.peek().data;
+ pq.pop();
+ }
+ pq.push(m_ptrs[node->start].ptr);
+ if (pq.size() == k) {
+ *farthest = point.calc_distance(*pq.peek().data);
+ }
+ }
+
+ if (d < node->radius) {
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+ } else {
+ if (d + (*farthest) >= node->radius) {
+ internal_search(node->outside, point, k, pq, farthest);
+ }
+
+ if (d - (*farthest) <= node->radius) {
+ internal_search(node->inside, point, k, pq, farthest);
+ }
+ }
+ }
+};
diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp
deleted file mode 100644
index ddb4220..0000000
--- a/benchmarks/irs_bench.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- *
- */
-
-#define ENABLE_TIMER
-
-#include <thread>
-
-#include "framework/DynamicExtension.h"
-#include "shard/ISAMTree.h"
-#include "query/irs.h"
-#include "framework/interface/Record.h"
-#include "include/data-proc.h"
-
-#include <gsl/gsl_rng.h>
-
-#include "psu-util/timer.h"
-
-
-typedef de::Record<int64_t, int64_t> Rec;
-typedef de::ISAMTree<Rec> ISAM;
-typedef de::irs::Query<Rec, ISAM> Q;
-typedef de::DynamicExtension<Rec, ISAM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
-typedef de::irs::Parms<Rec> QP;
-
-void run_queries(Ext *extension, std::vector<QP> &queries, gsl_rng *rng) {
- size_t total;
- for (size_t i=0; i<queries.size(); i++) {
- auto q = &queries[i];
- q->rng = rng;
- q->sample_size = 1000;
-
- auto res = extension->query(q);
- auto r = res.get();
- total += r.size();
- }
-
- fprintf(stderr, "%ld\n", total);
-}
-
-size_t g_deleted_records = 0;
-double delete_proportion = 0.05;
-
-void insert_records(Ext *extension, size_t start,
- size_t stop,
- std::vector<int64_t> &records,
- std::vector<size_t> &to_delete,
- size_t &delete_idx,
- bool delete_records,
- gsl_rng *rng) {
- size_t reccnt = 0;
- Rec r;
- for (size_t i=start; i<stop; i++) {
- r.key = records[i];
- r.value = i;
-
- while (!extension->insert(r)) {
- usleep(1);
- }
-
- if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
- r.key = records[to_delete[delete_idx]];
- r.value = (int64_t) (to_delete[delete_idx]);
- while (!extension->erase(r)) {
- usleep(1);
- }
- delete_idx++;
- g_deleted_records++;
- }
- }
-}
-
-int main(int argc, char **argv) {
-
- if (argc < 4) {
- fprintf(stderr, "insert_query_tput reccnt datafile queryfile\n");
- exit(EXIT_FAILURE);
- }
-
- size_t n = atol(argv[1]);
- std::string d_fname = std::string(argv[2]);
- std::string q_fname = std::string(argv[3]);
-
- auto extension = new Ext(12000, 12001, 8, 0, 64);
- gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
-
- auto data = read_sosd_file(d_fname, n);
- std::vector<size_t> to_delete(n * delete_proportion);
- size_t j=0;
- for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
- if (gsl_rng_uniform(rng) <= delete_proportion) {
- to_delete[j++] = i;
- }
- }
- auto queries = read_range_queries<QP>(q_fname, .001);
-
- /* warmup structure w/ 10% of records */
- size_t warmup = .3 * n;
- size_t delete_idx = 0;
- insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
-
- extension->await_next_epoch();
-
- TIMER_INIT();
-
- TIMER_START();
- insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
- TIMER_STOP();
-
- auto insert_latency = TIMER_RESULT();
- size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
-
- TIMER_START();
- run_queries(extension, queries, rng);
- TIMER_STOP();
-
- auto query_latency = TIMER_RESULT() / queries.size();
-
- fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records);
-
- gsl_rng_free(rng);
- delete extension;
- fflush(stderr);
-}
-
diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h
index e33b93d..f495f18 100644
--- a/benchmarks/old-bench/include/bench_utility.h
+++ b/benchmarks/old-bench/include/bench_utility.h
@@ -79,8 +79,6 @@ struct cosine_similarity {
}
};
-typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
-typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
template <de::RecordInterface R>
static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
diff --git a/benchmarks/poplar_trie.cpp b/benchmarks/poplar_trie.cpp
new file mode 100644
index 0000000..6c47465
--- /dev/null
+++ b/benchmarks/poplar_trie.cpp
@@ -0,0 +1,102 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <fstream>
+#include <sstream>
+
+#include "poplar.hpp"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+std::vector<std::string> strings;
+
+typedef poplar::plain_bonsai_map<int> Trie;
+
+void insert_thread(int64_t start, int64_t end, Trie * trie) {
+ for (uint64_t i=start; i<end; i++) {
+ auto res = trie->update(strings[i]);
+ *res = i+1;
+ }
+}
+
+void read_data(std::string fname, size_t n=10000000) {
+ strings.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(line);
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+}
+
+void usage(char *name) {
+ fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ std::string fname = std::string(argv[1]);
+ size_t n = atol(argv[2]);
+
+ read_data(fname, n);
+
+ if (strings.size() == 0) {
+ fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+ } else {
+ fprintf(stderr, "Finished reading from file.\n");
+ }
+
+ auto trie = new Trie();
+ size_t warmup = strings.size()*.1;
+ insert_thread(0, warmup, trie);
+
+ TIMER_INIT();
+ TIMER_START();
+ insert_thread(warmup, strings.size(), trie);
+ TIMER_STOP();
+
+ auto total_time = TIMER_RESULT();
+
+ size_t m = 100;
+ TIMER_START();
+ for (size_t i=0; i<m; i++) {
+ size_t j = rand() % strings.size();
+
+ auto res = trie->find(strings[j]);
+ if (*res != (j+1)) {
+ fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str());
+ }
+ //assert(*(res)+1 == j);
+ }
+ TIMER_STOP();
+
+ auto query_time = TIMER_RESULT();
+
+
+ double i_tput = (double) n / (double) total_time * 1e9;
+ size_t q_lat = query_time / m;
+
+ fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(),
+ i_tput, q_lat);
+
+ trie->show_stats(std::cerr, 1);
+
+ delete trie;
+
+ fflush(stderr);
+}
+
diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp
new file mode 100644
index 0000000..c439cb3
--- /dev/null
+++ b/benchmarks/string_insertion_tput.cpp
@@ -0,0 +1,114 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::FSTrie<Rec> Trie;
+typedef de::pl::Query<Rec, Trie> Q;
+typedef de::DynamicExtension<Rec, Trie, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+
+
+void insert_thread(int64_t start, int64_t end, Ext *extension) {
+ for (uint64_t i=start; i<end; i++) {
+ Rec r = {strings[i].get(), i, strlen(strings[i].get())};
+ while (!extension->insert(r)) {
+ _mm_pause();
+ }
+ }
+}
+
+std::vector<std::unique_ptr<char[]>>read_strings(std::string fname, size_t n=10000000) {
+ std::vector<std::unique_ptr<char[]>> strings;
+ strings.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ size_t i=0;
+ std::string line;
+ while (i < n && std::getline(file, line, '\n')) {
+ strings.emplace_back(std::unique_ptr<char[]>(strdup(line.c_str())));
+ i++;
+ psudb::progress_update((double) i / (double) n, "Reading file:");
+ }
+
+ return strings;
+}
+
+void usage(char *name) {
+ fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ std::string fname = std::string(argv[1]);
+ size_t n = atol(argv[2]);
+
+ read_data(fname, n);
+
+ if (strings.size() == 0) {
+ fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+ } else {
+ fprintf(stderr, "Finished reading from file.\n");
+ }
+
+ std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12};
+ std::vector<size_t> buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000};
+
+ for (auto &sf : scale_factors) {
+ for (auto &bf_sz : buffer_sizes) {
+ auto extension = new Ext(bf_sz, bf_sz, sf);
+
+ TIMER_INIT();
+ TIMER_START();
+ insert_thread(0, strings.size(), extension);
+ TIMER_STOP();
+
+ auto total_time = TIMER_RESULT();
+
+ size_t m = 100;
+ TIMER_START();
+ for (size_t i=0; i<m; i++) {
+ size_t j = rand() % strings.size();
+ de::pl::Parms<Rec> parms = {strings[j].get()};
+
+ auto res = extension->query(&parms);
+ auto ans = res.get();
+ }
+ TIMER_STOP();
+
+ auto query_time = TIMER_RESULT();
+
+ double i_tput = (double) n / (double) total_time * 1e9;
+ size_t q_lat = query_time / m;
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(),
+ bf_sz, sf, i_tput, q_lat, extension->get_memory_usage());
+
+ delete extension;
+
+ }
+ }
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp
new file mode 100644
index 0000000..ba687f3
--- /dev/null
+++ b/benchmarks/vldb/alex_bench.cpp
@@ -0,0 +1,144 @@
+#define ENABLE_TIMER
+
+#include "alex.h"
+
+#include "file_util.h"
+#include "psu-util/progress.h"
+#include "psu-util/timer.h"
+
+typedef uint64_t key_type;
+typedef uint64_t value_type;
+
+typedef alex::Alex<key_type, value_type> Alex;
+
+struct record {
+ key_type key;
+ value_type value;
+};
+
+struct query {
+ key_type lower_bound;
+ key_type upper_bound;
+};
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+static size_t g_deleted_records = 0;
+static double delete_proportion = 0.05;
+
+static void insert_records(Alex *structure, size_t start, size_t stop,
+ std::vector<record> &records, std::vector<size_t> &to_delete,
+ size_t &delete_idx, bool delete_records, gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+ structure->insert(records[i].key, records[i].value);
+
+ if (delete_records && gsl_rng_uniform(rng) <=
+ delete_proportion && to_delete[delete_idx] <= i) {
+
+ structure->erase_one(records[i].key);
+ delete_idx++;
+ g_deleted_records++;
+ }
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+size_t g_global_cnt = 0;
+
+static void run_queries(Alex *alex, std::vector<query> &queries) {
+ for (size_t i=0; i<queries.size(); i++) {
+ size_t cnt=0;
+ auto ptr = alex->find(queries[i].lower_bound);
+ while (ptr != alex->end() && ptr.key() <= queries[i].upper_bound) {
+ cnt++;
+ ptr++;
+ }
+
+ g_global_cnt += cnt;
+ }
+}
+
+Alex *warmup_alex(std::vector<record> records, size_t cnt) {
+ if (cnt >= records.size()) {
+ fprintf(stderr, "[E] Requesting warmup with more records than are available.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ auto alex = new Alex();
+ std::pair<key_type, value_type> *insert_vec = new std::pair<key_type, value_type>[cnt];
+
+ for (size_t i=0; i<cnt; i++) {
+ insert_vec[i] = {records[i].key, records[i].value};
+ }
+
+ std::sort(insert_vec, insert_vec + cnt);
+ alex->bulk_load(insert_vec, cnt);
+ delete[] insert_vec;
+
+ return alex;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+
+ auto data = read_sosd_file<record>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+
+ auto queries = read_range_queries<query>(q_fname, .0001);
+
+
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+
+ auto alex = warmup_alex(data, warmup);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records(alex, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries(alex, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = alex->model_size() + alex->data_size() - (alex->size() * sizeof(record));
+
+ fprintf(stdout, "%ld\t%ld\t%lld\t%ld\n", insert_throughput, query_latency, ext_size, g_global_cnt);
+ fflush(stdout);
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+
+ delete alex;
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp
new file mode 100644
index 0000000..fa72831
--- /dev/null
+++ b/benchmarks/vldb/btree_bench.cpp
@@ -0,0 +1,90 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+#include "psu-ds/BTree.h"
+
+typedef btree_record<int64_t, int64_t> Rec;
+
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::irs::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto btree = BenchBTree();
+
+ gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ /* read in the range queries and add sample size and rng for sampling */
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+ for (auto &q : queries) {
+ q.sample_size = 1000;
+ q.rng = rng;
+ }
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<BenchBTree, Rec>(&btree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<BenchBTree, Rec>(&btree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_btree_queries<Rec>(&btree, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto btree_size = btree.get_stats().inner_nodes * psudb::btree_default_traits<int64_t, Rec>::inner_slots * (sizeof(int64_t) + sizeof(void*));
+
+ /* account for memory wasted on gaps in the structure */
+ btree_size += btree.get_stats().leaves * psudb::btree_default_traits<int64_t, Rec>::leaf_slots * sizeof(Rec);
+ btree_size -= btree.size() * sizeof(Rec);
+
+ fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, btree_size);
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+}
+
diff --git a/benchmarks/btree_insert_query_tput.cpp b/benchmarks/vldb/btree_thread_scaling_bench.cpp
index f838f80..557e966 100644
--- a/benchmarks/btree_insert_query_tput.cpp
+++ b/benchmarks/vldb/btree_thread_scaling_bench.cpp
@@ -7,8 +7,8 @@
#include <thread>
#include "query/irs.h"
-#include "include/data-proc.h"
-#include "psu-ds/BTree.h"
+#include "benchmark_types.h"
+#include "file_util.h"
#include <mutex>
#include <gsl/gsl_rng.h>
@@ -16,7 +16,7 @@
#include "psu-util/timer.h"
-typedef de::Record<int64_t, int64_t> Rec;
+typedef btree_record<int64_t, int64_t> Rec;
typedef de::irs::Parms<Rec> QP;
std::atomic<bool> inserts_done = false;
@@ -46,11 +46,11 @@ void query_thread(BenchBTree *tree, std::vector<QP> *queries) {
gsl_rng_free(rng);
}
-void insert_thread(BenchBTree *tree, size_t start, std::vector<int64_t> *records) {
+void insert_thread(BenchBTree *tree, size_t start, std::vector<Rec> *records) {
size_t reccnt = 0;
for (size_t i=start; i<records->size(); i++) {
- btree_record r;
- r.key = (*records)[i];
+ btree_record<int64_t, int64_t> r;
+ r.key = (*records)[i].key;
r.value = i;
g_btree_lock.lock();
@@ -80,15 +80,15 @@ int main(int argc, char **argv) {
auto tree = new BenchBTree();
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto data = read_sosd_file(d_fname, n);
+ auto data = read_sosd_file<Rec>(d_fname, n);
auto queries = read_range_queries<QP>(q_fname, .001);
/* warmup structure w/ 10% of records */
size_t warmup = .1 * n;
for (size_t i=0; i<warmup; i++) {
- btree_record r;
- r.key = data[i];
- r.value = i;
+ btree_record<int64_t, int64_t> r;
+ r.key = data[i].key;
+ r.value = data[i].value;
tree->insert(r);
}
diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp
new file mode 100644
index 0000000..15b130f
--- /dev/null
+++ b/benchmarks/vldb/dynamic_pgm_bench.cpp
@@ -0,0 +1,77 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ std::vector<std::pair<uint64_t, uint64_t>> tmp_data;
+ PGM pgm(tmp_data.begin(), tmp_data.end());
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<PGM, Rec>(&pgm, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<PGM, Rec>(&pgm, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<PGM, QP>(&pgm, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = pgm.index_size_in_bytes();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size);
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp
new file mode 100644
index 0000000..276a922
--- /dev/null
+++ b/benchmarks/vldb/fst_bench.cpp
@@ -0,0 +1,100 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::FSTrie<Rec> Shard;
+typedef de::pl::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::pl::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+
+ auto extension = new Ext(12000, 12001, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto strings = read_string_file(d_fname, n);
+ auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+ std::vector<Rec> data;
+ for (size_t i=0; i<strings.size(); i++) {
+ data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+ }
+
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp
new file mode 100644
index 0000000..15a441a
--- /dev/null
+++ b/benchmarks/vldb/fst_bsm_bench.cpp
@@ -0,0 +1,100 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::FSTrie<Rec> Shard;
+typedef de::pl::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::pl::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 3) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+
+ auto extension = new Ext(1, 12001, 2, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto strings = read_string_file(d_fname, n);
+ auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+ std::vector<Rec> data;
+ for (size_t i=0; i<strings.size(); i++) {
+ data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+ }
+
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp
new file mode 100644
index 0000000..e062e80
--- /dev/null
+++ b/benchmarks/vldb/irs_bench.cpp
@@ -0,0 +1,97 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::irs::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(12000, 12001, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ /* read in the range queries and add sample size and rng for sampling */
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+ for (auto &q : queries) {
+ q.sample_size = 1000;
+ q.rng = rng;
+ }
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp
new file mode 100644
index 0000000..cc2f41f
--- /dev/null
+++ b/benchmarks/vldb/mtree_bench.cpp
@@ -0,0 +1,82 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "query/knn.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto mtree = new MTree();
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_knn_queries<QP>(q_fname, 1000);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<MTree, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<MTree, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<MTree, QP>(mtree, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size());
+
+ fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size);
+
+ gsl_rng_free(rng);
+ delete mtree;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp
new file mode 100644
index 0000000..50c6117
--- /dev/null
+++ b/benchmarks/vldb/mtree_bench_alt.cpp
@@ -0,0 +1,82 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "query/knn.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto mtree = new MTree_alt();
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<MTree_alt, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<MTree_alt, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<MTree_alt, QP>(mtree, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size());
+
+ fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size);
+
+ gsl_rng_free(rng);
+ delete mtree;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp
new file mode 100644
index 0000000..cec95df
--- /dev/null
+++ b/benchmarks/vldb/pgm_bench.cpp
@@ -0,0 +1,94 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/PGM.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::PGM<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(12000, 12001, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/vldb/thread_scaling_bench.cpp
index ce05264..b679e92 100644
--- a/benchmarks/insert_query_tput.cpp
+++ b/benchmarks/vldb/thread_scaling_bench.cpp
@@ -10,7 +10,8 @@
#include "shard/ISAMTree.h"
#include "query/irs.h"
#include "framework/interface/Record.h"
-#include "include/data-proc.h"
+#include "file_util.h"
+#include <ctime>
#include <gsl/gsl_rng.h>
@@ -25,6 +26,8 @@ typedef de::irs::Parms<Rec> QP;
std::atomic<bool> inserts_done = false;
+struct timespec delay = {0, 500};
+
void query_thread(Ext *extension, std::vector<QP> *queries) {
gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total = 0;
@@ -39,7 +42,7 @@ void query_thread(Ext *extension, std::vector<QP> *queries) {
auto res = extension->query(&q);
auto r = res.get();
total += r.size();
- usleep(1);
+ nanosleep(&delay, nullptr);
}
fprintf(stderr, "%ld\n", total);
@@ -47,47 +50,39 @@ void query_thread(Ext *extension, std::vector<QP> *queries) {
gsl_rng_free(rng);
}
-void insert_thread(Ext *extension, size_t start, std::vector<int64_t> *records) {
- size_t reccnt = 0;
- Rec r;
- for (size_t i=start; i<records->size(); i++) {
- r.key = (*records)[i];
- r.value = i;
-
- while (!extension->insert(r)) {
- usleep(1);
+void insert_thread(Ext *extension, size_t start, size_t stop, std::vector<Rec> *records) {
+ fprintf(stderr, "%ld\t%ld\n", start, stop);
+ for (size_t i=start; i<stop; i++) {
+ while (!extension->insert((*records)[i])) {
+ nanosleep(&delay, nullptr);
}
}
-
- inserts_done.store(true);
}
int main(int argc, char **argv) {
- if (argc < 5) {
- fprintf(stderr, "insert_query_tput reccnt query_threads datafile queryfile\n");
+ if (argc < 6) {
+ fprintf(stderr, "Usage:\n");
+ fprintf(stderr, "%s reccnt insert_threads query_threads datafile queryfile\n", argv[0]);
exit(EXIT_FAILURE);
}
size_t n = atol(argv[1]);
- size_t qthread_cnt = atol(argv[2]);
- std::string d_fname = std::string(argv[3]);
- std::string q_fname = std::string(argv[4]);
+ size_t ithread_cnt = atol(argv[2]);
+ size_t qthread_cnt = atol(argv[3]);
+ std::string d_fname = std::string(argv[4]);
+ std::string q_fname = std::string(argv[5]);
auto extension = new Ext(1000, 12000, 8, 0, 64);
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto data = read_sosd_file(d_fname, n);
+ auto data = read_sosd_file<Rec>(d_fname, n);
auto queries = read_range_queries<QP>(q_fname, .001);
/* warmup structure w/ 10% of records */
size_t warmup = .1 * n;
- Rec r;
for (size_t i=0; i<warmup; i++) {
- r.key = data[i];
- r.value = gsl_rng_uniform_int(rng, n);
-
- while (!extension->insert(r)) {
+ while (!extension->insert(data[i])) {
usleep(1);
}
}
@@ -96,14 +91,26 @@ int main(int argc, char **argv) {
TIMER_INIT();
+ std::vector<std::thread> ithreads(ithread_cnt);
std::vector<std::thread> qthreads(qthread_cnt);
TIMER_START();
- std::thread i_thrd(insert_thread, extension, warmup, &data);
+ size_t start = warmup;
+ size_t per_thread = (n - warmup) / ithread_cnt;
+ for (size_t i=0; i<ithread_cnt; i++) {
+ ithreads[i] = std::thread(insert_thread, extension, start, start + per_thread, &data);
+ start += per_thread;
+ }
+
for (size_t i=0; i<qthread_cnt; i++) {
qthreads[i] = std::thread(query_thread, extension, &queries);
}
- i_thrd.join();
+
+ for (size_t i=0; i<ithread_cnt; i++) {
+ ithreads[i].join();
+ }
+
+ inserts_done.store(true);
TIMER_STOP();
for (size_t i=0; i<qthread_cnt; i++) {
diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp
new file mode 100644
index 0000000..81a430a
--- /dev/null
+++ b/benchmarks/vldb/ts_bench.cpp
@@ -0,0 +1,95 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/TrieSpline.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::TrieSpline<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(8000, 12001, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp
new file mode 100644
index 0000000..4511350
--- /dev/null
+++ b/benchmarks/vldb/ts_bsm_bench.cpp
@@ -0,0 +1,95 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/TrieSpline.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::TrieSpline<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1, 12001, 2, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp
new file mode 100644
index 0000000..cc0cd99
--- /dev/null
+++ b/benchmarks/vldb/ts_mdsp_bench.cpp
@@ -0,0 +1,70 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "triespline_bsm.h"
+#include "psu-util/bentley-saxe.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "query/rangecount.h"
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+
+typedef std::pair<uint64_t, uint64_t> Rec;
+typedef de::Record<uint64_t, uint64_t> FRec;
+
+typedef BSMTrieSpline<uint64_t, uint64_t> Shard;
+typedef de::rc::Parms<FRec> QP;
+typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>();
+ gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n);
+ auto queries = read_range_queries<QP>(q_fname, .0001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ insert_records<Shard, Rec, true>(extension, 0, warmup, data);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Shard, Rec, true>(extension, warmup, data.size(), data);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP, true>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp
new file mode 100644
index 0000000..2c9412a
--- /dev/null
+++ b/benchmarks/vldb/ts_parmsweep.cpp
@@ -0,0 +1,124 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/TrieSpline.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::TrieSpline<Rec> Shard;
+typedef de::rc::Query<Rec, Shard, true> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file<Rec>(d_fname, n);
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_range_queries<QP>(q_fname, .001);
+
+ const std::vector<de::LayoutPolicy> policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING};
+ const std::vector<size_t> buffer_sizes = {1000, 4000, 8000, 12000, 15000, 20000};
+ const std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12};
+
+ for (const auto &bs : buffer_sizes) {
+ for (const auto &sf : scale_factors) {
+ auto extension = new Ext(bs, bs, sf, 0, 64);
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size);
+ delete extension;
+ }
+ }
+
+ for (const auto &bs : buffer_sizes) {
+ for (const auto &sf : scale_factors) {
+ auto extension = new Ext2(bs, bs, sf, 0, 64);
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext2, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext2, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext2, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size);
+ delete extension;
+ }
+ }
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp
new file mode 100644
index 0000000..0b98a52
--- /dev/null
+++ b/benchmarks/vldb/vptree_bench.cpp
@@ -0,0 +1,102 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1400, 1400, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_knn_queries<QP>(q_fname, 1000);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ fprintf(stderr, "Running Static query tests\n\n");
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp
new file mode 100644
index 0000000..b09ee7d
--- /dev/null
+++ b/benchmarks/vldb/vptree_bench_alt.cpp
@@ -0,0 +1,102 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1400, 1400, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ fprintf(stderr, "Running Static query tests\n\n");
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp
new file mode 100644
index 0000000..4a7fcb6
--- /dev/null
+++ b/benchmarks/vldb/vptree_bsm_bench.cpp
@@ -0,0 +1,102 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1, 1400, 2, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_knn_queries<QP>(q_fname, 1000);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ fprintf(stderr, "Running Static query tests\n\n");
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp
new file mode 100644
index 0000000..63baf8b
--- /dev/null
+++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp
@@ -0,0 +1,92 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1, 1400, 2, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t\t%ld\n", insert_throughput, query_latency, ext_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/vldb/vptree_parmsweep.cpp b/benchmarks/vldb/vptree_parmsweep.cpp
new file mode 100644
index 0000000..2cbd521
--- /dev/null
+++ b/benchmarks/vldb/vptree_parmsweep.cpp
@@ -0,0 +1,129 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ auto queries = read_knn_queries<QP>(q_fname, 10);
+
+
+ const std::vector<de::LayoutPolicy> policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING};
+ const std::vector<size_t> buffer_sizes = {100, 400, 800, 1200, 1500, 2000};
+ const std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12};
+
+ for (const auto &bs : buffer_sizes) {
+ for (const auto &sf : scale_factors) {
+ auto extension = new Ext(bs, bs, sf, 0, 64);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size);
+ delete extension;
+ }
+ }
+
+ for (const auto &bs : buffer_sizes) {
+ for (const auto &sf : scale_factors) {
+ auto extension = new Ext2(bs, bs, sf, 0, 64);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext2, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Ext2, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext2, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size);
+ delete extension;
+ }
+ }
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+}
+
diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp
index caba8ff..c56fc63 100644
--- a/benchmarks/watermark_testing.cpp
+++ b/benchmarks/watermark_testing.cpp
@@ -5,15 +5,18 @@
#define ENABLE_TIMER
#include "framework/DynamicExtension.h"
-#include "shard/ISAMTree.h"
+#include "shard/TrieSpline.h"
#include "query/rangequery.h"
#include "framework/interface/Record.h"
#include "psu-util/timer.h"
+#include <algorithm>
+#include <random>
-typedef de::Record<int64_t, int64_t> Rec;
-typedef de::ISAMTree<Rec> ISAM;
+typedef uint64_t K;
+typedef de::Record<K, K> Rec;
+typedef de::TrieSpline<Rec> ISAM;
typedef de::rq::Query<Rec, ISAM> Q;
typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
@@ -23,7 +26,17 @@ int main(int argc, char **argv) {
std::vector hwms = {5000l, 10000l, 20000l, 50000l};
std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9};
- size_t n = 1000000;
+ size_t n = 1000000000;
+
+ std::vector<K> keys(n);
+ for (K i=0; i<n; i++) {
+ keys[i] = i;
+ }
+
+ std::random_device rd;
+ std::mt19937 g(rd());
+
+ std::shuffle(keys.begin(), keys.end(), g);
TIMER_INIT();
@@ -33,8 +46,8 @@ int main(int argc, char **argv) {
auto extension = new Ext(lwm, hwm, 8);
TIMER_START();
- for (int64_t i=0; i<n; i++) {
- Rec r = {i, i};
+ for (size_t i=0; i<n; i++) {
+ Rec r = {keys[i], keys[i]};
while (!extension->insert(r)) {
_mm_pause();
}
diff --git a/benchmarks/watermark_testing_knn.cpp b/benchmarks/watermark_testing_knn.cpp
new file mode 100644
index 0000000..7cea594
--- /dev/null
+++ b/benchmarks/watermark_testing_knn.cpp
@@ -0,0 +1,61 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+
+constexpr size_t D = 100;
+
+typedef de::EuclidPoint<int64_t, D> Rec;
+typedef de::VPTree<Rec> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q> Ext;
+
+int main(int argc, char **argv) {
+ std::vector hwms = {1000l, 2000l, 4000l, 10000l};
+ std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9};
+
+ size_t n = 1000000;
+
+ std::vector<Rec> records(n);
+ for (size_t i=0; i<n; i++) {
+ Rec r;
+ for (size_t j=0; j<D; j++) {
+ r.data[j] = rand() % n;
+ }
+ }
+
+ TIMER_INIT();
+
+ for (auto &hwm : hwms) {
+ for (size_t i=0; i<lwms.size(); i++) {
+ size_t lwm = hwm * lwms[i];
+
+ auto extension = new Ext(lwm, hwm, 8);
+ TIMER_START();
+ for (int64_t i=0; i<n; i++) {
+ while (!extension->insert(records[i])) {
+ _mm_pause();
+ }
+ }
+ TIMER_STOP();
+
+ auto insert_time = TIMER_RESULT();
+ double insert_throughput = (double) n / (double) insert_time * 1e9;
+
+ fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput);
+ extension->print_scheduler_statistics();
+
+ fflush(stdout);
+ delete extension;
+ }
+ }
+}
+