summaryrefslogtreecommitdiffstats
path: root/benchmarks
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2024-04-22 12:27:43 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2024-04-22 12:27:43 -0400
commit0bb5b46ec2b64be17f6269631915e62d02e315e4 (patch)
treea45733b510f27a7a0b59f50b983a38ceb9bfa178 /benchmarks
parent735d397513bc0160ba9ecb17c32c4441ed125f52 (diff)
downloaddynamic-extension-0bb5b46ec2b64be17f6269631915e62d02e315e4.tar.gz
Added plain BSM and MDSP BSM benchmark
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/include/file_util.h16
-rw-r--r--benchmarks/include/standard_benchmarks.h23
-rw-r--r--benchmarks/include/triespline_bsm.h134
-rw-r--r--benchmarks/ts_bsm_bench.cpp70
-rw-r--r--benchmarks/ts_mdsp_bench.cpp70
5 files changed, 311 insertions, 2 deletions
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
index 2a3300a..ea8b42e 100644
--- a/benchmarks/include/file_util.h
+++ b/benchmarks/include/file_util.h
@@ -97,6 +97,22 @@ static std::vector<R> read_sosd_file(std::string &fname, size_t n) {
return records;
}
+template<typename K, typename V>
+static std::vector<std::pair<K, V>> read_sosd_file_pair(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ std::vector<std::pair<K,V>> records(n);
+ for (size_t i=0; i<n; i++) {
+ uint64_t k;
+ file.read((char*) &(k), sizeof(uint64_t));
+ records[i].first = k;
+ records[i].second = i;
+ }
+
+ return records;
+}
+
/*
* This function expects a plaintext file with each vector on its own line.
* There should be D dimensions (or more) for each record, separated by
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
index f5af558..74bf93f 100644
--- a/benchmarks/include/standard_benchmarks.h
+++ b/benchmarks/include/standard_benchmarks.h
@@ -16,17 +16,20 @@
#include "framework/interface/Query.h"
#include "psu-util/progress.h"
#include "benchmark_types.h"
+#include "psu-util/bentley-saxe.h"
static size_t g_deleted_records = 0;
static double delete_proportion = 0.05;
-template<typename DE, typename QP>
+template<typename DE, typename QP, bool BSM=false>
static void run_queries(DE *extension, std::vector<QP> &queries) {
for (size_t i=0; i<queries.size(); i++) {
auto q = &queries[i];
auto res = extension->query(q);
- auto r = res.get();
+ if constexpr (!BSM) {
+ auto r = res.get();
+ }
}
}
@@ -47,6 +50,22 @@ static void run_static_queries(S *shard, std::vector<QP> &queries) {
}
+/*
+ * Insert records into a standard Bentley-Saxe extension. Deletes are not
+ * supported.
+ */
+template<typename DS, typename R, bool MDSP=false>
+static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
+ size_t start, size_t stop, std::vector<R> &records) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+ extension->insert(records[i]);
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
template<typename DE, de::RecordInterface R>
diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h
new file mode 100644
index 0000000..9613a62
--- /dev/null
+++ b/benchmarks/include/triespline_bsm.h
@@ -0,0 +1,134 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include "ts/builder.h"
+
+template <typename K, typename V, size_t E=1024>
+class BSMTrieSpline {
+private:
+ typedef std::pair<K, V> R;
+
+public:
+ struct RangeQueryParameters {
+ K lower_bound;
+ K upper_bound;
+ };
+
+public:
+ static BSMTrieSpline *build(std::vector<R> &records) {
+ std::sort(records.begin(), records.end());
+ return new BSMTrieSpline(records);
+ }
+
+ static BSMTrieSpline *build_presorted(std::vector<R> &records) {
+ return new BSMTrieSpline(records);
+ }
+
+ std::vector<R> unbuild() {
+ return std::move(m_data);
+ }
+
+ std::vector<R> query(void *q) {
+ std::vector<R> rs;
+
+ /* return an empty result set if q is invalid */
+ if (q == nullptr) {
+ return rs;
+ }
+
+ auto parms = (BSMTrieSpline::RangeQueryParameters*) q;
+
+ size_t idx = lower_bound(parms->lower_bound);
+
+ while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) {
+ rs.emplace_back(m_data[idx++]);
+ }
+
+ return std::move(rs);
+ }
+
+ std::vector<R> query_merge(std::vector<R> &rsa, std::vector<R> &rsb) {
+ rsa.insert(rsa.end(), rsb.begin(), rsb.end());
+ return std::move(rsa);
+ }
+
+ size_t record_count() {
+ return m_data.size();
+ }
+
+
+ ~BSMTrieSpline() = default;
+
+
+private:
+ std::vector<R> m_data;
+ K m_max_key;
+ K m_min_key;
+ ts::TrieSpline<K> m_ts;
+
+ BSMTrieSpline(std::vector<R> &records) {
+ m_data = std::move(records);
+ m_min_key = m_data[0].first;
+ m_max_key = m_data[m_data.size() - 1].first;
+
+ auto bldr = ts::Builder<K>(m_min_key, m_max_key, E);
+ for (size_t i=0; i<m_data.size(); i++) {
+ bldr.AddKey(m_data[i].first);
+ }
+
+ if (m_data.size() > 1) {
+ m_ts = bldr.Finalize();
+ }
+ }
+
+ size_t lower_bound(K key) {
+ if (m_data.size() == 0) {
+ return 1;
+ } else if (m_data.size() == 1) {
+ if (m_data[0].first < key) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ auto bound = m_ts.GetSearchBound(key);
+ size_t idx = bound.begin;
+
+ if (idx >= m_data.size()) {
+ return m_data.size();
+ }
+
+ // If the region to search is less than some pre-specified
+ // amount, perform a linear scan to locate the record.
+ if (bound.end - bound.begin < 256) {
+ while (idx < bound.end && m_data[idx].first < key) {
+ idx++;
+ }
+ } else {
+ // Otherwise, perform a binary search
+ idx = bound.begin;
+ size_t max = bound.end;
+
+ while (idx < max) {
+ size_t mid = (idx + max) / 2;
+ if (key > m_data[mid].first) {
+ idx = mid + 1;
+ } else {
+ max = mid;
+ }
+ }
+ }
+
+ if (idx == m_data.size()) {
+ return m_data.size();
+ }
+
+ if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) {
+ return idx-1;
+ }
+
+ return idx;
+ }
+};
diff --git a/benchmarks/ts_bsm_bench.cpp b/benchmarks/ts_bsm_bench.cpp
new file mode 100644
index 0000000..366abce
--- /dev/null
+++ b/benchmarks/ts_bsm_bench.cpp
@@ -0,0 +1,70 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "include/triespline_bsm.h"
+#include "psu-util/bentley-saxe.h"
+#include "framework/interface/Record.h"
+#include "include/file_util.h"
+#include "query/rangecount.h"
+#include "psu-util/timer.h"
+#include "include/standard_benchmarks.h"
+
+typedef std::pair<uint64_t, uint64_t> Rec;
+typedef de::Record<uint64_t, uint64_t> FRec;
+
+typedef BSMTrieSpline<uint64_t, uint64_t> Shard;
+typedef de::rc::Parms<FRec> QP;
+typedef psudb::bsm::BentleySaxe<Rec, Shard> Ext;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new psudb::bsm::BentleySaxe<Rec, Shard>();
+ gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n);
+ auto queries = read_range_queries<QP>(q_fname, .001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ insert_records<Shard, Rec>(extension, 0, warmup, data);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Shard, Rec>(extension, warmup, data.size(), data);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP, true>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/ts_mdsp_bench.cpp b/benchmarks/ts_mdsp_bench.cpp
new file mode 100644
index 0000000..5e5001d
--- /dev/null
+++ b/benchmarks/ts_mdsp_bench.cpp
@@ -0,0 +1,70 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "include/triespline_bsm.h"
+#include "psu-util/bentley-saxe.h"
+#include "framework/interface/Record.h"
+#include "include/file_util.h"
+#include "query/rangecount.h"
+#include "psu-util/timer.h"
+#include "include/standard_benchmarks.h"
+
+typedef std::pair<uint64_t, uint64_t> Rec;
+typedef de::Record<uint64_t, uint64_t> FRec;
+
+typedef BSMTrieSpline<uint64_t, uint64_t> Shard;
+typedef de::rc::Parms<FRec> QP;
+typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>();
+ gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n);
+ auto queries = read_range_queries<QP>(q_fname, .001);
+
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ insert_records<Shard, Rec, true>(extension, 0, warmup, data);
+
+ TIMER_INIT();
+
+ TIMER_START();
+ insert_records<Shard, Rec, true>(extension, warmup, data.size(), data);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ TIMER_START();
+ run_queries<Ext, QP, true>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+