From 0bb5b46ec2b64be17f6269631915e62d02e315e4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 12:27:43 -0400 Subject: Added plain BSM and MDSP BSM benchmark --- benchmarks/include/file_util.h | 16 ++++ benchmarks/include/standard_benchmarks.h | 23 +++++- benchmarks/include/triespline_bsm.h | 134 +++++++++++++++++++++++++++++++ benchmarks/ts_bsm_bench.cpp | 70 ++++++++++++++++ benchmarks/ts_mdsp_bench.cpp | 70 ++++++++++++++++ 5 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 benchmarks/include/triespline_bsm.h create mode 100644 benchmarks/ts_bsm_bench.cpp create mode 100644 benchmarks/ts_mdsp_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 2a3300a..ea8b42e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -97,6 +97,22 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { return records; } +template +static std::vector> read_sosd_file_pair(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector> records(n); + for (size_t i=0; i +template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; iquery(q); - auto r = res.get(); + if constexpr (!BSM) { + auto r = res.get(); + } } } @@ -47,6 +50,22 @@ static void run_static_queries(S *shard, std::vector &queries) { } +/* + * Insert records into a standard Bentley-Saxe extension. Deletes are not + * supported. + */ +template +static void insert_records(psudb::bsm::BentleySaxe *extension, + size_t start, size_t stop, std::vector &records) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i]); + } + + psudb::progress_update(1, "Insert Progress"); +} template diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h new file mode 100644 index 0000000..9613a62 --- /dev/null +++ b/benchmarks/include/triespline_bsm.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include "ts/builder.h" + +template +class BSMTrieSpline { +private: + typedef std::pair R; + +public: + struct RangeQueryParameters { + K lower_bound; + K upper_bound; + }; + +public: + static BSMTrieSpline *build(std::vector &records) { + std::sort(records.begin(), records.end()); + return new BSMTrieSpline(records); + } + + static BSMTrieSpline *build_presorted(std::vector &records) { + return new BSMTrieSpline(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMTrieSpline::RangeQueryParameters*) q; + + size_t idx = lower_bound(parms->lower_bound); + + while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + rs.emplace_back(m_data[idx++]); + } + + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb) { + rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + return std::move(rsa); + } + + size_t record_count() { + return m_data.size(); + } + + + ~BSMTrieSpline() = default; + + +private: + std::vector m_data; + K m_max_key; + K m_min_key; + ts::TrieSpline m_ts; + + BSMTrieSpline(std::vector &records) { + m_data = std::move(records); + m_min_key = m_data[0].first; + m_max_key = m_data[m_data.size() - 1].first; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 1) { + m_ts = bldr.Finalize(); + } + } + + size_t lower_bound(K key) { + if (m_data.size() == 0) { + return 1; + } else if (m_data.size() == 1) { + if (m_data[0].first < key) { + return 1; + } else { + return 0; + } + } + + auto bound = m_ts.GetSearchBound(key); + size_t idx = bound.begin; + + if (idx >= m_data.size()) { + return m_data.size(); + } + + // If the region to search is less than some pre-specified + // amount, perform a linear scan to locate the record. + if (bound.end - bound.begin < 256) { + while (idx < bound.end && m_data[idx].first < key) { + idx++; + } + } else { + // Otherwise, perform a binary search + idx = bound.begin; + size_t max = bound.end; + + while (idx < max) { + size_t mid = (idx + max) / 2; + if (key > m_data[mid].first) { + idx = mid + 1; + } else { + max = mid; + } + } + } + + if (idx == m_data.size()) { + return m_data.size(); + } + + if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) { + return idx-1; + } + + return idx; + } +}; diff --git a/benchmarks/ts_bsm_bench.cpp b/benchmarks/ts_bsm_bench.cpp new file mode 100644 index 0000000..366abce --- /dev/null +++ b/benchmarks/ts_bsm_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "include/triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "include/standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/ts_mdsp_bench.cpp b/benchmarks/ts_mdsp_bench.cpp new file mode 100644 index 0000000..5e5001d --- /dev/null +++ b/benchmarks/ts_mdsp_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "include/triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "include/standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3