summary | refs | log | tree | commit | diff | stats
path: root/benchmarks/include
diff options
context:
space:
mode:
author: Douglas Rumbaugh <dbr4@psu.edu> 2023-06-13 13:56:17 -0400
committer: Douglas Rumbaugh <dbr4@psu.edu> 2023-06-13 13:56:17 -0400
commit: 37304a11b8edfef8fc627e7a3a095e77e70b07b1 (patch)
tree: 945d2eefd5b45cb8bed97bea089b0cfd80b47edf /benchmarks/include
parent: 8463210dfac049cafd2334bc489eb59dff1b9d9d (diff)
download: dynamic-extension-37304a11b8edfef8fc627e7a3a095e77e70b07b1.tar.gz
Benchmark refactoring
Diffstat (limited to 'benchmarks/include')
-rw-r--r-- benchmarks/include/bench.h 67
-rw-r--r-- benchmarks/include/bench_utility.h 236
2 files changed, 303 insertions, 0 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h
new file mode 100644
index 0000000..d82da48
--- /dev/null
+++ b/benchmarks/include/bench.h
@@ -0,0 +1,67 @@
+#include "bench_utility.h"
+
+/*
+ * Measure the combined insert/delete throughput of de_index.
+ *
+ * Records are pulled from file in batches of BATCH via build_insert_vec, and
+ * (while fewer than insert_cnt * delete_prop deletes have been applied) a
+ * matching batch of delete candidates is drawn from to_delete. Only the
+ * insert/erase loop of each batch is timed; building the batches is not.
+ *
+ * The resulting throughput (operations per second) is written to stdout on
+ * its own line, after which reset_de_perf_metrics() is called.
+ *
+ * Returns the result of the last build_insert_vec call, i.e. false once no
+ * further record could be read from file.
+ */
+template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
+static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
+                              double delete_prop, std::vector<R> &to_delete) {
+
+    size_t delete_cnt = insert_cnt * delete_prop;
+
+    // number of delete candidates requested per insert batch
+    const size_t delete_batch = BATCH * delete_prop;
+
+    size_t applied_deletes = 0;
+    size_t applied_inserts = 0;
+
+    std::vector<R> insert_vec;
+    std::vector<R> delete_vec;
+    insert_vec.reserve(BATCH);
+    delete_vec.reserve(delete_batch);
+
+    size_t delete_idx = 0;
+
+    bool continue_benchmark = true;
+
+    // accumulated time spent in the timed section, in nanoseconds
+    size_t total_time = 0;
+
+    while (applied_inserts < insert_cnt && continue_benchmark) {
+        continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete);
+        if (applied_deletes < delete_cnt) {
+            build_delete_vec(to_delete, delete_vec, delete_batch);
+            delete_idx = 0;
+        }
+
+        if (insert_vec.size() == 0) {
+            break;
+        }
+
+        if constexpr (PROGRESS) {
+            progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+        }
+
+        auto insert_start = std::chrono::high_resolution_clock::now();
+        for (size_t i=0; i<insert_vec.size(); i++) {
+            // process a delete if necessary
+            if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+                de_index.erase(delete_vec[delete_idx++]);
+                applied_deletes++;
+            }
+
+            // insert the record;
+            de_index.insert(insert_vec[i]);
+            applied_inserts++;
+        }
+        auto insert_stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+    }
+
+    if constexpr (PROGRESS) {
+        progress_update(1.0, "inserting:");
+    }
+
+    // Guard against a zero elapsed time (e.g., nothing was inserted), which
+    // would otherwise convert NaN/inf to an integer.
+    size_t throughput = 0;
+    if (total_time > 0) {
+        throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+    }
+
+    // %zu is the matching conversion for size_t
+    fprintf(stdout, "\n%zu\n", throughput);
+    reset_de_perf_metrics();
+
+    return continue_benchmark;
+}
+
+
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
new file mode 100644
index 0000000..78d6415
--- /dev/null
+++ b/benchmarks/include/bench_utility.h
@@ -0,0 +1,236 @@
+#ifndef H_BENCH
+#define H_BENCH
+#include "framework/DynamicExtension.h"
+#include "shard/WSS.h"
+#include "shard/MemISAM.h"
+#include "shard/PGM.h"
+#include "shard/TrieSpline.h"
+#include "shard/WIRS.h"
+
+#include <cstdlib>
+#include <cstdio>
+#include <chrono>
+#include <algorithm>
+#include <numeric>
+#include <memory>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <unordered_set>
+#include <set>
+#include <string>
+#include <random>
+
+// Field types of the benchmark records.
+using key_type = uint64_t;
+using value_type = uint32_t;
+using weight_type = uint64_t;
+
+// Record layouts: with and without a weight field.
+using WRec = de::WeightedRecord<key_type, value_type, weight_type>;
+using Rec = de::Record<key_type, value_type>;
+
+// DynamicExtension instantiations for the shard types under test.
+using ExtendedWSS = de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>>;
+using ExtendedTS = de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>>;
+using ExtendedPGM = de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>>;
+
+// Benchmark RNG; allocated by init_bench_rng, released by delete_bench_env.
+static gsl_rng *g_rng;
+
+// Allocated by init_bench_env, released by delete_bench_env.
+static std::set<WRec> *g_to_delete;
+
+// When set, next_record converts the key field with osm_to_key.
+static bool g_osm_data;
+
+// Smallest and largest key seen so far by next_record.
+static key_type g_min_key = UINT64_MAX;
+static key_type g_max_key = 0;
+
+// next_record stops producing records once g_reccnt reaches g_max_record_cnt.
+static size_t g_max_record_cnt = 0;
+static size_t g_reccnt = 0;
+
+// Seed used by init_bench_env when no random seed is requested.
+static constexpr unsigned int DEFAULT_SEED = 0;
+
+/*
+ * Read sizeof(unsigned int) bytes from /dev/urandom and return them as a
+ * seed. The result stays 0 if the device cannot be opened.
+ */
+static unsigned int get_random_seed()
+{
+    unsigned int result = 0;
+
+    std::fstream source("/dev/urandom", std::ios::in | std::ios::binary);
+    source.read(reinterpret_cast<char *>(&result), sizeof(result));
+    source.close();
+
+    return result;
+}
+
+/*
+ * Convert a textual key field into an integer key: the parsed value is
+ * shifted by +180, scaled by 10e6 (i.e., 1e7), and truncated to key_type.
+ * NOTE(review): presumably the field is an OSM coordinate in degrees --
+ * confirm against the data set.
+ */
+static key_type osm_to_key(const char *key_field) {
+    const double scaled = (atof(key_field) + 180) * 10e6;
+    return static_cast<key_type>(scaled);
+}
+
+/*
+ * Allocate a GSL generator of the requested type, seed it, and publish it
+ * as the global benchmark RNG (g_rng).
+ */
+static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
+{
+    gsl_rng *generator = gsl_rng_alloc(type);
+    gsl_rng_set(generator, seed);
+
+    g_rng = generator;
+}
+
+/*
+ * Set up the global benchmark state.
+ *
+ * max_reccnt     -- upper bound on the number of records next_record yields
+ * random_seed    -- seed the RNG from get_random_seed() instead of DEFAULT_SEED
+ * osm_correction -- stored in g_osm_data; makes next_record convert keys
+ *                   with osm_to_key
+ */
+static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
+{
+    unsigned int seed = DEFAULT_SEED;
+    if (random_seed) {
+        seed = get_random_seed();
+    }
+    init_bench_rng(seed, gsl_rng_mt19937);
+
+    g_to_delete = new std::set<WRec>();
+
+    // record-reading state
+    g_reccnt = 0;
+    g_max_record_cnt = max_reccnt;
+    g_osm_data = osm_correction;
+}
+
+/*
+ * Release the global state created by init_bench_env. The globals are reset
+ * to nullptr afterwards so that calling this function more than once (or
+ * without a prior init) does not free the same object twice.
+ */
+static void delete_bench_env()
+{
+    if (g_rng != nullptr) {
+        gsl_rng_free(g_rng);
+        g_rng = nullptr;
+    }
+
+    // deleting a null pointer is a no-op
+    delete g_to_delete;
+    g_to_delete = nullptr;
+}
+
+/*
+ * Read the next record from file into record.
+ *
+ * Each line is split on tabs into three fields, in this order: value, key,
+ * weight. The key is converted with osm_to_key when g_osm_data is set and
+ * parsed as an unsigned decimal integer otherwise. The weight is only
+ * assigned for weighted record types. g_min_key, g_max_key and g_reccnt are
+ * updated for every record produced.
+ *
+ * Returns false once g_max_record_cnt records have been produced or no
+ * further line can be read; true otherwise.
+ */
+template <de::RecordInterface R>
+static bool next_record(std::fstream &file, R &record)
+{
+    if (g_reccnt >= g_max_record_cnt) return false;
+
+    std::string line;
+    if (!std::getline(file, line, '\n')) {
+        return false;
+    }
+
+    std::stringstream line_stream(line);
+    std::string key_field;
+    std::string value_field;
+    std::string weight_field;
+
+    std::getline(line_stream, value_field, '\t');
+    std::getline(line_stream, key_field, '\t');
+    std::getline(line_stream, weight_field, '\t');
+
+    if (g_osm_data) {
+        record.key = osm_to_key(key_field.c_str());
+    } else {
+        // Keys are 64-bit unsigned; parsing them as a signed long would
+        // mis-parse anything above LONG_MAX.
+        record.key = std::strtoull(key_field.c_str(), nullptr, 10);
+    }
+    record.value = atol(value_field.c_str());
+
+    if constexpr (de::WeightedRecordInterface<R>) {
+        record.weight = atof(weight_field.c_str());
+    }
+
+    if (record.key < g_min_key) g_min_key = record.key;
+
+    if (record.key > g_max_key) g_max_key = record.key;
+
+    g_reccnt++;
+
+    return true;
+}
+
+/*
+ * Refill vec with up to n records read from file via next_record.
+ *
+ * Every record read is additionally appended to to_delete with probability
+ * delete_prop * 1.1, i.e. delete candidates are oversampled by 10%.
+ *
+ * Returns false only when not a single record could be read; a partially
+ * filled vec still yields true.
+ */
+template <de::RecordInterface R>
+static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
+                             double delete_prop, std::vector<R> &to_delete) {
+    vec.clear();
+
+    const double sample_threshold = delete_prop + (delete_prop * .1);
+
+    size_t loaded = 0;
+    while (loaded < n) {
+        R next;
+        if (!next_record(file, next)) {
+            // running dry is only a failure if nothing was loaded at all
+            return loaded != 0;
+        }
+
+        vec.emplace_back(next);
+
+        if (gsl_rng_uniform(g_rng) < sample_threshold) {
+            to_delete.emplace_back(next);
+        }
+
+        loaded++;
+    }
+
+    return true;
+}
+
+/*
+ * Refill vec with n records drawn uniformly at random (without replacement)
+ * from to_delete. Each drawn record is removed from to_delete.
+ *
+ * Returns false if to_delete ran empty before n records could be drawn (vec
+ * then holds the records drawn so far), true otherwise.
+ */
+template <de::RecordInterface R>
+static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
+    vec.clear();
+
+    // The counter must advance on every draw; otherwise the loop only stops
+    // once to_delete has been drained completely.
+    for (size_t cnt = 0; cnt < n; cnt++) {
+        if (to_delete.size() == 0) {
+            return false;
+        }
+
+        auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
+        vec.emplace_back(to_delete[i]);
+        to_delete.erase(to_delete.begin() + i);
+    }
+
+    return true;
+}
+
+/*
+ * helper routines for displaying progress bars to stderr
+ */
+static const char *g_prog_bar = "======================================================================";
+static const size_t g_prog_width = 50;
+
+/*
+ * Redraw the progress bar on the current stderr line.
+ *
+ * percentage -- completed fraction; values outside [0, 1] are clamped so
+ *               that the bar never under- or overruns its brackets
+ * prompt     -- label printed in front of the bar
+ *
+ * A newline is emitted once the (clamped) fraction reaches 1.
+ */
+static void progress_update(double percentage, const std::string &prompt) {
+    // Out-of-range input would otherwise produce a negative padding width
+    // or a negative precision in the format string below.
+    percentage = std::max(0.0, std::min(1.0, percentage));
+
+    int val = (int) (percentage * 100);
+    int lpad = (int) (percentage * g_prog_width);
+    int rpad = (int) (g_prog_width - lpad);
+    fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, "");
+    fflush(stderr);
+
+    if (percentage >= 1) fprintf(stderr, "\n");
+}
+
+/*
+ * Pre-load extended_index with records from file, interleaving deletes.
+ *
+ * Records are inserted in batches of 10% of count (at most 25000, at least
+ * 1) until at least count records have been inserted; the last batch is not
+ * truncated, so slightly more than count records may be inserted. Once more
+ * than one batch has been inserted, each further batch is accompanied by
+ * delete candidates drawn from to_delete, and before each insert one of them
+ * is erased with probability delete_prop.
+ *
+ * NOTE(review): to_delete is taken by value, so candidates sampled here are
+ * not visible to the caller (insert_tput_bench takes it by reference) --
+ * confirm this is intended.
+ *
+ * Returns true if count records were inserted, false if the input ran out
+ * of records first.
+ */
+template <typename DE, de::RecordInterface R>
+static bool warmup(std::fstream &file, DE &extended_index, size_t count,
+                   double delete_prop, std::vector<R> to_delete, bool progress=true) {
+    size_t batch = std::min(.1 * count, 25000.0);
+    if (batch == 0) {
+        // a zero-sized batch (count < 10) would never make progress
+        batch = 1;
+    }
+
+    std::vector<R> insert_vec;
+    std::vector<R> delete_vec;
+    insert_vec.reserve(batch);
+    delete_vec.reserve(batch*delete_prop);
+
+    size_t inserted = 0;
+    size_t delete_idx = 0;
+
+    while (inserted < count) {
+        // Build vector of records to insert and potentially delete
+        build_insert_vec(file, insert_vec, batch, delete_prop, to_delete);
+        if (insert_vec.empty()) {
+            // no more input: stop instead of spinning on an empty batch
+            return false;
+        }
+
+        if (inserted > batch) {
+            build_delete_vec(to_delete, delete_vec, batch*delete_prop);
+            delete_idx = 0;
+        }
+
+        for (size_t i=0; i<insert_vec.size(); i++) {
+            // process a delete if necessary
+            if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+                extended_index.erase(delete_vec[delete_idx++]);
+            }
+
+            // insert the record;
+            extended_index.insert(insert_vec[i]);
+            inserted++;
+
+            if (progress) {
+                progress_update((double) inserted / (double) count, "warming up:");
+            }
+        }
+    }
+
+    /*
+    if (progress) {
+        progress_update(1, "warming up:");
+    }
+    */
+
+    return true;
+}
+
+
+/*
+ * Reset the performance counters. Only the IO counter is reset here:
+ * rejection counters are zeroed automatically by the sampling function
+ * itself.
+ */
+static void reset_de_perf_metrics() {
+    RESET_IO_CNT();
+}
+
+#endif // H_BENCH