summaryrefslogtreecommitdiffstats
path: root/benchmarks
diff options
context:
space:
mode:
author: Douglas Rumbaugh <dbr4@psu.edu> 2023-06-13 13:56:17 -0400
committer: Douglas Rumbaugh <dbr4@psu.edu> 2023-06-13 13:56:17 -0400
commit: 37304a11b8edfef8fc627e7a3a095e77e70b07b1 (patch)
tree: 945d2eefd5b45cb8bed97bea089b0cfd80b47edf /benchmarks
parent: 8463210dfac049cafd2334bc489eb59dff1b9d9d (diff)
download: dynamic-extension-37304a11b8edfef8fc627e7a3a095e77e70b07b1.tar.gz
Benchmark refactoring
Diffstat (limited to 'benchmarks')
-rw-r--r-- benchmarks/include/bench.h 67
-rw-r--r-- benchmarks/include/bench_utility.h (renamed from benchmarks/bench.h) 116
-rw-r--r-- benchmarks/sampling_tput.cpp 96
3 files changed, 149 insertions, 130 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h
new file mode 100644
index 0000000..d82da48
--- /dev/null
+++ b/benchmarks/include/bench.h
@@ -0,0 +1,67 @@
+#include "bench_utility.h"
+
+/*
+ * Measure the combined insert/delete throughput of de_index.
+ *
+ * Records are read from file in batches of at most BATCH and inserted until
+ * insert_cnt inserts have been applied or no further record can be read.
+ * Before each insert, a record taken from to_delete is erased with
+ * probability delete_prop, until insert_cnt * delete_prop deletes have been
+ * applied. Only the insert/erase loop is timed; building the batches is not.
+ *
+ * The throughput in operations per second is printed to stdout, after which
+ * reset_de_perf_metrics() is called.
+ *
+ * Returns the result of the last build_insert_vec call.
+ */
+template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
+static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
+                              double delete_prop, std::vector<R> &to_delete) {
+
+    size_t delete_cnt = insert_cnt * delete_prop;
+
+    size_t applied_deletes = 0;
+    size_t applied_inserts = 0;
+
+    std::vector<R> insert_vec;
+    std::vector<R> delete_vec;
+    insert_vec.reserve(BATCH);
+    delete_vec.reserve(BATCH*delete_prop);
+
+    size_t delete_idx = 0;
+
+    bool continue_benchmark = true;
+
+    // accumulated duration of the timed loops, in nanoseconds
+    size_t total_time = 0;
+
+    while (applied_inserts < insert_cnt && continue_benchmark) {
+        // Never request more records than are still owed; a full BATCH on the
+        // final iteration would insert (and count) more than insert_cnt.
+        size_t remaining = insert_cnt - applied_inserts;
+        size_t request = (remaining < BATCH) ? remaining : BATCH;
+
+        continue_benchmark = build_insert_vec(file, insert_vec, request, delete_prop, to_delete);
+        if (applied_deletes < delete_cnt) {
+            build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+            delete_idx = 0;
+        }
+
+        if (insert_vec.size() == 0) {
+            break;
+        }
+
+        if constexpr (PROGRESS) {
+            progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+        }
+
+        auto insert_start = std::chrono::high_resolution_clock::now();
+        for (size_t i=0; i<insert_vec.size(); i++) {
+            // process a delete if necessary
+            if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+                de_index.erase(delete_vec[delete_idx++]);
+                applied_deletes++;
+            }
+
+            // insert the record;
+            de_index.insert(insert_vec[i]);
+            applied_inserts++;
+        }
+        auto insert_stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+    }
+
+    if constexpr (PROGRESS) {
+        progress_update(1.0, "inserting:");
+    }
+
+    // Nothing was timed if no batch could be read; report 0 rather than
+    // converting the inf/NaN of a division by zero to an integer.
+    size_t throughput = 0;
+    if (total_time > 0) {
+        throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+    }
+
+    // %zu is the conversion specifier matching size_t
+    fprintf(stdout, "\n%zu\n", throughput);
+    reset_de_perf_metrics();
+
+    return continue_benchmark;
+}
+
+
diff --git a/benchmarks/bench.h b/benchmarks/include/bench_utility.h
index c1457fc..78d6415 100644
--- a/benchmarks/bench.h
+++ b/benchmarks/include/bench_utility.h
@@ -2,6 +2,10 @@
#define H_BENCH
#include "framework/DynamicExtension.h"
#include "shard/WSS.h"
+#include "shard/MemISAM.h"
+#include "shard/PGM.h"
+#include "shard/TrieSpline.h"
+#include "shard/WIRS.h"
#include <cstdlib>
#include <cstdio>
@@ -22,7 +26,11 @@ typedef uint32_t value_type;
typedef uint64_t weight_type;
typedef de::WeightedRecord<key_type, value_type, weight_type> WRec;
+typedef de::Record<key_type, value_type> Rec;
+
typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS;
+typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTS;
+typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGM;
static gsl_rng *g_rng;
static std::set<WRec> *g_to_delete;
@@ -74,12 +82,13 @@ static void delete_bench_env()
delete g_to_delete;
}
-static bool next_record(std::fstream *file, WRec *record)
+template <de::RecordInterface R>
+static bool next_record(std::fstream &file, R &record)
{
if (g_reccnt >= g_max_record_cnt) return false;
std::string line;
- if (std::getline(*file, line, '\n')) {
+ if (std::getline(file, line, '\n')) {
std::stringstream line_stream(line);
std::string key_field;
std::string value_field;
@@ -89,13 +98,16 @@ static bool next_record(std::fstream *file, WRec *record)
std::getline(line_stream, key_field, '\t');
std::getline(line_stream, weight_field, '\t');
- record->key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
- record->value = atol(value_field.c_str());
- record->weight = atof(weight_field.c_str());
+ record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
+ record.value = atol(value_field.c_str());
+
+ if constexpr (de::WeightedRecordInterface<R>) {
+ record.weight = atof(weight_field.c_str());
+ }
- if (record->key < g_min_key) g_min_key = record->key;
+ if (record.key < g_min_key) g_min_key = record.key;
- if (record->key > g_max_key) g_max_key = record->key;
+ if (record.key > g_max_key) g_max_key = record.key;
g_reccnt++;
@@ -105,11 +117,13 @@ static bool next_record(std::fstream *file, WRec *record)
return false;
}
-static bool build_insert_vec(std::fstream *file, std::vector<WRec> &vec, size_t n) {
+template <de::RecordInterface R>
+static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
+ double delete_prop, std::vector<R> &to_delete) {
vec.clear();
for (size_t i=0; i<n; i++) {
- WRec rec;
- if (!next_record(file, &rec)) {
+ R rec;
+ if (!next_record(file, rec)) {
if (i == 0) {
return false;
}
@@ -118,11 +132,33 @@ static bool build_insert_vec(std::fstream *file, std::vector<WRec> &vec, size_t
}
vec.emplace_back(rec);
+
+ if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) {
+ to_delete.emplace_back(rec);
+ }
}
return true;
}
+/*
+ * Move up to n randomly chosen records out of to_delete and into vec.
+ *
+ * vec is cleared first. Every record placed in vec is removed from
+ * to_delete, so the same entry of the pool is never handed out twice.
+ *
+ * Returns true if n records were selected, false if to_delete ran out
+ * first (vec then holds the records selected up to that point).
+ */
+template <de::RecordInterface R>
+static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
+    vec.clear();
+
+    // The size of the output doubles as the loop counter, so the loop stops
+    // after n selections instead of draining the whole pool.
+    while (vec.size() < n) {
+        if (to_delete.empty()) {
+            return false;
+        }
+
+        auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
+        vec.emplace_back(to_delete[i]);
+
+        // Selection is uniformly random, so the order of the pool does not
+        // matter: overwrite the chosen slot with the last record and drop
+        // the tail rather than shifting every later element down.
+        to_delete[i] = to_delete.back();
+        to_delete.pop_back();
+    }
+
+    return true;
+}
+
/*
* helper routines for displaying progress bars to stderr
*/
@@ -139,55 +175,49 @@ static void progress_update(double percentage, std::string prompt) {
if (percentage >= 1) fprintf(stderr, "\n");
}
-static bool warmup(std::fstream *file, ExtendedWSS *extended_wss, size_t count, double delete_prop, bool progress=true)
-{
- size_t del_buf_size = 10000;
- size_t delete_idx = del_buf_size;
-
- std::vector<WRec> delbuf;
- std::set<WRec> deleted_keys;
+/*
+ * Pre-load extended_index with count records read from file.
+ *
+ * Records are read in batches of min(10% of count, 25000). Once more than
+ * one batch has been inserted, batch * delete_prop delete candidates are
+ * drawn from to_delete for each batch, and before each insert one of them
+ * is erased with probability delete_prop.
+ *
+ * Returns true once count records have been inserted, false if no further
+ * record could be read before that point.
+ *
+ * NOTE(review): to_delete is taken by value, so candidates collected during
+ * warmup are not visible to the caller afterwards, whereas
+ * insert_tput_bench and build_insert_vec take it by reference. Confirm
+ * whether a reference was intended here.
+ */
+template <typename DE, de::RecordInterface R>
+static bool warmup(std::fstream &file, DE &extended_index, size_t count,
+ double delete_prop, std::vector<R> to_delete, bool progress=true) {
+ size_t batch = std::min(.1 * count, 25000.0);
+ // .1 * count truncates to 0 for count < 10; a zero-sized batch would never
+ // insert anything and the loop below would not terminate.
+ if (batch == 0) batch = 1;
- de::wss_query_parms<WRec> parms;
- parms.rng = g_rng;
- parms.sample_size = del_buf_size;
+ std::vector<R> insert_vec;
+ std::vector<R> delete_vec;
+ insert_vec.reserve(batch);
+ delete_vec.reserve(batch*delete_prop);
size_t inserted = 0;
+ size_t delete_idx = 0;
double last_percent = 0;
- for (size_t i=0; i<count; i++) {
- WRec rec;
- if (!next_record(file, &rec)) {
- return false;
- }
-
- inserted++;
- extended_wss->insert(rec);
-
- if (delete_prop > 0 && i > extended_wss->get_buffer_capacity() && delete_idx >= delbuf.size()) {
- delbuf = extended_wss->query(&parms);
+ while (inserted < count) {
+ // Build vector of records to insert and potentially delete. Never
+ // request more than the records still owed, so that exactly count
+ // records are inserted.
+ size_t remaining = count - inserted;
+ size_t request = (remaining < batch) ? remaining : batch;
+ auto continue_warmup = build_insert_vec(file, insert_vec, request, delete_prop, to_delete);
+
+ // No record could be read: stop instead of looping forever on an
+ // empty batch.
+ if (!continue_warmup || insert_vec.empty()) {
+ return false;
+ }
+
+ if (inserted > batch) {
+ build_delete_vec(to_delete, delete_vec, batch*delete_prop);
delete_idx = 0;
- deleted_keys.clear();
}
- if (delete_prop > 0 && i > extended_wss->get_buffer_capacity() && gsl_rng_uniform(g_rng) < delete_prop && delete_idx < delbuf.size()) {
- auto rec = delbuf[delete_idx];
- delete_idx++;
-
- if (deleted_keys.find(rec) == deleted_keys.end()) {
- extended_wss->erase(rec);
- deleted_keys.insert(rec);
+ for (size_t i=0; i<insert_vec.size(); i++) {
+ // process a delete if necessary
+ if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+ extended_index.erase(delete_vec[delete_idx++]);
}
- }
- if (progress && ((double) i / (double) count) - last_percent > .01) {
- progress_update((double) i / (double) count, "warming up:");
- last_percent = (double) i / (double) count;
+ // insert the record;
+ extended_index.insert(insert_vec[i]);
+ inserted++;
+
+ if (progress) {
+ progress_update((double) inserted / (double) count, "warming up:");
+ }
}
}
+ /*
if (progress) {
progress_update(1, "warming up:");
}
+ */
return true;
}
diff --git a/benchmarks/sampling_tput.cpp b/benchmarks/sampling_tput.cpp
index b25b15f..70ff0cb 100644
--- a/benchmarks/sampling_tput.cpp
+++ b/benchmarks/sampling_tput.cpp
@@ -1,86 +1,6 @@
-#include "bench.h"
+#include "include/bench.h"
-size_t g_insert_batch_size = 1000;
-
-static bool insert_benchmark(ExtendedWSS *tree, std::fstream *file,
- size_t insert_cnt, double delete_prop) {
-
- size_t delete_cnt = insert_cnt * delete_prop;
- size_t delete_batch_size = g_insert_batch_size * delete_prop * 15;
- size_t delete_idx = delete_batch_size;
-
- std::vector<WRec> delbuf;
-
- std::set<WRec> deleted;
-
- size_t applied_deletes = 0;
- size_t applied_inserts = 0;
-
- std::vector<WRec> insert_vec;
- insert_vec.reserve(g_insert_batch_size);
- bool continue_benchmark = true;
-
- size_t total_time = 0;
-
- de::wss_query_parms<WRec> parms;
- parms.rng = g_rng;
- parms.sample_size = delete_batch_size;
-
- while (applied_inserts < insert_cnt && continue_benchmark) {
- continue_benchmark = build_insert_vec(file, insert_vec, g_insert_batch_size);
-
- if (insert_vec.size() == 0) {
- break;
- }
-
- // if we've fully processed the delete vector, sample a new
- // set of records to delete.
- if (delete_idx >= delbuf.size()) {
- delbuf = tree->query(&parms);
- deleted.clear();
- delete_idx = 0;
- }
-
- progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
- size_t local_inserted = 0;
- size_t local_deleted = 0;
-
- auto insert_start = std::chrono::high_resolution_clock::now();
- for (size_t i=0; i<insert_vec.size(); i++) {
- // process a delete if necessary
- if (applied_deletes < delete_cnt && delete_idx < delete_batch_size && gsl_rng_uniform(g_rng) < delete_prop) {
- auto rec = delbuf[delete_idx];
- delete_idx++;
-
- if (deleted.find(rec) == deleted.end()) {
- tree->erase(rec);
- deleted.insert(rec);
- local_deleted++;
- }
- }
-
- // insert the record;
- tree->insert(insert_vec[i]);
- local_inserted++;
- }
- auto insert_stop = std::chrono::high_resolution_clock::now();
-
- applied_deletes += local_deleted;
- applied_inserts += local_inserted;
- total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
- }
-
- progress_update(1.0, "inserting:");
- size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
-
- fprintf(stdout, "\n%ld\n", throughput);
- reset_de_perf_metrics();
-
- return continue_benchmark;
-}
-
-
-static void sample_benchmark(ExtendedWSS *tree, size_t k, size_t trial_cnt)
+static void sample_benchmark(ExtendedWSS &de_wss, size_t k, size_t trial_cnt)
{
char progbuf[25];
sprintf(progbuf, "sampling (%ld):", k);
@@ -101,7 +21,7 @@ static void sample_benchmark(ExtendedWSS *tree, size_t k, size_t trial_cnt)
progress_update((double) (i * batch_size) / (double) trial_cnt, progbuf);
auto start = std::chrono::high_resolution_clock::now();
for (int j=0; j < batch_size; j++) {
- auto res = tree->query(&parms);
+ auto res = de_wss.query(&parms);
total_samples += res.size();
}
auto stop = std::chrono::high_resolution_clock::now();
@@ -137,20 +57,22 @@ int main(int argc, char **argv)
init_bench_env(record_count, true, use_osm);
- auto sampling_lsm = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop);
+ auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop);
std::fstream datafile;
datafile.open(filename, std::ios::in);
+ std::vector<WRec> to_delete;
+
// warm up the tree with initial_insertions number of initially inserted
// records
size_t warmup_cnt = insert_batch * record_count;
- warmup(&datafile, &sampling_lsm, warmup_cnt, delete_prop);
+ warmup<ExtendedWSS, WRec>(datafile, de_wss, warmup_cnt, delete_prop, to_delete);
size_t insert_cnt = record_count - warmup_cnt;
- insert_benchmark(&sampling_lsm, &datafile, insert_cnt, delete_prop);
- sample_benchmark(&sampling_lsm, 1000, 10000);
+ insert_tput_bench<ExtendedWSS, WRec>(de_wss, datafile, insert_cnt, delete_prop, to_delete);
+ sample_benchmark(de_wss, 1000, 10000);
delete_bench_env();
fflush(stdout);