summaryrefslogtreecommitdiffstats
path: root/benchmarks/include
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2024-01-30 15:31:03 -0500
committerDouglas Rumbaugh <dbr4@psu.edu>2024-01-30 15:31:03 -0500
commitf24fdf2fd310a5f868e15cd9682ca37d740c77af (patch)
treeb637160ce464dc05104e4ce2968e0568f550567c /benchmarks/include
parent4aa907d6275b1b74be87ed2f2e94d8a2719a6a97 (diff)
downloaddynamic-extension-f24fdf2fd310a5f868e15cd9682ca37d740c77af.tar.gz
Benchmarking updates
Diffstat (limited to 'benchmarks/include')
-rw-r--r--benchmarks/include/bench.h162
-rw-r--r--benchmarks/include/bench_utility.h181
-rw-r--r--benchmarks/include/btree-util.h27
-rw-r--r--benchmarks/include/data-proc.h (renamed from benchmarks/include/standalone_utility.h)30
4 files changed, 33 insertions, 367 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h
deleted file mode 100644
index 586ff12..0000000
--- a/benchmarks/include/bench.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * benchmarks/include/bench.h
- *
- * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
- *
- * All rights reserved. Published under the Modified BSD License.
- *
- */
-#pragma once
-
-#include "bench_utility.h"
-
-template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
-static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
- double delete_prop, std::vector<R> &to_delete, bool binary=false) {
-
- size_t delete_cnt = insert_cnt * delete_prop;
-
- size_t applied_deletes = 0;
- size_t applied_inserts = 0;
-
- std::vector<R> insert_vec;
- std::vector<R> delete_vec;
- insert_vec.reserve(BATCH);
- delete_vec.reserve(BATCH*delete_prop);
-
- size_t delete_idx = 0;
-
- bool continue_benchmark = true;
-
- size_t total_time = 0;
-
- while (applied_inserts < insert_cnt && continue_benchmark) {
- continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
- if (applied_deletes < delete_cnt) {
- build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
- delete_idx = 0;
- }
-
- if (insert_vec.size() == 0) {
- break;
- }
-
- if constexpr (PROGRESS) {
- progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
- }
-
- auto insert_start = std::chrono::high_resolution_clock::now();
- for (size_t i=0; i<insert_vec.size(); i++) {
- // process a delete if necessary
- if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
- if constexpr (std::is_same_v<TreeMap, DE>) {
- de_index.erase_one(delete_vec[delete_idx++].key);
- } else if constexpr (std::is_same_v<MTree, DE>) {
- de_index.remove(delete_vec[delete_idx++]);
- } else {
- de_index.erase(delete_vec[delete_idx++]);
- }
- applied_deletes++;
- }
-
- // insert the record;
- if constexpr (std::is_same_v<MTree, DE>) {
- de_index.add(insert_vec[i]);
- } else {
- de_index.insert(insert_vec[i]);
- }
- applied_inserts++;
- }
- auto insert_stop = std::chrono::high_resolution_clock::now();
-
- total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
- }
-
- if constexpr (PROGRESS) {
- progress_update(1.0, "inserting:");
- }
-
- size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
-
- fprintf(stdout, "%ld\t", throughput);
- reset_de_perf_metrics();
-
- return continue_benchmark;
-}
-
-template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true>
-static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) {
- char progbuf[25];
- if constexpr (PROGRESS) {
- sprintf(progbuf, "querying:");
- }
-
- size_t total_time = 0;
- size_t total_results = 0;
-
- for (size_t i=0; i<trial_cnt; i++) {
- if constexpr (PROGRESS) {
- progress_update((double) (i) / (double) trial_cnt, progbuf);
- }
-
- auto start = std::chrono::high_resolution_clock::now();
- for (size_t j=0; j<queries.size(); j++) {
- auto res = de_index.query(&queries[j]);
-
- total_results += res.size();
- }
- auto stop = std::chrono::high_resolution_clock::now();
-
- total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
- }
-
- progress_update(1.0, progbuf);
-
- size_t query_latency = total_time / (trial_cnt * queries.size());
-
- fprintf(stdout, "%ld\t", query_latency);
- fflush(stdout);
-
- return true;
-}
-
-
-template <typename Shard, de::RecordInterface R, typename QP, QueryInterface Q, bool PROGRESS=true>
-static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) {
- char progbuf[25];
- if constexpr (PROGRESS) {
- sprintf(progbuf, "querying:");
- }
-
- size_t total_time = 0;
- size_t total_results = 0;
-
- for (size_t i=0; i<trial_cnt; i++) {
- if constexpr (PROGRESS) {
- progress_update((double) (i) / (double) trial_cnt, progbuf);
- }
-
- std::vector<void *> states(1);
-
- auto start = std::chrono::high_resolution_clock::now();
- for (size_t j=0; j<queries.size(); j++) {
- states[0] = Q::get_query_state(shard, &queries[j]);
- Q::process_query_states(&queries[j], states, nullptr);
- auto res = Q::query(shard, states[0], &queries[j]);
- total_results += res.size();
- Q::delete_query_state(states[0]);
- }
- auto stop = std::chrono::high_resolution_clock::now();
-
- total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
- }
-
- progress_update(1.0, progbuf);
-
- size_t query_latency = total_time / (trial_cnt * queries.size());
-
- fprintf(stdout, "%ld\t", query_latency);
- fflush(stdout);
-
- return true;
-}
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
deleted file mode 100644
index e33b93d..0000000
--- a/benchmarks/include/bench_utility.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- * benchmarks/include/bench_utility.h
- *
- * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
- *
- * All rights reserved. Published under the Modified BSD License.
- *
- */
-#pragma once
-
-#include "framework/DynamicExtension.h"
-#include "shard/WSS.h"
-#include "shard/MemISAM.h"
-#include "shard/PGM.h"
-#include "shard/TrieSpline.h"
-#include "shard/WIRS.h"
-#include "ds/BTree.h"
-#include "shard/VPTree.h"
-#include "mtree.h"
-#include "standalone_utility.h"
-
-#include <cstdlib>
-#include <cstdio>
-#include <chrono>
-#include <algorithm>
-#include <numeric>
-#include <memory>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <unordered_set>
-#include <set>
-#include <string>
-#include <random>
-
-typedef uint64_t key_type;
-typedef uint64_t value_type;
-typedef uint64_t weight_type;
-
-typedef de::WeightedRecord<key_type, value_type, weight_type> WRec;
-typedef de::Record<key_type, value_type> Rec;
-
-const size_t W2V_SIZE = 300;
-typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
-
-typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS;
-typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ;
-typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGMRQ;
-typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMPointLookup<Rec>> ExtendedPGM_PL;
-typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedISAM_IRS;
-typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ;
-typedef de::DynamicExtension<Word2VecRec, de::VPTree<Word2VecRec>, de::KNNQuery<Word2VecRec>> ExtendedVPTree_KNN;
-
-struct euclidean_distance {
- double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
- double dist = 0;
- for (size_t i=0; i<W2V_SIZE; i++) {
- dist += (first.data[i] - second.data[i]) * (first.data[i] - second.data[i]);
- }
-
- return std::sqrt(dist);
- }
-};
-
-
-struct cosine_similarity {
- double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
- double prod = 0;
- double asquared = 0;
- double bsquared = 0;
-
- for (size_t i=0; i<W2V_SIZE; i++) {
- prod += first.data[i] * second.data[i];
- asquared += first.data[i]*first.data[i];
- bsquared += second.data[i]*second.data[i];
- }
-
- return prod / std::sqrt(asquared * bsquared);
- }
-};
-
-typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
-typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
-
-template <de::RecordInterface R>
-static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
- double delete_prop, std::vector<R> &to_delete, bool binary=false) {
- vec.clear();
- for (size_t i=0; i<n; i++) {
- R rec;
- if constexpr (std::is_same_v<R, Word2VecRec>) {
- if (!next_vector_record(file, rec)) {
- if (i == 0) {
- return false;
- }
-
- break;
- }
- } else {
- if (!next_record(file, rec, binary)) {
- if (i == 0) {
- return false;
- }
-
- break;
- }
- }
-
- vec.emplace_back(rec);
-
- if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) {
- to_delete.emplace_back(rec);
- }
- }
-
- return true;
-}
-
-
-template <typename DE, de::RecordInterface R>
-static bool warmup(std::fstream &file, DE &extended_index, size_t count,
- double delete_prop, std::vector<R> to_delete, bool progress=true, bool binary=false) {
- size_t batch = std::min(.1 * count, 25000.0);
-
- std::vector<R> insert_vec;
- std::vector<R> delete_vec;
- insert_vec.reserve(batch);
- delete_vec.reserve(batch*delete_prop);
-
- size_t inserted = 0;
- size_t delete_idx = 0;
-
- double last_percent = 0;
- while (inserted < count) {
- // Build vector of records to insert and potentially delete
- auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary);
- if (inserted > batch) {
- build_delete_vec(to_delete, delete_vec, batch*delete_prop);
- delete_idx = 0;
- }
-
- for (size_t i=0; i<insert_vec.size(); i++) {
- // process a delete if necessary
- if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
- if constexpr (std::is_same_v<TreeMap, DE>) {
- extended_index.erase_one(delete_vec[delete_idx++].key);
- }
- else if constexpr (std::is_same_v<MTree, DE>) {
- extended_index.remove(delete_vec[delete_idx++]);
- } else {
- extended_index.erase(delete_vec[delete_idx++]);
- }
- }
-
- // insert the record;
- if constexpr (std::is_same_v<MTree, DE>) {
- extended_index.add(insert_vec[i]);
- } else {
- extended_index.insert(insert_vec[i]);
- }
- inserted++;
-
- if (progress) {
- progress_update((double) inserted / (double) count, "warming up:");
- }
- }
- }
-
- return true;
-}
-
-
-static void reset_de_perf_metrics() {
-
- /*
- * rejection counters are zeroed automatically by the
- * sampling function itself.
- */
-
- RESET_IO_CNT();
-}
diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h
new file mode 100644
index 0000000..571c073
--- /dev/null
+++ b/benchmarks/include/btree-util.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstdlib>
+#include "psu-ds/BTree.h"
+
+struct btree_record {
+ int64_t key;
+ int64_t value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+struct btree_key_extract {
+ static const int64_t &get(const btree_record &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
+
+
diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/include/data-proc.h
index 9876e84..f758ed4 100644
--- a/benchmarks/include/standalone_utility.h
+++ b/benchmarks/include/data-proc.h
@@ -1,18 +1,14 @@
#include <cstdlib>
#include <cstdio>
-#include <chrono>
-#include <algorithm>
-#include <numeric>
-#include <memory>
#include <iostream>
#include <fstream>
#include <sstream>
-#include <unordered_set>
-#include <set>
#include <string>
-#include <random>
#include <gsl/gsl_rng.h>
#include <cstring>
+#include <vector>
+
+#include "psu-ds/BTree.h"
typedef uint64_t key_type;
typedef uint64_t value_type;
@@ -40,6 +36,8 @@ struct btree_key_extract {
}
};
+typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
+
static key_type g_min_key = UINT64_MAX;
static key_type g_max_key = 0;
@@ -105,7 +103,7 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity
}
template <typename QP>
-static std::vector<QP> read_range_queries(std::string fname, double selectivity) {
+static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
std::vector<QP> queries;
FILE *qf = fopen(fname.c_str(), "r");
@@ -244,19 +242,3 @@ static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, siz
td:
return true;
}
-
-/*
- * helper routines for displaying progress bars to stderr
- */
-static const char *g_prog_bar = "======================================================================";
-static const size_t g_prog_width = 50;
-
-static void progress_update(double percentage, std::string prompt) {
- int val = (int) (percentage * 100);
- int lpad = (int) (percentage * g_prog_width);
- int rpad = (int) (g_prog_width - lpad);
- fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, "");
- fflush(stderr);
-
- if (percentage >= 1) fprintf(stderr, "\n");
-}