diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-01-30 15:31:03 -0500 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-01-30 15:31:03 -0500 |
| commit | f24fdf2fd310a5f868e15cd9682ca37d740c77af (patch) | |
| tree | b637160ce464dc05104e4ce2968e0568f550567c /benchmarks/include | |
| parent | 4aa907d6275b1b74be87ed2f2e94d8a2719a6a97 (diff) | |
| download | dynamic-extension-f24fdf2fd310a5f868e15cd9682ca37d740c77af.tar.gz | |
Benchmarking updates
Diffstat (limited to 'benchmarks/include')
| -rw-r--r-- | benchmarks/include/bench.h | 162 | ||||
| -rw-r--r-- | benchmarks/include/bench_utility.h | 181 | ||||
| -rw-r--r-- | benchmarks/include/btree-util.h | 27 | ||||
| -rw-r--r-- | benchmarks/include/data-proc.h (renamed from benchmarks/include/standalone_utility.h) | 30 |
4 files changed, 33 insertions, 367 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h deleted file mode 100644 index 586ff12..0000000 --- a/benchmarks/include/bench.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * benchmarks/include/bench.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "bench_utility.h" - -template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000> -static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, - double delete_prop, std::vector<R> &to_delete, bool binary=false) { - - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - std::vector<R> insert_vec; - std::vector<R> delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - if constexpr (PROGRESS) { - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i<insert_vec.size(); i++) { - // process a delete if necessary - if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { - if constexpr (std::is_same_v<TreeMap, DE>) { - de_index.erase_one(delete_vec[delete_idx++].key); - } else if constexpr (std::is_same_v<MTree, DE>) { - de_index.remove(delete_vec[delete_idx++]); - } else { - de_index.erase(delete_vec[delete_idx++]); - } - applied_deletes++; - } - - // insert the record; - if constexpr (std::is_same_v<MTree, DE>) { - de_index.add(insert_vec[i]); - } else { - de_index.insert(insert_vec[i]); - } - applied_inserts++; - } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count(); - } - - if constexpr (PROGRESS) { - progress_update(1.0, "inserting:"); - } - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); - reset_de_perf_metrics(); - - return continue_benchmark; -} - -template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true> -static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i<trial_cnt; i++) { - if constexpr (PROGRESS) { - progress_update((double) (i) / (double) trial_cnt, progbuf); - } - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; j<queries.size(); j++) { - auto res = de_index.query(&queries[j]); - - total_results += res.size(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} - - -template <typename Shard, de::RecordInterface R, typename QP, QueryInterface Q, bool PROGRESS=true> -static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i<trial_cnt; i++) { - if constexpr (PROGRESS) { - progress_update((double) (i) / (double) trial_cnt, progbuf); - } - - std::vector<void *> states(1); - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; j<queries.size(); j++) { - states[0] = Q::get_query_state(shard, &queries[j]); - Q::process_query_states(&queries[j], states, nullptr); - auto res = Q::query(shard, states[0], &queries[j]); - total_results += res.size(); - Q::delete_query_state(states[0]); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h deleted file mode 100644 index e33b93d..0000000 --- a/benchmarks/include/bench_utility.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * benchmarks/include/bench_utility.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "framework/DynamicExtension.h" -#include "shard/WSS.h" -#include "shard/MemISAM.h" -#include "shard/PGM.h" -#include "shard/TrieSpline.h" -#include "shard/WIRS.h" -#include "ds/BTree.h" -#include "shard/VPTree.h" -#include "mtree.h" -#include "standalone_utility.h" - -#include <cstdlib> -#include <cstdio> -#include <chrono> -#include <algorithm> -#include <numeric> -#include <memory> -#include <iostream> -#include <fstream> -#include <sstream> -#include <unordered_set> -#include <set> -#include <string> -#include <random> - -typedef uint64_t key_type; -typedef uint64_t value_type; -typedef uint64_t weight_type; - -typedef de::WeightedRecord<key_type, value_type, weight_type> WRec; -typedef de::Record<key_type, value_type> Rec; - -const size_t W2V_SIZE = 300; -typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec; - -typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS; -typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ; -typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGMRQ; -typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMPointLookup<Rec>> ExtendedPGM_PL; -typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedISAM_IRS; -typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ; -typedef de::DynamicExtension<Word2VecRec, de::VPTree<Word2VecRec>, de::KNNQuery<Word2VecRec>> ExtendedVPTree_KNN; - -struct euclidean_distance { - double operator()(const Word2VecRec &first, const Word2VecRec &second) const { - double dist = 0; - for (size_t i=0; i<W2V_SIZE; i++) { - dist += (first.data[i] - second.data[i]) * (first.data[i] - second.data[i]); - } - - return std::sqrt(dist); - } -}; - - -struct cosine_similarity { - double operator()(const Word2VecRec &first, const Word2VecRec &second) const { - double prod = 0; - double asquared = 0; - double bsquared = 0; - - for (size_t i=0; i<W2V_SIZE; i++) { - prod += first.data[i] * second.data[i]; - asquared += first.data[i]*first.data[i]; - bsquared += second.data[i]*second.data[i]; - } - - return prod / std::sqrt(asquared * bsquared); - } -}; - -typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap; -typedef mt::mtree<Word2VecRec, euclidean_distance> MTree; - -template <de::RecordInterface R> -static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n, - double delete_prop, std::vector<R> &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i<n; i++) { - R rec; - if constexpr (std::is_same_v<R, Word2VecRec>) { - if (!next_vector_record(file, rec)) { - if (i == 0) { - return false; - } - - break; - } - } else { - if (!next_record(file, rec, binary)) { - if (i == 0) { - return false; - } - - break; - } - } - - vec.emplace_back(rec); - - if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { - to_delete.emplace_back(rec); - } - } - - return true; -} - - -template <typename DE, de::RecordInterface R> -static bool warmup(std::fstream &file, DE &extended_index, size_t count, - double delete_prop, std::vector<R> to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector<R> insert_vec; - std::vector<R> delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i<insert_vec.size(); i++) { - // process a delete if necessary - if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { - if constexpr (std::is_same_v<TreeMap, DE>) { - extended_index.erase_one(delete_vec[delete_idx++].key); - } - else if constexpr (std::is_same_v<MTree, DE>) { - extended_index.remove(delete_vec[delete_idx++]); - } else { - extended_index.erase(delete_vec[delete_idx++]); - } - } - - // insert the record; - if constexpr (std::is_same_v<MTree, DE>) { - extended_index.add(insert_vec[i]); - } else { - extended_index.insert(insert_vec[i]); - } - inserted++; - - if (progress) { - progress_update((double) inserted / (double) count, "warming up:"); - } - } - } - - return true; -} - - -static void reset_de_perf_metrics() { - - /* - * rejection counters are zeroed automatically by the - * sampling function itself. - */ - - RESET_IO_CNT(); -} diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h new file mode 100644 index 0000000..571c073 --- /dev/null +++ b/benchmarks/include/btree-util.h @@ -0,0 +1,27 @@ +#pragma once + +#include <cstdlib> +#include "psu-ds/BTree.h" + +struct btree_record { + int64_t key; + int64_t value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const int64_t &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree; + + diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/include/data-proc.h index 9876e84..f758ed4 100644 --- a/benchmarks/include/standalone_utility.h +++ b/benchmarks/include/data-proc.h @@ -1,18 +1,14 @@ #include <cstdlib> #include <cstdio> -#include <chrono> -#include <algorithm> -#include <numeric> -#include <memory> #include <iostream> #include <fstream> #include <sstream> -#include <unordered_set> -#include <set> #include <string> -#include <random> #include <gsl/gsl_rng.h> #include <cstring> +#include <vector> + +#include "psu-ds/BTree.h" typedef uint64_t key_type; typedef uint64_t value_type; @@ -40,6 +36,8 @@ struct btree_key_extract { } }; +typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree; + static key_type g_min_key = UINT64_MAX; static key_type g_max_key = 0; @@ -105,7 +103,7 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity } template <typename QP> -static std::vector<QP> read_range_queries(std::string fname, double selectivity) { +static std::vector<QP> read_range_queries(std::string &fname, double selectivity) { std::vector<QP> queries; FILE *qf = fopen(fname.c_str(), "r"); @@ -244,19 +242,3 @@ static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, siz td: return true; } - -/* - * helper routines for displaying progress bars to stderr - */ -static const char *g_prog_bar = "======================================================================"; -static const size_t g_prog_width = 50; - -static void progress_update(double percentage, std::string prompt) { - int val = (int) (percentage * 100); - int lpad = (int) (percentage * g_prog_width); - int rpad = (int) (g_prog_width - lpad); - fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, ""); - fflush(stderr); - - if (percentage >= 1) fprintf(stderr, "\n"); -} |