diff options
Diffstat (limited to 'benchmarks/include/standard_benchmarks.h')
| -rw-r--r-- | benchmarks/include/standard_benchmarks.h | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h new file mode 100644 index 0000000..b805c08 --- /dev/null +++ b/benchmarks/include/standard_benchmarks.h @@ -0,0 +1,380 @@ +/* + * benchmarks/include/bench.h + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include <cstdlib> +#include <fstream> +#include <gsl/gsl_rng.h> + +#include "framework/DynamicExtension.h" +#include "framework/interface/Query.h" +#include "query/irs.h" +#include "psu-util/progress.h" +#include "benchmark_types.h" +#include "psu-util/bentley-saxe.h" + +static size_t g_deleted_records = 0; +static double delete_proportion = 0.05; + +static volatile size_t total = 0; + +template<typename DE, typename QP, typename R> +static void run_queries(DE *extension, DE *ghost, std::vector<QP> &queries) { + for (size_t i=0; i<queries.size(); i++) { + std::vector<R> res = extension->query(&queries[i]); + std::vector<R> negres = ghost->query(&queries[i]); + auto result = res[0].first - negres[0].first; + total = result; + } +} + + +template<typename DE, typename QP, bool BSM=false> +static void run_queries(DE *extension, std::vector<QP> &queries) { + for (size_t i=0; i<queries.size(); i++) { + if constexpr (std::is_same_v<MTree, DE>) { + std::vector<Word2VecRec> result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (auto &r : result) { + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + #endif + } else if constexpr (std::is_same_v<MTree_alt, DE>) { + std::vector<ANNRec> result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + }else if constexpr (std::is_same_v<PGM, DE>) { + size_t tot = 0; + auto ptr = extension->find(queries[i].lower_bound); + while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { + tot++; + ++ptr; + } + } else { + auto res = extension->query(&queries[i]); + if constexpr (!BSM) { + auto result = res.get(); + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=result.size()-1; i>=0; i--) { + auto &r = result[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif + } else { + total = res.size(); + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif + } + } + } +} + +template <typename R> +static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) { + std::vector<int64_t> sample_set; + sample_set.reserve(queries[0].sample_size); + + for (size_t i=0; i<queries.size(); i++) { + btree->range_sample(queries[i].lower_bound, queries[i].upper_bound, queries[i].sample_size, sample_set, queries[i].rng); + } +} + + +template<typename S, typename QP, typename Q> +static void run_static_queries(S *shard, std::vector<QP> &queries) { + for (size_t i=0; i<queries.size(); i++) { + auto q = &queries[i]; + + auto state = Q::get_query_state(shard, q); + + std::vector<void*> shards = {shard}; + std::vector<void*> states = {state}; + + Q::process_query_states(q, states, nullptr); + auto res = Q::query(shard, state, q); + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i].rec; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif + } +} + + +/* + * Insert records into a standard Bentley-Saxe extension. Deletes are not + * supported. + */ +template<typename DS, typename R, bool MDSP=false> +static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, + size_t start, size_t stop, std::vector<R> &records) { + + psudb::progress_update(0, "Insert Progress"); + for (size_t i=start; i<stop; i++) { + extension->insert(records[i]); + } + + psudb::progress_update(1, "Insert Progress"); +} + + +template<typename DS, typename R, bool MDSP=false> +static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, + psudb::bsm::BentleySaxe<R, DS, MDSP> *ghost, + size_t start, size_t stop, std::vector<R> &records, + std::vector<size_t> &to_delete, size_t &delete_idx, + gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; i<stop; i++) { + + extension->insert(records[i]); + + if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + ghost->insert(records[to_delete[delete_idx]]); + delete_idx++; + g_deleted_records++; + } + + } + +} + + +template<typename DE, typename R> +static void insert_records(DE *structure, size_t start, size_t stop, + std::vector<R> &records, std::vector<size_t> &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; i<stop; i++) { + + if constexpr (std::is_same_v<BenchBTree, DE>) { + structure->insert(records[i]); + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { + structure->add(records[i]); + } else if constexpr (std::is_same_v<PGM, DE>) { + structure->insert_or_assign(records[i].key, records[i].value); + } else { + while (!structure->insert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } + } + + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { + + if constexpr (std::is_same_v<BenchBTree, DE>) { + structure->erase_one(records[to_delete[delete_idx]].key); + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { + structure->remove(records[to_delete[delete_idx]]); + } else if constexpr (std::is_same_v<PGM, DE>) { + structure->erase(records[to_delete[delete_idx]].key); + } else { + while (!structure->erase(records[to_delete[delete_idx]])) { + usleep(1); + } + } + delete_idx++; + g_deleted_records++; + } + } + + psudb::progress_update(1, "Insert Progress"); +} + +template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000> +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, gsl_rng *rng, std::vector<R> &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector<R> insert_vec; + std::vector<R> delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i<insert_vec.size(); i++) { + // process a delete if necessary + if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) { + if constexpr (std::is_same_v<BenchBTree, DE>) { + de_index.erase_one(delete_vec[delete_idx++].key); + #ifdef _GNU_SOURCE + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { + de_index.remove(delete_vec[delete_idx++]); + #endif + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + #ifdef _GNU_SOURCE + if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + #else + de_index.insert(insert_vec[i]); + #endif + + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + psudb::progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + + return continue_benchmark; +} + +template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true> +static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i<trial_cnt; i++) { + if constexpr (PROGRESS) { + psudb::progress_update((double) (i) / (double) trial_cnt, progbuf); + } + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j<queries.size(); j++) { + auto res = de_index.query(&queries[j]); + + total_results += res.size(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i<trial_cnt; i++) { + if constexpr (PROGRESS) { + psudb::progress_update((double) (i) / (double) trial_cnt, progbuf); + } + + std::vector<void *> states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j<queries.size(); j++) { + states[0] = Q::get_query_state(shard, &queries[j]); + Q::process_query_states(&queries[j], states, nullptr); + auto res = Q::query(shard, states[0], &queries[j]); + total_results += res.size(); + Q::delete_query_state(states[0]); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} |