diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-06-13 13:56:17 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-06-13 13:56:17 -0400 |
| commit | 37304a11b8edfef8fc627e7a3a095e77e70b07b1 (patch) | |
| tree | 945d2eefd5b45cb8bed97bea089b0cfd80b47edf /benchmarks | |
| parent | 8463210dfac049cafd2334bc489eb59dff1b9d9d (diff) | |
| download | dynamic-extension-37304a11b8edfef8fc627e7a3a095e77e70b07b1.tar.gz | |
Benchmark refactoring
Diffstat (limited to 'benchmarks')
| -rw-r--r-- | benchmarks/include/bench.h | 67 | ||||
| -rw-r--r-- | benchmarks/include/bench_utility.h (renamed from benchmarks/bench.h) | 116 | ||||
| -rw-r--r-- | benchmarks/sampling_tput.cpp | 96 |
3 files changed, 149 insertions, 130 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h new file mode 100644 index 0000000..d82da48 --- /dev/null +++ b/benchmarks/include/bench.h @@ -0,0 +1,67 @@ +#include "bench_utility.h" + +template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000> +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, std::vector<R> &to_delete) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector<R> insert_vec; + std::vector<R> delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i<insert_vec.size(); i++) { + // process a delete if necessary + if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { + de_index.erase(delete_vec[delete_idx++]); + applied_deletes++; + } + + // insert the record; + de_index.insert(insert_vec[i]); + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "\n%ld\n", throughput); + reset_de_perf_metrics(); + + return continue_benchmark; +} + + diff --git a/benchmarks/bench.h b/benchmarks/include/bench_utility.h index c1457fc..78d6415 100644 --- a/benchmarks/bench.h +++ b/benchmarks/include/bench_utility.h @@ -2,6 +2,10 @@ #define H_BENCH #include "framework/DynamicExtension.h" #include "shard/WSS.h" +#include "shard/MemISAM.h" +#include "shard/PGM.h" +#include "shard/TrieSpline.h" +#include "shard/WIRS.h" #include <cstdlib> #include <cstdio> @@ -22,7 +26,11 @@ typedef uint32_t value_type; typedef uint64_t weight_type; typedef de::WeightedRecord<key_type, value_type, weight_type> WRec; +typedef de::Record<key_type, value_type> Rec; + typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS; +typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTS; +typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGM; static gsl_rng *g_rng; static std::set<WRec> *g_to_delete; @@ -74,12 +82,13 @@ static void delete_bench_env() delete g_to_delete; } -static bool next_record(std::fstream *file, WRec *record) +template <de::RecordInterface R> +static bool next_record(std::fstream &file, R &record) { if (g_reccnt >= g_max_record_cnt) return false; std::string line; - if (std::getline(*file, line, '\n')) { + if (std::getline(file, line, '\n')) { std::stringstream line_stream(line); std::string key_field; std::string value_field; @@ -89,13 +98,16 @@ static bool next_record(std::fstream *file, WRec *record) std::getline(line_stream, key_field, '\t'); std::getline(line_stream, weight_field, '\t'); - record->key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record->value = atol(value_field.c_str()); - record->weight = atof(weight_field.c_str()); + record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); + record.value = atol(value_field.c_str()); + + if constexpr (de::WeightedRecordInterface<R>) { + record.weight = atof(weight_field.c_str()); + } - if (record->key < g_min_key) g_min_key = record->key; + if (record.key < g_min_key) g_min_key = record.key; - if (record->key > g_max_key) g_max_key = record->key; + if (record.key > g_max_key) g_max_key = record.key; g_reccnt++; @@ -105,11 +117,13 @@ static bool next_record(std::fstream *file, WRec *record) return false; } -static bool build_insert_vec(std::fstream *file, std::vector<WRec> &vec, size_t n) { +template <de::RecordInterface R> +static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n, + double delete_prop, std::vector<R> &to_delete) { vec.clear(); for (size_t i=0; i<n; i++) { - WRec rec; - if (!next_record(file, &rec)) { + R rec; + if (!next_record(file, rec)) { if (i == 0) { return false; } @@ -118,11 +132,33 @@ static bool build_insert_vec(std::fstream *file, std::vector<WRec> &vec, size_t } vec.emplace_back(rec); + + if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { + to_delete.emplace_back(rec); + } } return true; } +template <de::RecordInterface R> +static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) { + vec.clear(); + + size_t cnt = 0; + while (cnt < n) { + if (to_delete.size() == 0) { + return false; + } + + auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); + vec.emplace_back(to_delete[i]); + to_delete.erase(to_delete.begin() + i); + } +td: + return true; +} + /* * helper routines for displaying progress bars to stderr */ @@ -139,55 +175,49 @@ static void progress_update(double percentage, std::string prompt) { if (percentage >= 1) fprintf(stderr, "\n"); } -static bool warmup(std::fstream *file, ExtendedWSS *extended_wss, size_t count, double delete_prop, bool progress=true) -{ - size_t del_buf_size = 10000; - size_t delete_idx = del_buf_size; - - std::vector<WRec> delbuf; - std::set<WRec> deleted_keys; +template <typename DE, de::RecordInterface R> +static bool warmup(std::fstream &file, DE &extended_index, size_t count, + double delete_prop, std::vector<R> to_delete, bool progress=true) { + size_t batch = std::min(.1 * count, 25000.0); - de::wss_query_parms<WRec> parms; - parms.rng = g_rng; - parms.sample_size = del_buf_size; + std::vector<R> insert_vec; + std::vector<R> delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); size_t inserted = 0; + size_t delete_idx = 0; double last_percent = 0; - for (size_t i=0; i<count; i++) { - WRec rec; - if (!next_record(file, &rec)) { - return false; - } - - inserted++; - extended_wss->insert(rec); - - if (delete_prop > 0 && i > extended_wss->get_buffer_capacity() && delete_idx >= delbuf.size()) { - delbuf = extended_wss->query(&parms); + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); delete_idx = 0; - deleted_keys.clear(); } - if (delete_prop > 0 && i > extended_wss->get_buffer_capacity() && gsl_rng_uniform(g_rng) < delete_prop && delete_idx < delbuf.size()) { - auto rec = delbuf[delete_idx]; - delete_idx++; - - if (deleted_keys.find(rec) == deleted_keys.end()) { - extended_wss->erase(rec); - deleted_keys.insert(rec); + for (size_t i=0; i<insert_vec.size(); i++) { + // process a delete if necessary + if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { + extended_index.erase(delete_vec[delete_idx++]); } - } - if (progress && ((double) i / (double) count) - last_percent > .01) { - progress_update((double) i / (double) count, "warming up:"); - last_percent = (double) i / (double) count; + // insert the record; + extended_index.insert(insert_vec[i]); + inserted++; + + if (progress) { + progress_update((double) inserted / (double) count, "warming up:"); + } } } + /* if (progress) { progress_update(1, "warming up:"); } + */ return true; } diff --git a/benchmarks/sampling_tput.cpp b/benchmarks/sampling_tput.cpp index b25b15f..70ff0cb 100644 --- a/benchmarks/sampling_tput.cpp +++ b/benchmarks/sampling_tput.cpp @@ -1,86 +1,6 @@ -#include "bench.h" +#include "include/bench.h" -size_t g_insert_batch_size = 1000; - -static bool insert_benchmark(ExtendedWSS *tree, std::fstream *file, - size_t insert_cnt, double delete_prop) { - - size_t delete_cnt = insert_cnt * delete_prop; - size_t delete_batch_size = g_insert_batch_size * delete_prop * 15; - size_t delete_idx = delete_batch_size; - - std::vector<WRec> delbuf; - - std::set<WRec> deleted; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - std::vector<WRec> insert_vec; - insert_vec.reserve(g_insert_batch_size); - bool continue_benchmark = true; - - size_t total_time = 0; - - de::wss_query_parms<WRec> parms; - parms.rng = g_rng; - parms.sample_size = delete_batch_size; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, g_insert_batch_size); - - if (insert_vec.size() == 0) { - break; - } - - // if we've fully processed the delete vector, sample a new - // set of records to delete. - if (delete_idx >= delbuf.size()) { - delbuf = tree->query(&parms); - deleted.clear(); - delete_idx = 0; - } - - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - size_t local_inserted = 0; - size_t local_deleted = 0; - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i<insert_vec.size(); i++) { - // process a delete if necessary - if (applied_deletes < delete_cnt && delete_idx < delete_batch_size && gsl_rng_uniform(g_rng) < delete_prop) { - auto rec = delbuf[delete_idx]; - delete_idx++; - - if (deleted.find(rec) == deleted.end()) { - tree->erase(rec); - deleted.insert(rec); - local_deleted++; - } - } - - // insert the record; - tree->insert(insert_vec[i]); - local_inserted++; - } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - applied_deletes += local_deleted; - applied_inserts += local_inserted; - total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "\n%ld\n", throughput); - reset_de_perf_metrics(); - - return continue_benchmark; -} - - -static void sample_benchmark(ExtendedWSS *tree, size_t k, size_t trial_cnt) +static void sample_benchmark(ExtendedWSS &de_wss, size_t k, size_t trial_cnt) { char progbuf[25]; sprintf(progbuf, "sampling (%ld):", k); @@ -101,7 +21,7 @@ static void sample_benchmark(ExtendedWSS *tree, size_t k, size_t trial_cnt) progress_update((double) (i * batch_size) / (double) trial_cnt, progbuf); auto start = std::chrono::high_resolution_clock::now(); for (int j=0; j < batch_size; j++) { - auto res = tree->query(&parms); + auto res = de_wss.query(&parms); total_samples += res.size(); } auto stop = std::chrono::high_resolution_clock::now(); @@ -137,20 +57,22 @@ int main(int argc, char **argv) init_bench_env(record_count, true, use_osm); - auto sampling_lsm = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); + auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); std::fstream datafile; datafile.open(filename, std::ios::in); + std::vector<WRec> to_delete; + // warm up the tree with initial_insertions number of initially inserted // records size_t warmup_cnt = insert_batch * record_count; - warmup(&datafile, &sampling_lsm, warmup_cnt, delete_prop); + warmup<ExtendedWSS, WRec>(datafile, de_wss, warmup_cnt, delete_prop, to_delete); size_t insert_cnt = record_count - warmup_cnt; - insert_benchmark(&sampling_lsm, &datafile, insert_cnt, delete_prop); - sample_benchmark(&sampling_lsm, 1000, 10000); + insert_tput_bench<ExtendedWSS, WRec>(de_wss, datafile, insert_cnt, delete_prop, to_delete); + sample_benchmark(de_wss, 1000, 10000); delete_bench_env(); fflush(stdout); |