From fc8b4c14bd2814447b5d3180c4ecf3742196c6bf Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 23 Jul 2023 14:17:38 -0400 Subject: Benchmarking updates --- benchmarks/include/bench.h | 6 ++-- benchmarks/include/bench_utility.h | 42 ++++++++++++++++++++----- benchmarks/isam_irs_bench.cpp | 64 ++++++++++++++++++++++++++++++++++++++ benchmarks/isam_rq_bench.cpp | 59 +++++++++++++++++++++++++++++++++++ benchmarks/pgm_rq_bench.cpp | 6 ++-- 5 files changed, 162 insertions(+), 15 deletions(-) create mode 100644 benchmarks/isam_irs_bench.cpp create mode 100644 benchmarks/isam_rq_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h index 3e1c6b2..e0f4c1d 100644 --- a/benchmarks/include/bench.h +++ b/benchmarks/include/bench.h @@ -12,7 +12,7 @@ template static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, - double delete_prop, std::vector &to_delete) { + double delete_prop, std::vector &to_delete, bool binary=false) { size_t delete_cnt = insert_cnt * delete_prop; @@ -31,7 +31,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn size_t total_time = 0; while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete); + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); if (applied_deletes < delete_cnt) { build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); delete_idx = 0; @@ -104,7 +104,6 @@ static bool query_latency_bench(DE &de_index, std::vector queries, size_t tr size_t query_latency = total_time / (trial_cnt * queries.size()); fprintf(stdout, "%ld\t", query_latency); - fprintf(stderr, "%ld\n", total_results); fflush(stdout); return true; @@ -146,7 +145,6 @@ static bool static_latency_bench(Shard *shard, std::vector queries, size_t t size_t query_latency = total_time / (trial_cnt * queries.size()); fprintf(stdout, "%ld\t", query_latency); - fprintf(stderr, "%ld\n", total_results); fflush(stdout); return true; diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index 2d31cae..a5f5e0b 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -30,7 +30,7 @@ #include typedef uint64_t key_type; -typedef uint32_t value_type; +typedef uint64_t value_type; typedef uint64_t weight_type; typedef de::WeightedRecord WRec; @@ -39,6 +39,8 @@ typedef de::Record Rec; typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; +typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; +typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; static gsl_rng *g_rng; static std::set *g_to_delete; @@ -115,11 +117,36 @@ static std::vector read_range_queries(std::string fname, double selectivity) return queries; } -template -static bool next_record(std::fstream &file, R &record) +template +static bool next_record(std::fstream &file, R &record, bool binary=false) { + static value_type value = 1; if (g_reccnt >= g_max_record_cnt) return false; + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if constexpr (de::WeightedRecordInterface) { + decltype(R::weight) weight; + file.read((char*) &weight, sizeof(weight)); + record.weight = weight; + } + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + std::string line; if (std::getline(file, line, '\n')) { std::stringstream line_stream(line); @@ -139,7 +166,6 @@ static bool next_record(std::fstream &file, R &record) } if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; g_reccnt++; @@ -152,11 +178,11 @@ static bool next_record(std::fstream &file, R &record) template static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete) { + double delete_prop, std::vector &to_delete, bool binary=false) { vec.clear(); for (size_t i=0; i static bool warmup(std::fstream &file, DE &extended_index, size_t count, - double delete_prop, std::vector to_delete, bool progress=true) { + double delete_prop, std::vector to_delete, bool progress=true, bool binary=false) { size_t batch = std::min(.1 * count, 25000.0); std::vector insert_vec; @@ -224,7 +250,7 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count, double last_percent = 0; while (inserted < count) { // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete); + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); if (inserted > batch) { build_delete_vec(to_delete, delete_vec, batch*delete_prop); delete_idx = 0; diff --git a/benchmarks/isam_irs_bench.cpp b/benchmarks/isam_irs_bench.cpp new file mode 100644 index 0000000..96525f0 --- /dev/null +++ b/benchmarks/isam_irs_bench.cpp @@ -0,0 +1,64 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_irs, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_irs.get_memory_usage()); + query_latency_bench>(de_irs, queries); + fprintf(stdout, "\n"); + + auto ts = de_irs.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::irs_query_parms, de::IRSQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/isam_rq_bench.cpp b/benchmarks/isam_rq_bench.cpp new file mode 100644 index 0000000..bb5626e --- /dev/null +++ b/benchmarks/isam_rq_bench.cpp @@ -0,0 +1,59 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage()); + query_latency_bench>(de_isam_rq, queries); + fprintf(stdout, "\n"); + + auto ts = de_isam_rq.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::ISAMRangeQueryParms, de::ISAMRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/pgm_rq_bench.cpp b/benchmarks/pgm_rq_bench.cpp index aac6e07..3acc34e 100644 --- a/benchmarks/pgm_rq_bench.cpp +++ b/benchmarks/pgm_rq_bench.cpp @@ -32,18 +32,18 @@ int main(int argc, char **argv) auto queries = read_range_queries>(query_file, .0001); std::fstream datafile; - datafile.open(filename, std::ios::in); + datafile.open(filename, std::ios::in | std::ios::binary); std::vector to_delete; // warm up the tree with initial_insertions number of initially inserted // records size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete); + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); size_t insert_cnt = record_count - warmup_cnt; - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete); + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); fprintf(stdout, "%ld\t", de.get_memory_usage()); query_latency_bench>(de, queries, 1); -- cgit v1.2.3