From f6a846a864112cd0df686138fae89c727e8771ab Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 23 Jul 2023 15:38:04 -0400 Subject: BTree-based baselines for IRS and Range queries --- benchmarks/btree_irs_bench.cpp | 90 ++++++++++++++++++++++++++++++++++++++ benchmarks/btree_rq_bench.cpp | 89 +++++++++++++++++++++++++++++++++++++ benchmarks/include/bench.h | 6 ++- benchmarks/include/bench_utility.h | 31 ++++++++++++- 4 files changed, 213 insertions(+), 3 deletions(-) create mode 100644 benchmarks/btree_irs_bench.cpp create mode 100644 benchmarks/btree_rq_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/btree_irs_bench.cpp b/benchmarks/btree_irs_bench.cpp new file mode 100644 index 0000000..c64df8a --- /dev/null +++ b/benchmarks/btree_irs_bench.cpp @@ -0,0 +1,90 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_sample_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=10) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector sample_set; + sample_set.reserve(queries[0].sample_size); + + for (int i=0; i(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + fprintf(stdout, "%ld\t", memory_usage); + + btree_sample_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/btree_rq_bench.cpp b/benchmarks/btree_rq_bench.cpp new file mode 100644 index 0000000..818e6f4 --- /dev/null +++ b/benchmarks/btree_rq_bench.cpp @@ -0,0 +1,89 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_rq_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; ikey <= queries[j].upper_bound) { + result_set.emplace_back(*ptr); + ptr++; + } + result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + fprintf(stdout, "%ld\t", memory_usage); + + btree_rq_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h index e0f4c1d..28c1e97 100644 --- a/benchmarks/include/bench.h +++ b/benchmarks/include/bench.h @@ -49,7 +49,11 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else { + de_index.erase(delete_vec[delete_idx++]); + } applied_deletes++; } diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index a5f5e0b..cc926a3 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -14,6 +14,7 @@ #include "shard/PGM.h" #include "shard/TrieSpline.h" #include "shard/WIRS.h" +#include "ds/BTree.h" #include #include @@ -42,6 +43,28 @@ typedef de::DynamicExtension, de::PGMRangeQuery> Extended typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +typedef tlx::BTree TreeMap; + + static gsl_rng *g_rng; static std::set *g_to_delete; static bool g_osm_data; @@ -246,7 +269,7 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count, size_t inserted = 0; size_t delete_idx = 0; - + double last_percent = 0; while (inserted < count) { // Build vector of records to insert and potentially delete @@ -259,7 +282,11 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count, for (size_t i=0; i) { + extended_index.erase_one(delete_vec[delete_idx++].key); + } else { + extended_index.erase(delete_vec[delete_idx++]); + } } // insert the record; -- cgit v1.2.3