From 3680667a07d924043c5da7bba8a11883bb918b97 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 15 Nov 2023 15:18:56 -0500 Subject: Insertion throughput benchmark --- benchmarks/insertion_tput.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 benchmarks/insertion_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp new file mode 100644 index 0000000..ad53443 --- /dev/null +++ b/benchmarks/insertion_tput.cpp @@ -0,0 +1,49 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; + + + +int main(int argc, char **argv) { + + auto extension = Ext(1000, 2, 1); + + size_t n = 1000000; + size_t per_trial = 100; + + std::vector latencies; + + TIMER_INIT(); + for (int64_t i=0; i Date: Thu, 16 Nov 2023 11:59:52 -0500 Subject: insert_tput: minor adjustments --- benchmarks/insertion_tput.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp index ad53443..5959173 100644 --- a/benchmarks/insertion_tput.cpp +++ b/benchmarks/insertion_tput.cpp @@ -21,29 +21,24 @@ typedef de::DynamicExtension Ext; int main(int argc, char **argv) { - auto extension = Ext(1000, 2, 1); + auto extension = new Ext(10000, 2, 1); - size_t n = 1000000; - size_t per_trial = 100; - - std::vector latencies; + size_t n = 1000000000; + size_t per_trial = 1000; TIMER_INIT(); for (int64_t i=0; iinsert(r); } TIMER_STOP(); + auto insert_lat = TIMER_RESULT(); - auto res = TIMER_RESULT(); - - latencies.push_back(TIMER_RESULT()); + fprintf(stdout, "%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, per_trial); } - for (size_t i=0; i Date: Wed, 13 Dec 2023 12:40:16 -0500 Subject: Query throughput benchmark --- benchmarks/insert_query_tput.cpp | 79 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 benchmarks/insert_query_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp new file mode 100644 index 0000000..fe85e68 --- /dev/null +++ b/benchmarks/insert_query_tput.cpp @@ -0,0 +1,79 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; + +std::atomic inserts_done = false; + +void insert_thread(Ext *extension, size_t n, size_t k) { + TIMER_INIT(); + for (int64_t i=0; iinsert(r); + } + TIMER_STOP(); + auto insert_lat = TIMER_RESULT(); + + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, k); + } + + inserts_done.store(true); +} + +void query_thread(Ext *extension, double selectivity, size_t k) { + TIMER_INIT(); + + while (!inserts_done.load()) { + size_t reccnt = extension->get_record_count(); + size_t range = reccnt * selectivity; + + auto q = new de::rq::Parms(); + + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + } +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + auto extension = new Ext(10000, 2, 1, 0, 2); + size_t n = 10000000; + size_t per_trial = 1000; + double selectivity = .001; + + std::thread i_thrd(insert_thread, extension, n, per_trial); + std::thread q_thrd(query_thread, extension, selectivity, 1); + + q_thrd.join(); + i_thrd.join(); + fflush(stderr); +} + -- cgit v1.2.3 From 0a9e79416df03a9e0a3d2cf171cf90028a644d6d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 15 Jan 2024 17:21:11 -0500 Subject: Benchmarking programs --- benchmarks/alex_rq_bench.cpp | 205 ------------------------- benchmarks/alias_wss_bench.cpp | 57 ------- benchmarks/btree_irs_bench.cpp | 91 ----------- benchmarks/btree_rq_bench.cpp | 90 ----------- benchmarks/insert_query_tput.cpp | 7 +- benchmarks/insertion_tput.cpp | 6 +- benchmarks/isam_irs_bench.cpp | 64 -------- benchmarks/isam_rq_bench.cpp | 59 -------- benchmarks/mtree_knn_bench.cpp | 83 ---------- benchmarks/old-bench/alex_rq_bench.cpp | 205 +++++++++++++++++++++++++ benchmarks/old-bench/alias_wss_bench.cpp | 57 +++++++ benchmarks/old-bench/btree_irs_bench.cpp | 91 +++++++++++ benchmarks/old-bench/btree_rq_bench.cpp | 90 +++++++++++ benchmarks/old-bench/isam_irs_bench.cpp | 64 ++++++++ benchmarks/old-bench/isam_rq_bench.cpp | 59 ++++++++ benchmarks/old-bench/mtree_knn_bench.cpp | 83 ++++++++++ benchmarks/old-bench/pgm_pl_bench.cpp | 67 +++++++++ benchmarks/old-bench/pgm_rq_bench.cpp | 67 +++++++++ benchmarks/old-bench/test.cpp | 7 + benchmarks/old-bench/triespline_rq_bench.cpp | 66 ++++++++ benchmarks/old-bench/upgm_pl_bench.cpp | 212 ++++++++++++++++++++++++++ benchmarks/old-bench/upgm_rq_bench.cpp | 217 +++++++++++++++++++++++++++ benchmarks/old-bench/vptree_knn_bench.cpp | 58 +++++++ benchmarks/pgm_pl_bench.cpp | 67 --------- benchmarks/pgm_rq_bench.cpp | 67 --------- benchmarks/reconstruction_interference.cpp | 110 ++++++++++++++ benchmarks/test.cpp | 7 - benchmarks/triespline_rq_bench.cpp | 66 -------- benchmarks/upgm_pl_bench.cpp | 212 -------------------------- benchmarks/upgm_rq_bench.cpp | 217 --------------------------- benchmarks/vptree_knn_bench.cpp | 58 ------- 31 files changed, 1462 insertions(+), 1347 deletions(-) delete mode 100644 benchmarks/alex_rq_bench.cpp delete mode 100644 benchmarks/alias_wss_bench.cpp delete mode 100644 benchmarks/btree_irs_bench.cpp delete mode 100644 benchmarks/btree_rq_bench.cpp delete mode 100644 benchmarks/isam_irs_bench.cpp delete mode 100644 benchmarks/isam_rq_bench.cpp delete mode 100644 benchmarks/mtree_knn_bench.cpp create mode 100644 benchmarks/old-bench/alex_rq_bench.cpp create mode 100644 benchmarks/old-bench/alias_wss_bench.cpp create mode 100644 benchmarks/old-bench/btree_irs_bench.cpp create mode 100644 benchmarks/old-bench/btree_rq_bench.cpp create mode 100644 benchmarks/old-bench/isam_irs_bench.cpp create mode 100644 benchmarks/old-bench/isam_rq_bench.cpp create mode 100644 benchmarks/old-bench/mtree_knn_bench.cpp create mode 100644 benchmarks/old-bench/pgm_pl_bench.cpp create mode 100644 benchmarks/old-bench/pgm_rq_bench.cpp create mode 100644 benchmarks/old-bench/test.cpp create mode 100644 benchmarks/old-bench/triespline_rq_bench.cpp create mode 100644 benchmarks/old-bench/upgm_pl_bench.cpp create mode 100644 benchmarks/old-bench/upgm_rq_bench.cpp create mode 100644 benchmarks/old-bench/vptree_knn_bench.cpp delete mode 100644 benchmarks/pgm_pl_bench.cpp delete mode 100644 benchmarks/pgm_rq_bench.cpp create mode 100644 benchmarks/reconstruction_interference.cpp delete mode 100644 benchmarks/test.cpp delete mode 100644 benchmarks/triespline_rq_bench.cpp delete mode 100644 benchmarks/upgm_pl_bench.cpp delete mode 100644 benchmarks/upgm_rq_bench.cpp delete mode 100644 benchmarks/vptree_knn_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/alex_rq_bench.cpp b/benchmarks/alex_rq_bench.cpp deleted file mode 100644 index f75afa6..0000000 --- a/benchmarks/alex_rq_bench.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#include "alex.h" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef alex::Alex Alex; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::pair *insert_vec = new std::pair[count]; - Alex *alex = new Alex(); - - size_t cnt = 0; - record rec; - while (cnt < count && next_record(file, rec)) { - insert_vec[cnt] = {rec.key, rec.value}; - cnt++; - } - - std::sort(insert_vec, insert_vec + count); - - alex->bulk_load(insert_vec, count); - delete[] insert_vec; - - return alex; -} - - -static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: alex_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.8; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries(qfilename, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); - - fprintf(stderr, "Size: %ld\n", alex->size()); - size_t insert_cnt = record_count - warmup_cnt; - - alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = alex->model_size() + alex->data_size(); - - fprintf(stderr, "Size: %ld\n", alex->size()); - fprintf(stdout, "%ld\t", memory_usage); - - alex_rq_bench(*alex, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - delete alex; - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/alias_wss_bench.cpp b/benchmarks/alias_wss_bench.cpp deleted file mode 100644 index a3a43f2..0000000 --- a/benchmarks/alias_wss_bench.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * benchmarks/alias_wss_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 4) { - fprintf(stderr, "Usage: sampling_tput [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - bool use_osm = (argc == 5) ? atoi(argv[4]) : 0; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - - auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_wss, warmup_cnt, delete_prop, to_delete); - - size_t insert_cnt = record_count - warmup_cnt; - - std::vector> queries(1); - queries[0].rng = g_rng; - queries[0].sample_size = 1000; - - insert_tput_bench(de_wss, datafile, insert_cnt, delete_prop, to_delete); - query_latency_bench>(de_wss, queries, 1000); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/btree_irs_bench.cpp b/benchmarks/btree_irs_bench.cpp deleted file mode 100644 index 862fc6b..0000000 --- a/benchmarks/btree_irs_bench.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "include/bench.h" -#include "ds/BTree.h" - -static void btree_sample_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=10) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector sample_set; - sample_set.reserve(queries[0].sample_size); - - for (int i=0; i(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - - - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: btree_irs_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .001); - - for (auto &q: queries) { - q.rng = g_rng; - q.sample_size = 1000; - } - - auto btree = TreeMap(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); - memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); - fprintf(stdout, "%ld\t", memory_usage); - - btree_sample_bench(btree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/btree_rq_bench.cpp b/benchmarks/btree_rq_bench.cpp deleted file mode 100644 index d92b45d..0000000 --- a/benchmarks/btree_rq_bench.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "include/bench.h" -#include "ds/BTree.h" - -static void btree_rq_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; ikey <= queries[j].upper_bound) { - result_set.emplace_back(*ptr); - ptr++; - } - result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - - - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: btree_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .0001); - - auto btree = TreeMap(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); - memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); - fprintf(stdout, "%ld\t", memory_usage); - - btree_rq_bench(btree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index fe85e68..09179b0 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -27,7 +27,9 @@ void insert_thread(Ext *extension, size_t n, size_t k) { TIMER_START(); for (int64_t j=0; jinsert(r); + while (!extension->insert(r)) { + _mm_pause(); + } } TIMER_STOP(); auto insert_lat = TIMER_RESULT(); @@ -58,13 +60,14 @@ void query_thread(Ext *extension, double selectivity, size_t k) { TIMER_STOP(); auto query_lat = TIMER_RESULT(); fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; } } int main(int argc, char **argv) { /* the closeout routine takes _forever_ ... so we'll just leak the memory */ - auto extension = new Ext(10000, 2, 1, 0, 2); + auto extension = new Ext(1000, 10000, 2); size_t n = 10000000; size_t per_trial = 1000; double selectivity = .001; diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp index 5959173..5498f93 100644 --- a/benchmarks/insertion_tput.cpp +++ b/benchmarks/insertion_tput.cpp @@ -21,7 +21,7 @@ typedef de::DynamicExtension Ext; int main(int argc, char **argv) { - auto extension = new Ext(10000, 2, 1); + auto extension = new Ext(1000, 10000, 2); size_t n = 1000000000; size_t per_trial = 1000; @@ -31,7 +31,9 @@ int main(int argc, char **argv) { TIMER_START(); for (int64_t j=0; jinsert(r); + while (!extension->insert(r)) { + _mm_pause(); + } } TIMER_STOP(); auto insert_lat = TIMER_RESULT(); diff --git a/benchmarks/isam_irs_bench.cpp b/benchmarks/isam_irs_bench.cpp deleted file mode 100644 index 96525f0..0000000 --- a/benchmarks/isam_irs_bench.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: isam_irs_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .001); - - for (auto &q: queries) { - q.rng = g_rng; - q.sample_size = 1000; - } - - auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_irs, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_irs.get_memory_usage()); - query_latency_bench>(de_irs, queries); - fprintf(stdout, "\n"); - - auto ts = de_irs.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::irs_query_parms, de::IRSQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/isam_rq_bench.cpp b/benchmarks/isam_rq_bench.cpp deleted file mode 100644 index bb5626e..0000000 --- a/benchmarks/isam_rq_bench.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: isam_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .0001); - - auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage()); - query_latency_bench>(de_isam_rq, queries); - fprintf(stdout, "\n"); - - auto ts = de_isam_rq.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::ISAMRangeQueryParms, de::ISAMRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/mtree_knn_bench.cpp deleted file mode 100644 index 9d4cc57..0000000 --- a/benchmarks/mtree_knn_bench.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "include/bench.h" -#include "mtree.h" - -static void mtree_knn_bench(MTree &tree, std::vector> queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i results; - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; jdata); - itr++; - } - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: mtree_knn_bench [k]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - size_t k = (argc == 6) ? atol(argv[5]) : 10; - - init_bench_env(record_count, true); - auto queries = read_knn_queries>(qfilename, k); - - auto mtree = MTree(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = 0.1 * record_count; - warmup(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true); - // fprintf(stdout, "%ld\t", mtree.get_memory_usage()); - - mtree_knn_bench(mtree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/old-bench/alex_rq_bench.cpp b/benchmarks/old-bench/alex_rq_bench.cpp new file mode 100644 index 0000000..f75afa6 --- /dev/null +++ b/benchmarks/old-bench/alex_rq_bench.cpp @@ -0,0 +1,205 @@ +#include "alex.h" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef alex::Alex Alex; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::pair *insert_vec = new std::pair[count]; + Alex *alex = new Alex(); + + size_t cnt = 0; + record rec; + while (cnt < count && next_record(file, rec)) { + insert_vec[cnt] = {rec.key, rec.value}; + cnt++; + } + + std::sort(insert_vec, insert_vec + count); + + alex->bulk_load(insert_vec, count); + delete[] insert_vec; + + return alex; +} + + +static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: alex_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.8; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries(qfilename, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + + fprintf(stderr, "Size: %ld\n", alex->size()); + size_t insert_cnt = record_count - warmup_cnt; + + alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = alex->model_size() + alex->data_size(); + + fprintf(stderr, "Size: %ld\n", alex->size()); + fprintf(stdout, "%ld\t", memory_usage); + + alex_rq_bench(*alex, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + delete alex; + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/alias_wss_bench.cpp b/benchmarks/old-bench/alias_wss_bench.cpp new file mode 100644 index 0000000..a3a43f2 --- /dev/null +++ b/benchmarks/old-bench/alias_wss_bench.cpp @@ -0,0 +1,57 @@ +/* + * benchmarks/alias_wss_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 4) { + fprintf(stderr, "Usage: sampling_tput [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + bool use_osm = (argc == 5) ? atoi(argv[4]) : 0; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + + auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_wss, warmup_cnt, delete_prop, to_delete); + + size_t insert_cnt = record_count - warmup_cnt; + + std::vector> queries(1); + queries[0].rng = g_rng; + queries[0].sample_size = 1000; + + insert_tput_bench(de_wss, datafile, insert_cnt, delete_prop, to_delete); + query_latency_bench>(de_wss, queries, 1000); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/btree_irs_bench.cpp b/benchmarks/old-bench/btree_irs_bench.cpp new file mode 100644 index 0000000..862fc6b --- /dev/null +++ b/benchmarks/old-bench/btree_irs_bench.cpp @@ -0,0 +1,91 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_sample_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=10) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector sample_set; + sample_set.reserve(queries[0].sample_size); + + for (int i=0; i(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); + fprintf(stdout, "%ld\t", memory_usage); + + btree_sample_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/btree_rq_bench.cpp b/benchmarks/old-bench/btree_rq_bench.cpp new file mode 100644 index 0000000..d92b45d --- /dev/null +++ b/benchmarks/old-bench/btree_rq_bench.cpp @@ -0,0 +1,90 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_rq_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; ikey <= queries[j].upper_bound) { + result_set.emplace_back(*ptr); + ptr++; + } + result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); + fprintf(stdout, "%ld\t", memory_usage); + + btree_rq_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/isam_irs_bench.cpp b/benchmarks/old-bench/isam_irs_bench.cpp new file mode 100644 index 0000000..96525f0 --- /dev/null +++ b/benchmarks/old-bench/isam_irs_bench.cpp @@ -0,0 +1,64 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_irs, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_irs.get_memory_usage()); + query_latency_bench>(de_irs, queries); + fprintf(stdout, "\n"); + + auto ts = de_irs.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::irs_query_parms, de::IRSQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/isam_rq_bench.cpp b/benchmarks/old-bench/isam_rq_bench.cpp new file mode 100644 index 0000000..bb5626e --- /dev/null +++ b/benchmarks/old-bench/isam_rq_bench.cpp @@ -0,0 +1,59 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage()); + query_latency_bench>(de_isam_rq, queries); + fprintf(stdout, "\n"); + + auto ts = de_isam_rq.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::ISAMRangeQueryParms, de::ISAMRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/mtree_knn_bench.cpp b/benchmarks/old-bench/mtree_knn_bench.cpp new file mode 100644 index 0000000..9d4cc57 --- /dev/null +++ b/benchmarks/old-bench/mtree_knn_bench.cpp @@ -0,0 +1,83 @@ +#include "include/bench.h" +#include "mtree.h" + +static void mtree_knn_bench(MTree &tree, std::vector> queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i results; + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; jdata); + itr++; + } + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: mtree_knn_bench [k]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + size_t k = (argc == 6) ? atol(argv[5]) : 10; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, k); + + auto mtree = MTree(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = 0.1 * record_count; + warmup(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true); + // fprintf(stdout, "%ld\t", mtree.get_memory_usage()); + + mtree_knn_bench(mtree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/pgm_pl_bench.cpp b/benchmarks/old-bench/pgm_pl_bench.cpp new file mode 100644 index 0000000..f798861 --- /dev/null +++ b/benchmarks/old-bench/pgm_pl_bench.cpp @@ -0,0 +1,67 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: pgm_pl_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 1000; + size_t scale_factor = 6; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedPGM_PL(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_lookup_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::PGMPointLookupParms, de::PGMPointLookup>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/pgm_rq_bench.cpp b/benchmarks/old-bench/pgm_rq_bench.cpp new file mode 100644 index 0000000..e25d29f --- /dev/null +++ b/benchmarks/old-bench/pgm_rq_bench.cpp @@ -0,0 +1,67 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: pgm_rq_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 8; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.5; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedPGMRQ(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_range_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::pgm_range_query_parms, de::PGMRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/test.cpp b/benchmarks/old-bench/test.cpp new file mode 100644 index 0000000..75bffe3 --- /dev/null +++ b/benchmarks/old-bench/test.cpp @@ -0,0 +1,7 @@ +#include "alex.h" + + +int main(int argc, char **argv) { + alex::Alex test; + +} diff --git a/benchmarks/old-bench/triespline_rq_bench.cpp b/benchmarks/old-bench/triespline_rq_bench.cpp new file mode 100644 index 0000000..967c3b0 --- /dev/null +++ b/benchmarks/old-bench/triespline_rq_bench.cpp @@ -0,0 +1,66 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: triespline_rq_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 8; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.5; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedTSRQ(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_range_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::ts_range_query_parms, de::TrieSplineRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/upgm_pl_bench.cpp b/benchmarks/old-bench/upgm_pl_bench.cpp new file mode 100644 index 0000000..e0445b2 --- /dev/null +++ b/benchmarks/old-bench/upgm_pl_bench.cpp @@ -0,0 +1,212 @@ +#include "pgm/pgm_index_dynamic.hpp" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef pgm::DynamicPGMIndex> PGM; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void pgm_pl_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; ifirst == queries[j].lower_bound) { + result_set.push_back({ptr->first, ptr->second}); + } + result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: upgm_pl_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + double insert_batch = 0.1; + + init_bench_env(record_count, true); + auto queries = read_range_queries(qfilename, .0001); + + std::vector> data; + PGM pgm(data.begin(), data.end()); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = pgm.size_in_bytes(); + fprintf(stdout, "%ld\t", memory_usage); + + pgm_pl_bench(pgm, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/upgm_rq_bench.cpp b/benchmarks/old-bench/upgm_rq_bench.cpp new file mode 100644 index 0000000..940a9e6 --- /dev/null +++ b/benchmarks/old-bench/upgm_rq_bench.cpp @@ -0,0 +1,217 @@ +#include "pgm/pgm_index_dynamic.hpp" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef pgm::DynamicPGMIndex> PGM; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void pgm_rq_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + //std::vector result_set; + size_t tot = 0; + + for (int i=0; ifirst <= queries[j].upper_bound) { + ++tot; + //result_set.push_back({ptr->first, ptr->second}); + ++ptr; + } + assert(tot > 0); + //result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: upgm_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + double insert_batch = 0.5; + + init_bench_env(record_count, true); + auto queries = read_range_queries(qfilename, .0001); + + std::vector> data; + PGM pgm(data.begin(), data.end()); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = pgm.size_in_bytes(); + fprintf(stdout, "%ld\t", memory_usage); + + pgm_rq_bench(pgm, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/vptree_knn_bench.cpp b/benchmarks/old-bench/vptree_knn_bench.cpp new file mode 100644 index 0000000..d8247e4 --- /dev/null +++ b/benchmarks/old-bench/vptree_knn_bench.cpp @@ -0,0 +1,58 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: vptree_knn_bench [k]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + size_t k = (argc == 6) ? atol(argv[5]) : 10; + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, k); + + auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = 0.1 * record_count; + warmup(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); + + query_latency_bench>(de_vp_knn, queries); + fprintf(stdout, "\n"); + + auto ts = de_vp_knn.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Word2VecRec, de::KNNQueryParms, de::KNNQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/pgm_pl_bench.cpp b/benchmarks/pgm_pl_bench.cpp deleted file mode 100644 index f798861..0000000 --- a/benchmarks/pgm_pl_bench.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: pgm_pl_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 1000; - size_t scale_factor = 6; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedPGM_PL(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_lookup_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::PGMPointLookupParms, de::PGMPointLookup>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/pgm_rq_bench.cpp b/benchmarks/pgm_rq_bench.cpp deleted file mode 100644 index e25d29f..0000000 --- a/benchmarks/pgm_rq_bench.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: pgm_rq_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 8; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.5; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedPGMRQ(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_range_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::pgm_range_query_parms, de::PGMRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp new file mode 100644 index 0000000..a843c71 --- /dev/null +++ b/benchmarks/reconstruction_interference.cpp @@ -0,0 +1,110 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; + +void query_thread(Ext *extension, double selectivity, size_t k) { + TIMER_INIT(); + + size_t reccnt = extension->get_record_count(); + size_t range = reccnt * selectivity; + + auto q = new de::rq::Parms(); + + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; +} + +Ext *build_structure(size_t n) { + auto extension = new Ext(1000, 10000, 2); + + size_t i=0; + Rec r; + do { + r.key = rand() % n; + r.value = i; + if (extension->insert(r)) { + i++; + } else { + _mm_pause(); + } + } while (i < n); + + extension->await_next_epoch(); + return extension; +} + +void query_benchmark(double selectivity, size_t k, Ext *extension) { + TIMER_INIT(); + + size_t query_thrd_cnt = 4; + std::vector thrds(query_thrd_cnt); + + TIMER_START(); + for (size_t i=0; iget_record_count(), query_lat, k, query_thrd_cnt); +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + size_t n = 10000000; + + size_t per_trial = 1000; + double selectivity = .001; + + /* build initial structure */ + auto extension = build_structure(n); + + /* benchmark queries w/o any interference from reconstructions */ + query_benchmark(selectivity, per_trial, extension); + + fprintf(stderr, "Running interference test...\n"); + + /* trigger a worst-case reconstruction and benchmark the queries */ + std::thread q_thrd(query_benchmark, selectivity, per_trial, extension); + auto s = extension->create_static_structure(); + fprintf(stderr, "Construction complete\n"); + q_thrd.join(); + + delete extension; + delete s; + + fflush(stderr); +} + diff --git a/benchmarks/test.cpp b/benchmarks/test.cpp deleted file mode 100644 index 75bffe3..0000000 --- a/benchmarks/test.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "alex.h" - - -int main(int argc, char **argv) { - alex::Alex test; - -} diff --git a/benchmarks/triespline_rq_bench.cpp b/benchmarks/triespline_rq_bench.cpp deleted file mode 100644 index 967c3b0..0000000 --- a/benchmarks/triespline_rq_bench.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: triespline_rq_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 8; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.5; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedTSRQ(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_range_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::ts_range_query_parms, de::TrieSplineRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/upgm_pl_bench.cpp b/benchmarks/upgm_pl_bench.cpp deleted file mode 100644 index e0445b2..0000000 --- a/benchmarks/upgm_pl_bench.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "pgm/pgm_index_dynamic.hpp" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef pgm::DynamicPGMIndex> PGM; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void pgm_pl_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; ifirst == queries[j].lower_bound) { - result_set.push_back({ptr->first, ptr->second}); - } - result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: upgm_pl_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - double insert_batch = 0.1; - - init_bench_env(record_count, true); - auto queries = read_range_queries(qfilename, .0001); - - std::vector> data; - PGM pgm(data.begin(), data.end()); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = pgm.size_in_bytes(); - fprintf(stdout, "%ld\t", memory_usage); - - pgm_pl_bench(pgm, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/upgm_rq_bench.cpp b/benchmarks/upgm_rq_bench.cpp deleted file mode 100644 index 940a9e6..0000000 --- a/benchmarks/upgm_rq_bench.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include "pgm/pgm_index_dynamic.hpp" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef pgm::DynamicPGMIndex> PGM; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void pgm_rq_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - //std::vector result_set; - size_t tot = 0; - - for (int i=0; ifirst <= queries[j].upper_bound) { - ++tot; - //result_set.push_back({ptr->first, ptr->second}); - ++ptr; - } - assert(tot > 0); - //result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: upgm_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - double insert_batch = 0.5; - - init_bench_env(record_count, true); - auto queries = read_range_queries(qfilename, .0001); - - std::vector> data; - PGM pgm(data.begin(), data.end()); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = pgm.size_in_bytes(); - fprintf(stdout, "%ld\t", memory_usage); - - pgm_rq_bench(pgm, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp deleted file mode 100644 index d8247e4..0000000 --- a/benchmarks/vptree_knn_bench.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: vptree_knn_bench [k]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - size_t k = (argc == 6) ? atol(argv[5]) : 10; - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - - init_bench_env(record_count, true); - auto queries = read_knn_queries>(qfilename, k); - - auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = 0.1 * record_count; - warmup(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); - - query_latency_bench>(de_vp_knn, queries); - fprintf(stdout, "\n"); - - auto ts = de_vp_knn.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Word2VecRec, de::KNNQueryParms, de::KNNQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} -- cgit v1.2.3 From 38693c342558628c75e0ab0d23c32a95a499ed8b Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Jan 2024 15:58:04 -0500 Subject: Initial rough-out of internal statistics tracker Need to figure out the best way to do the detailed tracking in a concurrent manner. I was thinking just an event log, with parsing routines for extracting statistics. But that'll be pretty slow. --- benchmarks/reconstruction_interference.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'benchmarks') diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp index a843c71..2fb1591 100644 --- a/benchmarks/reconstruction_interference.cpp +++ b/benchmarks/reconstruction_interference.cpp @@ -101,6 +101,8 @@ int main(int argc, char **argv) { auto s = extension->create_static_structure(); fprintf(stderr, "Construction complete\n"); q_thrd.join(); + + extension->print_scheduler_statistics(); delete extension; delete s; -- cgit v1.2.3 From 97ddc19c2f57d54df2fe791ddedcbaf62fd1922e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Jan 2024 10:42:40 -0500 Subject: Moved some benchmarks over to range count --- benchmarks/insert_query_tput.cpp | 6 +++--- benchmarks/reconstruction_interference.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 09179b0..3b63395 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -8,7 +8,7 @@ #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" -#include "query/rangequery.h" +#include "query/rangecount.h" #include "framework/interface/Record.h" #include "psu-util/timer.h" @@ -16,7 +16,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rq::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; std::atomic inserts_done = false; @@ -47,7 +47,7 @@ void query_thread(Ext *extension, double selectivity, size_t k) { size_t reccnt = extension->get_record_count(); size_t range = reccnt * selectivity; - auto q = new de::rq::Parms(); + auto q = new de::rc::Parms(); TIMER_START(); for (int64_t i=0; i Rec; typedef de::ISAMTree ISAM; -typedef de::rq::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; void query_thread(Ext *extension, double selectivity, size_t k) { @@ -25,7 +25,7 @@ void query_thread(Ext *extension, double selectivity, size_t k) { size_t reccnt = extension->get_record_count(); size_t range = reccnt * selectivity; - auto q = new de::rq::Parms(); + auto q = new de::rc::Parms(); TIMER_START(); for (int64_t i=0; i Date: Mon, 22 Jan 2024 10:53:56 -0500 Subject: Benchmarking: updated insert_query_tput to use better rng --- benchmarks/insert_query_tput.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 3b63395..865e82c 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -11,6 +11,8 @@ #include "query/rangecount.h" #include "framework/interface/Record.h" +#include + #include "psu-util/timer.h" @@ -40,18 +42,23 @@ void insert_thread(Ext *extension, size_t n, size_t k) { inserts_done.store(true); } -void query_thread(Ext *extension, double selectivity, size_t k) { +void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { TIMER_INIT(); while (!inserts_done.load()) { size_t reccnt = extension->get_record_count(); + if (reccnt == 0) { + continue; // don't start querying until there is data + } + size_t range = reccnt * selectivity; auto q = new de::rc::Parms(); TIMER_START(); for (int64_t i=0; ilower_bound = start; q->upper_bound = start + range; auto res = extension->query(q); @@ -72,11 +79,15 @@ int main(int argc, char **argv) { size_t per_trial = 1000; double selectivity = .001; + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + std::thread i_thrd(insert_thread, extension, n, per_trial); - std::thread q_thrd(query_thread, extension, selectivity, 1); + std::thread q_thrd(query_thread, extension, selectivity, 1, rng); q_thrd.join(); i_thrd.join(); + + gsl_rng_free(rng); fflush(stderr); } -- cgit v1.2.3 From b1e4182825e6c162571b7cc4efaf8bc44055b49c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Jan 2024 12:10:54 -0500 Subject: Adjusted recon_benchmark and properly shutdown FIFOScheduler --- benchmarks/reconstruction_interference.cpp | 36 ++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp index 5cf364a..c4c8c1b 100644 --- a/benchmarks/reconstruction_interference.cpp +++ b/benchmarks/reconstruction_interference.cpp @@ -19,6 +19,8 @@ typedef de::ISAMTree ISAM; typedef de::rc::Query Q; typedef de::DynamicExtension Ext; +volatile std::atomic queries_done; + void query_thread(Ext *extension, double selectivity, size_t k) { TIMER_INIT(); @@ -60,10 +62,9 @@ Ext *build_structure(size_t n) { return extension; } -void query_benchmark(double selectivity, size_t k, Ext *extension) { +void query_benchmark(double selectivity, size_t k, Ext *extension, size_t query_thrd_cnt) { TIMER_INIT(); - size_t query_thrd_cnt = 4; std::vector thrds(query_thrd_cnt); TIMER_START(); @@ -78,6 +79,8 @@ void query_benchmark(double selectivity, size_t k, Ext *extension) { auto query_lat = TIMER_RESULT(); fprintf(stdout, "Q\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), query_lat, k, query_thrd_cnt); + + queries_done.store(true); } int main(int argc, char **argv) { @@ -91,21 +94,30 @@ int main(int argc, char **argv) { /* build initial structure */ auto extension = build_structure(n); - /* benchmark queries w/o any interference from reconstructions */ - query_benchmark(selectivity, per_trial, extension); + std::vector thread_counts = {8, 16, 32, 64, 128}; + + for (auto &threads : thread_counts) { + /* benchmark queries w/o any interference from reconstructions */ + query_benchmark(selectivity, per_trial, extension, threads); + + fprintf(stderr, "Running interference test...\n"); - fprintf(stderr, "Running interference test...\n"); + queries_done.store(false); + /* trigger a worst-case reconstruction and benchmark the queries */ - /* trigger a worst-case reconstruction and benchmark the queries */ - std::thread q_thrd(query_benchmark, selectivity, per_trial, extension); - auto s = extension->create_static_structure(); - fprintf(stderr, "Construction complete\n"); - q_thrd.join(); + std::thread q_thrd(query_benchmark, selectivity, per_trial, extension, threads); + + while (!queries_done.load()) { + auto s = extension->create_static_structure(); + delete s; + } + + fprintf(stderr, "Construction complete\n"); + q_thrd.join(); + } extension->print_scheduler_statistics(); - delete extension; - delete s; fflush(stderr); } -- cgit v1.2.3 From dea7f559c4fa5cd603ce23d66c3c1b9311adc1cb Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Jan 2024 13:33:11 -0500 Subject: WAtermark testing benchmark --- benchmarks/watermark_testing.cpp | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 benchmarks/watermark_testing.cpp (limited to 'benchmarks') diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp new file mode 100644 index 0000000..1abe7f5 --- /dev/null +++ b/benchmarks/watermark_testing.cpp @@ -0,0 +1,53 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; + + + +int main(int argc, char **argv) { + std::vector hwms = {5000l, 10000l, 20000l, 50000l}; + std::vector lwms = {.1, .2, .3, .4, .5}; + + size_t n = 1000000; + + TIMER_INIT(); + + for (auto &hwm : hwms) { + for (size_t i=0; iinsert(r)) { + _mm_pause(); + } + } + TIMER_STOP(); + + auto insert_time = TIMER_RESULT(); + double insert_throughput = (double) n / (double) insert_time * 1e9; + + fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput); + + delete extension; + } + } +} + -- cgit v1.2.3 From 4aa907d6275b1b74be87ed2f2e94d8a2719a6a97 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 24 Jan 2024 11:31:28 -0500 Subject: Multithreaded Insertion Benchmark --- benchmarks/insertion_tput.cpp | 48 +++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp index 5498f93..785b933 100644 --- a/benchmarks/insertion_tput.cpp +++ b/benchmarks/insertion_tput.cpp @@ -18,27 +18,53 @@ typedef de::rq::Query Q; typedef de::DynamicExtension Ext; +void insert_thread(int64_t start, int64_t end, Ext *extension) { + for (int64_t i=start; iinsert(r)) { + _mm_pause(); + } + } +} + int main(int argc, char **argv) { - auto extension = new Ext(1000, 10000, 2); size_t n = 1000000000; - size_t per_trial = 1000; - TIMER_INIT(); - for (int64_t i=0; i counts = {1, 2, 4, 8}; //, 16, 32, 64}; + + + for (auto thread_count : counts) { + + auto extension = new Ext(1000, 12000, 8); + + size_t per_thread = n / thread_count; + + std::thread threads[thread_count]; + + TIMER_INIT(); TIMER_START(); - for (int64_t j=0; jinsert(r)) { - _mm_pause(); - } + for (size_t i=0; iget_record_count(), insert_lat, per_trial); + auto total_time = TIMER_RESULT(); + + double tput = (double) n / (double) total_time * 1e9; + + fprintf(stdout, "%ld\t%d\t%lf\n", extension->get_record_count(), + thread_count, tput); + + delete extension; } fflush(stderr); -- cgit v1.2.3 From f24fdf2fd310a5f868e15cd9682ca37d740c77af Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Jan 2024 15:31:03 -0500 Subject: Benchmarking updates --- benchmarks/include/bench.h | 162 ------------- benchmarks/include/bench_utility.h | 181 --------------- benchmarks/include/btree-util.h | 27 +++ benchmarks/include/data-proc.h | 244 ++++++++++++++++++++ benchmarks/include/standalone_utility.h | 262 ---------------------- benchmarks/insert_tail_latency.cpp | 80 +++++++ benchmarks/old-bench/include/bench.h | 162 +++++++++++++ benchmarks/old-bench/include/bench_utility.h | 181 +++++++++++++++ benchmarks/old-bench/include/standalone_utility.h | 240 ++++++++++++++++++++ benchmarks/static_dynamic_comp.cpp | 117 ++++++++++ benchmarks/watermark_testing.cpp | 4 +- 11 files changed, 1054 insertions(+), 606 deletions(-) delete mode 100644 benchmarks/include/bench.h delete mode 100644 benchmarks/include/bench_utility.h create mode 100644 benchmarks/include/btree-util.h create mode 100644 benchmarks/include/data-proc.h delete mode 100644 benchmarks/include/standalone_utility.h create mode 100644 benchmarks/insert_tail_latency.cpp create mode 100644 benchmarks/old-bench/include/bench.h create mode 100644 benchmarks/old-bench/include/bench_utility.h create mode 100644 benchmarks/old-bench/include/standalone_utility.h create mode 100644 benchmarks/static_dynamic_comp.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h deleted file mode 100644 index 586ff12..0000000 --- a/benchmarks/include/bench.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * benchmarks/include/bench.h - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "bench_utility.h" - -template -static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, - double delete_prop, std::vector &to_delete, bool binary=false) { - - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - if constexpr (PROGRESS) { - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i) { - de_index.erase_one(delete_vec[delete_idx++].key); - } else if constexpr (std::is_same_v) { - de_index.remove(delete_vec[delete_idx++]); - } else { - de_index.erase(delete_vec[delete_idx++]); - } - applied_deletes++; - } - - // insert the record; - if constexpr (std::is_same_v) { - de_index.add(insert_vec[i]); - } else { - de_index.insert(insert_vec[i]); - } - applied_inserts++; - } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); - } - - if constexpr (PROGRESS) { - progress_update(1.0, "inserting:"); - } - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); - reset_de_perf_metrics(); - - return continue_benchmark; -} - -template -static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} - - -template -static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i states(1); - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; j(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h deleted file mode 100644 index e33b93d..0000000 --- a/benchmarks/include/bench_utility.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * benchmarks/include/bench_utility.h - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "framework/DynamicExtension.h" -#include "shard/WSS.h" -#include "shard/MemISAM.h" -#include "shard/PGM.h" -#include "shard/TrieSpline.h" -#include "shard/WIRS.h" -#include "ds/BTree.h" -#include "shard/VPTree.h" -#include "mtree.h" -#include "standalone_utility.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef uint64_t key_type; -typedef uint64_t value_type; -typedef uint64_t weight_type; - -typedef de::WeightedRecord WRec; -typedef de::Record Rec; - -const size_t W2V_SIZE = 300; -typedef de::EuclidPoint Word2VecRec; - -typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; -typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; -typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; -typedef de::DynamicExtension, de::PGMPointLookup> ExtendedPGM_PL; -typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; -typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; -typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; - -struct euclidean_distance { - double operator()(const Word2VecRec &first, const Word2VecRec &second) const { - double dist = 0; - for (size_t i=0; i TreeMap; -typedef mt::mtree MTree; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i) { - if (!next_vector_record(file, rec)) { - if (i == 0) { - return false; - } - - break; - } - } else { - if (!next_record(file, rec, binary)) { - if (i == 0) { - return false; - } - - break; - } - } - - vec.emplace_back(rec); - - if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { - to_delete.emplace_back(rec); - } - } - - return true; -} - - -template -static bool warmup(std::fstream &file, DE &extended_index, size_t count, - double delete_prop, std::vector to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i) { - extended_index.erase_one(delete_vec[delete_idx++].key); - } - else if constexpr (std::is_same_v) { - extended_index.remove(delete_vec[delete_idx++]); - } else { - extended_index.erase(delete_vec[delete_idx++]); - } - } - - // insert the record; - if constexpr (std::is_same_v) { - extended_index.add(insert_vec[i]); - } else { - extended_index.insert(insert_vec[i]); - } - inserted++; - - if (progress) { - progress_update((double) inserted / (double) count, "warming up:"); - } - } - } - - return true; -} - - -static void reset_de_perf_metrics() { - - /* - * rejection counters are zeroed automatically by the - * sampling function itself. - */ - - RESET_IO_CNT(); -} diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h new file mode 100644 index 0000000..571c073 --- /dev/null +++ b/benchmarks/include/btree-util.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include "psu-ds/BTree.h" + +struct btree_record { + int64_t key; + int64_t value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const int64_t &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree BenchBTree; + + diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h new file mode 100644 index 0000000..f758ed4 --- /dev/null +++ b/benchmarks/include/data-proc.h @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "psu-ds/BTree.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +static gsl_rng *g_rng; +static bool g_osm_data; + +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree BenchBTree; + +static key_type g_min_key = UINT64_MAX; +static key_type g_max_key = 0; + +static size_t g_max_record_cnt = 0; +static size_t g_reccnt = 0; + +static constexpr unsigned int DEFAULT_SEED = 0; + +static unsigned int get_random_seed() +{ + unsigned int seed = 0; + std::fstream urandom; + urandom.open("/dev/urandom", std::ios::in|std::ios::binary); + urandom.read((char *) &seed, sizeof(seed)); + urandom.close(); + + return seed; +} + +static key_type osm_to_key(const char *key_field) { + double tmp_key = (atof(key_field) + 180) * 10e6; + return (key_type) tmp_key; +} + +static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) +{ + g_rng = gsl_rng_alloc(type); + gsl_rng_set(g_rng, seed); +} + +static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) +{ + unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; + init_bench_rng(seed, gsl_rng_mt19937); + g_osm_data = osm_correction; + g_max_record_cnt = max_reccnt; + g_reccnt = 0; +} + +static void delete_bench_env() +{ + gsl_rng_free(g_rng); +} + + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +/* + * NOTE: The QP type must have lower_bound and upper_bound attributes, which + * this function will initialize. Any other query parameter attributes must + * be manually initialized after the call. + */ +template +static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + +template +static bool next_record(std::fstream &file, R &record, bool binary=false) +{ + static value_type value = 1; + if (g_reccnt >= g_max_record_cnt) return false; + + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + std::string key_field; + std::string value_field; + std::string weight_field; + + std::getline(line_stream, value_field, '\t'); + std::getline(line_stream, key_field, '\t'); + std::getline(line_stream, weight_field, '\t'); + + record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); + record.value = atol(value_field.c_str()); + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + g_reccnt++; + + return true; + } + + return false; +} + +template +static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { + vec.clear(); + + size_t cnt = 0; + while (cnt < n) { + if (to_delete.size() == 0) { + return false; + } + + auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); + vec.emplace_back(to_delete[i]); + to_delete.erase(to_delete.begin() + i); + } +td: + return true; +} diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/include/standalone_utility.h deleted file mode 100644 index 9876e84..0000000 --- a/benchmarks/include/standalone_utility.h +++ /dev/null @@ -1,262 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef uint64_t key_type; -typedef uint64_t value_type; -typedef uint64_t weight_type; - -static gsl_rng *g_rng; -static bool g_osm_data; - -struct btree_record { - key_type key; - value_type value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const key_type &get(const btree_record &v) { - return v.key; - } -}; - -static key_type g_min_key = UINT64_MAX; -static key_type g_max_key = 0; - -static size_t g_max_record_cnt = 0; -static size_t g_reccnt = 0; - -static constexpr unsigned int DEFAULT_SEED = 0; - -static unsigned int get_random_seed() -{ - unsigned int seed = 0; - std::fstream urandom; - urandom.open("/dev/urandom", std::ios::in|std::ios::binary); - urandom.read((char *) &seed, sizeof(seed)); - urandom.close(); - - return seed; -} - -static key_type osm_to_key(const char *key_field) { - double tmp_key = (atof(key_field) + 180) * 10e6; - return (key_type) tmp_key; -} - -static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) -{ - g_rng = gsl_rng_alloc(type); - gsl_rng_set(g_rng, seed); -} - -static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) -{ - unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; - init_bench_rng(seed, gsl_rng_mt19937); - g_osm_data = osm_correction; - g_max_record_cnt = max_reccnt; - g_reccnt = 0; -} - -static void delete_bench_env() -{ - gsl_rng_free(g_rng); -} - - -template -static std::vector read_lookup_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.target_key = start; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_range_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.lower_bound = start; - q.upper_bound = stop; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_knn_queries(std::string fname, size_t k) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - char *line = NULL; - size_t len = 0; - - while (getline(&line, &len, qf) > 0) { - char *token; - QP query; - size_t idx = 0; - - token = strtok(line, " "); - do { - query.point.data[idx++] = atof(token); - } while ((token = strtok(NULL, " "))); - - query.k = k; - queries.emplace_back(query); - } - - free(line); - fclose(qf); - - return queries; -} - -/* - * NOTE: The QP type must have lower_bound and upper_bound attributes, which - * this function will initialize. Any other query parameter attributes must - * be manually initialized after the call. - */ -template -static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - for (size_t i=0; i<300; i++) { - std::string dimension; - - std::getline(line_stream, dimension, ' '); - record.data[i] = atof(dimension.c_str()); - } - - g_reccnt++; - - return true; - } - - return false; - -} - -template -static bool next_record(std::fstream &file, R &record, bool binary=false) -{ - static value_type value = 1; - if (g_reccnt >= g_max_record_cnt) return false; - - if (binary) { - if (file.good()) { - decltype(R::key) key; - - file.read((char*) &key, sizeof(key)); - record.key = key; - record.value = value; - value++; - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - return true; - } - - return false; - } - - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - std::string key_field; - std::string value_field; - std::string weight_field; - - std::getline(line_stream, value_field, '\t'); - std::getline(line_stream, key_field, '\t'); - std::getline(line_stream, weight_field, '\t'); - - record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record.value = atol(value_field.c_str()); - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - g_reccnt++; - - return true; - } - - return false; -} - -template -static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { - vec.clear(); - - size_t cnt = 0; - while (cnt < n) { - if (to_delete.size() == 0) { - return false; - } - - auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); - vec.emplace_back(to_delete[i]); - to_delete.erase(to_delete.begin() + i); - } -td: - return true; -} - -/* - * helper routines for displaying progress bars to stderr - */ -static const char *g_prog_bar = "======================================================================"; -static const size_t g_prog_width = 50; - -static void progress_update(double percentage, std::string prompt) { - int val = (int) (percentage * 100); - int lpad = (int) (percentage * g_prog_width); - int rpad = (int) (g_prog_width - lpad); - fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, ""); - fflush(stderr); - - if (percentage >= 1) fprintf(stderr, "\n"); -} diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp new file mode 100644 index 0000000..5e32898 --- /dev/null +++ b/benchmarks/insert_tail_latency.cpp @@ -0,0 +1,80 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +std::atomic total_latency = 0; + +void insert_thread(Ext *extension, size_t n, size_t k, size_t rate) { + int64_t delay = (1.0 / (double) rate) * 10e6; // delay in us + TIMER_INIT(); + for (int64_t i=0; iinsert(r)) { + _mm_pause(); + } + + //usleep(delay); + /* + for (size_t i=0; i<10000; i++) { + __asm__ __volatile__ ("":::"memory"); + } + */ + } + TIMER_STOP(); + + auto insert_lat = TIMER_RESULT(); + + total_latency.fetch_add(insert_lat); + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", i+k, insert_lat, k); + } +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + auto extension = new Ext(100, 1000000, 3); + size_t n = 100000000; + size_t per_trial = 1000; + double selectivity = .001; + size_t rate = 1000000; + + total_latency.store(0); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + std::thread i_thrd1(insert_thread, extension, n/2, per_trial, rate); + std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate); + + i_thrd1.join(); + i_thrd2.join(); + + auto avg_latency = total_latency.load() / n; + auto throughput = (int64_t) ((double) n / (double) total_latency * 1e9); + + fprintf(stdout, "AVG LAT: %ld\nThroughput: %ld\n", avg_latency, throughput); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/old-bench/include/bench.h b/benchmarks/old-bench/include/bench.h new file mode 100644 index 0000000..586ff12 --- /dev/null +++ b/benchmarks/old-bench/include/bench.h @@ -0,0 +1,162 @@ +/* + * benchmarks/include/bench.h + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include "bench_utility.h" + +template +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, std::vector &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + reset_de_perf_metrics(); + + return continue_benchmark; +} + +template +static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template +static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h new file mode 100644 index 0000000..e33b93d --- /dev/null +++ b/benchmarks/old-bench/include/bench_utility.h @@ -0,0 +1,181 @@ +/* + * benchmarks/include/bench_utility.h + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include "framework/DynamicExtension.h" +#include "shard/WSS.h" +#include "shard/MemISAM.h" +#include "shard/PGM.h" +#include "shard/TrieSpline.h" +#include "shard/WIRS.h" +#include "ds/BTree.h" +#include "shard/VPTree.h" +#include "mtree.h" +#include "standalone_utility.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +typedef de::WeightedRecord WRec; +typedef de::Record Rec; + +const size_t W2V_SIZE = 300; +typedef de::EuclidPoint Word2VecRec; + +typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; +typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; +typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; +typedef de::DynamicExtension, de::PGMPointLookup> ExtendedPGM_PL; +typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; +typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; +typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; + +struct euclidean_distance { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double dist = 0; + for (size_t i=0; i TreeMap; +typedef mt::mtree MTree; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i) { + if (!next_vector_record(file, rec)) { + if (i == 0) { + return false; + } + + break; + } + } else { + if (!next_record(file, rec, binary)) { + if (i == 0) { + return false; + } + + break; + } + } + + vec.emplace_back(rec); + + if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { + to_delete.emplace_back(rec); + } + } + + return true; +} + + +template +static bool warmup(std::fstream &file, DE &extended_index, size_t count, + double delete_prop, std::vector to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i) { + extended_index.erase_one(delete_vec[delete_idx++].key); + } + else if constexpr (std::is_same_v) { + extended_index.remove(delete_vec[delete_idx++]); + } else { + extended_index.erase(delete_vec[delete_idx++]); + } + } + + // insert the record; + if constexpr (std::is_same_v) { + extended_index.add(insert_vec[i]); + } else { + extended_index.insert(insert_vec[i]); + } + inserted++; + + if (progress) { + progress_update((double) inserted / (double) count, "warming up:"); + } + } + } + + return true; +} + + +static void reset_de_perf_metrics() { + + /* + * rejection counters are zeroed automatically by the + * sampling function itself. + */ + + RESET_IO_CNT(); +} diff --git a/benchmarks/old-bench/include/standalone_utility.h b/benchmarks/old-bench/include/standalone_utility.h new file mode 100644 index 0000000..727daa5 --- /dev/null +++ b/benchmarks/old-bench/include/standalone_utility.h @@ -0,0 +1,240 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +static gsl_rng *g_rng; +static bool g_osm_data; + +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +static key_type g_min_key = UINT64_MAX; +static key_type g_max_key = 0; + +static size_t g_max_record_cnt = 0; +static size_t g_reccnt = 0; + +static constexpr unsigned int DEFAULT_SEED = 0; + +static unsigned int get_random_seed() +{ + unsigned int seed = 0; + std::fstream urandom; + urandom.open("/dev/urandom", std::ios::in|std::ios::binary); + urandom.read((char *) &seed, sizeof(seed)); + urandom.close(); + + return seed; +} + +static key_type osm_to_key(const char *key_field) { + double tmp_key = (atof(key_field) + 180) * 10e6; + return (key_type) tmp_key; +} + +static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) +{ + g_rng = gsl_rng_alloc(type); + gsl_rng_set(g_rng, seed); +} + +static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) +{ + unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; + init_bench_rng(seed, gsl_rng_mt19937); + g_osm_data = osm_correction; + g_max_record_cnt = max_reccnt; + g_reccnt = 0; +} + +static void delete_bench_env() +{ + gsl_rng_free(g_rng); +} + + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +/* + * NOTE: The QP type must have lower_bound and upper_bound attributes, which + * this function will initialize. Any other query parameter attributes must + * be manually initialized after the call. + */ +template +static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + +template +static bool next_record(std::fstream &file, R &record, bool binary=false) +{ + static value_type value = 1; + if (g_reccnt >= g_max_record_cnt) return false; + + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + std::string key_field; + std::string value_field; + std::string weight_field; + + std::getline(line_stream, value_field, '\t'); + std::getline(line_stream, key_field, '\t'); + std::getline(line_stream, weight_field, '\t'); + + record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); + record.value = atol(value_field.c_str()); + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + g_reccnt++; + + return true; + } + + return false; +} + +template +static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { + vec.clear(); + + size_t cnt = 0; + while (cnt < n) { + if (to_delete.size() == 0) { + return false; + } + + auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); + vec.emplace_back(to_delete[i]); + to_delete.erase(to_delete.begin() + i); + } +td: + return true; +} diff --git a/benchmarks/static_dynamic_comp.cpp b/benchmarks/static_dynamic_comp.cpp new file mode 100644 index 0000000..2d8f041 --- /dev/null +++ b/benchmarks/static_dynamic_comp.cpp @@ -0,0 +1,117 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "query/rangecount.h" +#include "shard/TrieSpline.h" +#include "shard/ISAMTree.h" + + +#include "framework/interface/Record.h" +#include "framework/interface/Query.h" +#include "include/data-proc.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::TrieSpline TS; + +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +typedef de::MutableBuffer Buffer; + +typedef de::rc::Parms query; + +Buffer *file_to_mbuffer(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + auto buff = new Buffer(n, n+1); + + Rec rec; + while (next_record(file, rec) && buff->get_record_count() < n) { + buff->append(rec); + } + + return buff; +} + +BenchBTree *file_to_btree(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + auto btree = new BenchBTree(); + Rec rec; + while (next_record(file, rec) && btree->size() < n) { + btree->insert({rec.key, rec.value}); + } + + return btree; +} + +template +void benchmark_shard(S *shard, std::vector &queries) { + TIMER_INIT(); + + TIMER_START(); + for (auto & q : queries) { + auto state = de::rc::Query::get_query_state(shard, &q); + auto res = de::rc::Query::query(shard, state, &q); + } + TIMER_STOP(); + + auto latency = TIMER_RESULT() / queries.size(); + fprintf(stdout, "%ld %ld\n", latency, shard->get_memory_usage() - shard->get_record_count() * sizeof(de::Wrapped)); +} + +void benchmark_btree(BenchBTree *btree, std::vector &queries) { + TIMER_INIT(); + + TIMER_START(); + for (auto & q : queries) { + size_t c = 0; + auto ptr = btree->find(q.lower_bound); + while(ptr != btree->end() && ptr->key <= q.upper_bound) { + c++; + } + } + TIMER_STOP(); + + auto latency = TIMER_RESULT() / queries.size(); + auto mem = btree->get_stats().inner_nodes * psudb::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + fprintf(stdout, "%ld %ld\n", latency, mem); +} + +int main(int argc, char **argv) { + if (argc < 4) { + fprintf(stderr, "Usage: static_dynamic_comp \n"); + exit(EXIT_FAILURE); + } + + std::string d_fname = std::string(argv[1]); + size_t reccnt = atol(argv[2]); + std::string q_fname = std::string(argv[3]); + + init_bench_env(reccnt, true, false); + auto queries = read_range_queries(q_fname, .001); + + auto buff = file_to_mbuffer(d_fname, reccnt); + + TS *ts = new TS(buff->get_buffer_view()); + benchmark_shard(ts, queries); + delete ts; + + ISAM *isam = new ISAM(buff->get_buffer_view()); + benchmark_shard(isam, queries); + delete isam; + + auto btree = file_to_btree(d_fname, reccnt); + +} + diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp index 1abe7f5..e016aa4 100644 --- a/benchmarks/watermark_testing.cpp +++ b/benchmarks/watermark_testing.cpp @@ -21,7 +21,7 @@ typedef de::DynamicExtension Ext; int main(int argc, char **argv) { std::vector hwms = {5000l, 10000l, 20000l, 50000l}; - std::vector lwms = {.1, .2, .3, .4, .5}; + std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9}; size_t n = 1000000; @@ -46,6 +46,8 @@ int main(int argc, char **argv) { fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput); + extension->print_scheduler_statistics(); + delete extension; } } -- cgit v1.2.3 From d166465dcca3550cb8f3263e0f5b5189a69d531a Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 13:29:49 -0500 Subject: Temporary thread affinity for reconstruction --- benchmarks/insert_tail_latency.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp index 5e32898..1640ce5 100644 --- a/benchmarks/insert_tail_latency.cpp +++ b/benchmarks/insert_tail_latency.cpp @@ -19,7 +19,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext; std::atomic total_latency = 0; @@ -53,8 +53,8 @@ void insert_thread(Ext *extension, size_t n, size_t k, size_t rate) { int main(int argc, char **argv) { /* the closeout routine takes _forever_ ... so we'll just leak the memory */ - auto extension = new Ext(100, 1000000, 3); - size_t n = 100000000; + auto extension = new Ext(12000, 12001, 3); + size_t n = 10000000; size_t per_trial = 1000; double selectivity = .001; size_t rate = 1000000; @@ -63,11 +63,12 @@ int main(int argc, char **argv) { gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - std::thread i_thrd1(insert_thread, extension, n/2, per_trial, rate); - std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate); + std::thread i_thrd1(insert_thread, extension, n, per_trial, rate); + //std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate); + i_thrd1.join(); - i_thrd2.join(); + //i_thrd2.join(); auto avg_latency = total_latency.load() / n; auto throughput = (int64_t) ((double) n / (double) total_latency * 1e9); -- cgit v1.2.3 From b1f966353695a0e06948df5332acccb84bbbcda0 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 14:26:34 -0500 Subject: Query/Insert intermix benchmarks --- benchmarks/insert_query_tput.cpp | 87 ++++++++++++++------------ benchmarks/insert_query_tput_serial.cpp | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 38 deletions(-) create mode 100644 benchmarks/insert_query_tput_serial.cpp (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 865e82c..8844d04 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -23,71 +23,82 @@ typedef de::DynamicExtension Ext; std::atomic inserts_done = false; -void insert_thread(Ext *extension, size_t n, size_t k) { + +void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { TIMER_INIT(); - for (int64_t i=0; iinsert(r)) { - _mm_pause(); - } - } - TIMER_STOP(); - auto insert_lat = TIMER_RESULT(); - fprintf(stdout, "I\t%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, k); - } + size_t reccnt = extension->get_record_count(); - inserts_done.store(true); -} + size_t range = reccnt * selectivity; -void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { - TIMER_INIT(); + auto q = new de::rc::Parms(); - while (!inserts_done.load()) { - size_t reccnt = extension->get_record_count(); - if (reccnt == 0) { - continue; // don't start querying until there is data - } + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; +} + +void insert_thread(Ext *extension, size_t n, size_t k, gsl_rng *rng) { + TIMER_INIT(); - auto q = new de::rc::Parms(); + size_t reccnt = 0; + Rec r; + while (reccnt < n) { + auto old_reccnt = reccnt; TIMER_START(); - for (int64_t i=0; ilower_bound = start; - q->upper_bound = start + range; - auto res = extension->query(q); - auto r = res.get(); + if (extension->insert(r)) { + reccnt++; + } } TIMER_STOP(); - auto query_lat = TIMER_RESULT(); - fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); - delete q; + auto insert_lat = TIMER_RESULT(); + + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", reccnt, insert_lat, reccnt - old_reccnt); + + if (reccnt % 100000 == 0 && reccnt != n) { + auto a = std::thread(query_thread, extension, .01, 20, rng); + a.detach(); + } } } int main(int argc, char **argv) { /* the closeout routine takes _forever_ ... so we'll just leak the memory */ - auto extension = new Ext(1000, 10000, 2); + auto extension = new Ext(1000, 12000, 8); size_t n = 10000000; size_t per_trial = 1000; double selectivity = .001; - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + TIMER_INIT(); - std::thread i_thrd(insert_thread, extension, n, per_trial); - std::thread q_thrd(query_thread, extension, selectivity, 1, rng); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - q_thrd.join(); + TIMER_START(); + std::thread i_thrd(insert_thread, extension, n, per_trial, rng); i_thrd.join(); + TIMER_STOP(); + + auto total_latency = TIMER_RESULT(); + fprintf(stdout, "T\t%ld\n", total_latency); gsl_rng_free(rng); + delete extension; fflush(stderr); } diff --git a/benchmarks/insert_query_tput_serial.cpp b/benchmarks/insert_query_tput_serial.cpp new file mode 100644 index 0000000..ff9dc40 --- /dev/null +++ b/benchmarks/insert_query_tput_serial.cpp @@ -0,0 +1,104 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +std::atomic inserts_done = false; + + +void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { + TIMER_INIT(); + + size_t reccnt = extension->get_record_count(); + + size_t range = reccnt * selectivity; + + auto q = new de::rc::Parms(); + + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; +} + +void insert_thread(Ext *extension, size_t n, size_t k, gsl_rng *rng) { + TIMER_INIT(); + + size_t reccnt = 0; + Rec r; + while (reccnt < n) { + auto old_reccnt = reccnt; + + TIMER_START(); + for (size_t i=0; iinsert(r)) { + reccnt++; + } + } + TIMER_STOP(); + auto insert_lat = TIMER_RESULT(); + + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", reccnt, insert_lat, reccnt - old_reccnt); + + if (reccnt % 100000 == 0 && reccnt != n) { + auto a = std::thread(query_thread, extension, .01, 20, rng); + a.join(); + } + } +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + auto extension = new Ext(1000, 12000, 8); + size_t n = 10000000; + size_t per_trial = 1000; + double selectivity = .001; + + TIMER_INIT(); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + TIMER_START(); + std::thread i_thrd(insert_thread, extension, n, per_trial, rng); + i_thrd.join(); + TIMER_STOP(); + + auto total_latency = TIMER_RESULT(); + fprintf(stdout, "T\t%ld\n", total_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3 From 27d36dd9a68e4cf454be2ca7877ece0a34c3e929 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 15:48:21 -0500 Subject: Insert throughput benchmark --- benchmarks/insert_query_tput.cpp | 96 +++++++++++---------- benchmarks/query_workload_bench.cpp | 168 ++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 45 deletions(-) create mode 100644 benchmarks/query_workload_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 8844d04..ed5bfe9 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -23,79 +23,85 @@ typedef de::DynamicExtension Ext; std::atomic inserts_done = false; +void query_thread(Ext *extension, size_t n) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t range = n*.0001; -void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { - TIMER_INIT(); - - size_t reccnt = extension->get_record_count(); - - size_t range = reccnt * selectivity; - - auto q = new de::rc::Parms(); - - TIMER_START(); - for (int64_t i=0; i *q = new de::rc::Parms(); + while (!inserts_done.load()) { + size_t start = gsl_rng_uniform_int(rng, n - range); q->lower_bound = start; q->upper_bound = start + range; auto res = extension->query(q); auto r = res.get(); + usleep(100); } - TIMER_STOP(); - auto query_lat = TIMER_RESULT(); - fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + + gsl_rng_free(rng); delete q; } -void insert_thread(Ext *extension, size_t n, size_t k, gsl_rng *rng) { - TIMER_INIT(); - +void insert_thread(Ext *extension, size_t n, gsl_rng *rng) { size_t reccnt = 0; Rec r; - while (reccnt < n) { - auto old_reccnt = reccnt; - - TIMER_START(); - for (size_t i=0; iinsert(r)) { - reccnt++; - } - } - TIMER_STOP(); - auto insert_lat = TIMER_RESULT(); - - fprintf(stdout, "I\t%ld\t%ld\t%ld\n", reccnt, insert_lat, reccnt - old_reccnt); + for (size_t i=0; iinsert(r)) { + usleep(1); } } + + inserts_done.store(true); } int main(int argc, char **argv) { - /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + if (argc < 3) { + fprintf(stderr, "insert_query_tput reccnt query_threads\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t qthread_cnt = atol(argv[2]); + auto extension = new Ext(1000, 12000, 8); - size_t n = 10000000; - size_t per_trial = 1000; - double selectivity = .001; + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + Rec r; + for (size_t i=0; iinsert(r)) { + usleep(1); + } + } + + extension->await_next_epoch(); TIMER_INIT(); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + std::vector qthreads(qthread_cnt); TIMER_START(); - std::thread i_thrd(insert_thread, extension, n, per_trial, rng); + std::thread i_thrd(insert_thread, extension, n - warmup, rng); + for (size_t i=0; i + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +size_t g_insert_size = 50000; +size_t g_insert_frequency = 1000; +size_t g_query_count = 5000; + +void query_thread(Ext *extension, gsl_rng *rng, size_t n, bool parallel=true) { + TIMER_INIT(); + double selectivity = .001; + size_t k = 100; + size_t range = n * selectivity; + + size_t total_result = 0; + + auto q = new de::rc::Parms(); + + std::vector>> results(k); + + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + results[i] = extension->query(q); + if (!parallel) { + auto x = results[i].get(); + total_result += x[0].key; + } + } + + if (parallel) { + for (size_t i=0; iget_record_count(), query_lat, k); + fprintf(stderr, "Q Total: %ld\n", total_result); + delete q; +} + +void insert_thread(Ext *extension, size_t n) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + TIMER_INIT(); + size_t k=1000; + + Rec r; + for (size_t i=0; iinsert(r)) { + _mm_pause(); + } + } + TIMER_STOP(); + + auto insert_lat = TIMER_RESULT(); + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, k); + } +} + +void parallel_bench(Ext *extension, gsl_rng *rng, size_t n) { + TIMER_INIT(); + + TIMER_START(); + for (size_t i=0; i < g_query_count; i+=100) { + query_thread(extension, rng, n); + if (i % g_insert_frequency == 0) { + auto x = std::thread(insert_thread, extension, n); + x.detach(); + } + } + TIMER_STOP(); + + auto workload_duration = TIMER_RESULT(); + fprintf(stdout, "W\t%ld\n", workload_duration); +} + + +void serial_bench(Ext *extension, gsl_rng *rng, size_t n) { + TIMER_INIT(); + TIMER_START(); + for (size_t i=0; i < g_query_count; i+=100) { + query_thread(extension, rng, n, false); + if (i % g_insert_frequency == 0) { + auto x = std::thread(insert_thread, extension, n); + x.join(); + } + } + TIMER_STOP(); + + auto workload_duration = TIMER_RESULT(); + fprintf(stdout, "W\t%ld\n", workload_duration); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "query_workload_bench reccnt lwm hwm parallel\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t lwm = atol(argv[2]); + size_t hwm = atol(argv[3]); + bool parallel = atoi(argv[4]); + + size_t scale_factor = 8; + + auto extension = new Ext(lwm, hwm, scale_factor); + size_t per_trial = 1000; + double selectivity = .001; + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + /* build initial structure */ + size_t reccnt = 0; + Rec r; + for (size_t i=0; iinsert(r)) { + _mm_pause(); + } + } + + if (parallel) { + parallel_bench(extension, rng, n); + } else { + serial_bench(extension, rng, n); + } + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + -- cgit v1.2.3 From 1b354771dea44523183758e71ebc7623ace143f5 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 16:13:38 -0500 Subject: insert query tput updates --- benchmarks/insert_query_tput.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index ed5bfe9..8274d2a 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -27,6 +27,8 @@ void query_thread(Ext *extension, size_t n) { gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); size_t range = n*.0001; + size_t total = 0; + de::rc::Parms *q = new de::rc::Parms(); while (!inserts_done.load()) { size_t start = gsl_rng_uniform_int(rng, n - range); @@ -34,9 +36,12 @@ void query_thread(Ext *extension, size_t n) { q->upper_bound = start + range; auto res = extension->query(q); auto r = res.get(); - usleep(100); + total += r[0].key; + usleep(1); } + fprintf(stderr, "%ld\n", total); + gsl_rng_free(rng); delete q; } @@ -66,7 +71,7 @@ int main(int argc, char **argv) { size_t n = atol(argv[1]); size_t qthread_cnt = atol(argv[2]); - auto extension = new Ext(1000, 12000, 8); + auto extension = new Ext(1000, 12000, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); /* warmup structure w/ 10% of records */ -- cgit v1.2.3 From 1e226fc415d7674de0ecde51199d89e9042c6a22 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 16:57:41 -0500 Subject: Updated insert query throughput to use IRS queries --- benchmarks/insert_query_tput.cpp | 14 +++-- benchmarks/insert_query_tput_serial.cpp | 104 -------------------------------- 2 files changed, 8 insertions(+), 110 deletions(-) delete mode 100644 benchmarks/insert_query_tput_serial.cpp (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 8274d2a..05715b1 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -8,7 +8,7 @@ #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" -#include "query/rangecount.h" +#include "query/irs.h" #include "framework/interface/Record.h" #include @@ -18,7 +18,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rc::Query Q; +typedef de::irs::Query Q; typedef de::DynamicExtension Ext; std::atomic inserts_done = false; @@ -27,17 +27,19 @@ void query_thread(Ext *extension, size_t n) { gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); size_t range = n*.0001; - size_t total = 0; + int64_t total = 0; - de::rc::Parms *q = new de::rc::Parms(); + de::irs::Parms *q = new de::irs::Parms(); while (!inserts_done.load()) { size_t start = gsl_rng_uniform_int(rng, n - range); q->lower_bound = start; q->upper_bound = start + range; + q->sample_size = 100; + q->rng = rng; auto res = extension->query(q); auto r = res.get(); - total += r[0].key; - usleep(1); + total += r.size(); + usleep(1); } fprintf(stderr, "%ld\n", total); diff --git a/benchmarks/insert_query_tput_serial.cpp b/benchmarks/insert_query_tput_serial.cpp deleted file mode 100644 index ff9dc40..0000000 --- a/benchmarks/insert_query_tput_serial.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "framework/DynamicExtension.h" -#include "shard/ISAMTree.h" -#include "query/rangecount.h" -#include "framework/interface/Record.h" - -#include - -#include "psu-util/timer.h" - - -typedef de::Record Rec; -typedef de::ISAMTree ISAM; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; - -std::atomic inserts_done = false; - - -void query_thread(Ext *extension, double selectivity, size_t k, gsl_rng *rng) { - TIMER_INIT(); - - size_t reccnt = extension->get_record_count(); - - size_t range = reccnt * selectivity; - - auto q = new de::rc::Parms(); - - TIMER_START(); - for (int64_t i=0; ilower_bound = start; - q->upper_bound = start + range; - auto res = extension->query(q); - auto r = res.get(); - } - TIMER_STOP(); - auto query_lat = TIMER_RESULT(); - fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); - delete q; -} - -void insert_thread(Ext *extension, size_t n, size_t k, gsl_rng *rng) { - TIMER_INIT(); - - size_t reccnt = 0; - Rec r; - while (reccnt < n) { - auto old_reccnt = reccnt; - - TIMER_START(); - for (size_t i=0; iinsert(r)) { - reccnt++; - } - } - TIMER_STOP(); - auto insert_lat = TIMER_RESULT(); - - fprintf(stdout, "I\t%ld\t%ld\t%ld\n", reccnt, insert_lat, reccnt - old_reccnt); - - if (reccnt % 100000 == 0 && reccnt != n) { - auto a = std::thread(query_thread, extension, .01, 20, rng); - a.join(); - } - } -} - -int main(int argc, char **argv) { - - /* the closeout routine takes _forever_ ... so we'll just leak the memory */ - auto extension = new Ext(1000, 12000, 8); - size_t n = 10000000; - size_t per_trial = 1000; - double selectivity = .001; - - TIMER_INIT(); - - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - TIMER_START(); - std::thread i_thrd(insert_thread, extension, n, per_trial, rng); - i_thrd.join(); - TIMER_STOP(); - - auto total_latency = TIMER_RESULT(); - fprintf(stdout, "T\t%ld\n", total_latency); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - -- cgit v1.2.3 From 080e73dd1f90163cea987ba3d3d56e3c1b7ddea7 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 20:57:40 -0500 Subject: Updated throughput bench to use SOSD --- benchmarks/include/data-proc.h | 14 ++++++++++++ benchmarks/insert_query_tput.cpp | 46 ++++++++++++++++++++++------------------ 2 files changed, 39 insertions(+), 21 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h index f758ed4..dbac671 100644 --- a/benchmarks/include/data-proc.h +++ b/benchmarks/include/data-proc.h @@ -10,6 +10,8 @@ #include "psu-ds/BTree.h" +#pragma once + typedef uint64_t key_type; typedef uint64_t value_type; typedef uint64_t weight_type; @@ -242,3 +244,15 @@ static bool build_delete_vec(std::vector &to_delete, std::vector &vec, siz td: return true; } + +static std::vector read_sosd_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector records(n); + for (size_t i=0; i @@ -20,23 +21,22 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; typedef de::irs::Query Q; typedef de::DynamicExtension Ext; +typedef de::irs::Parms QP; std::atomic inserts_done = false; -void query_thread(Ext *extension, size_t n) { +void query_thread(Ext *extension, std::vector *queries) { gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - size_t range = n*.0001; + size_t total = 0; - int64_t total = 0; - - de::irs::Parms *q = new de::irs::Parms(); while (!inserts_done.load()) { - size_t start = gsl_rng_uniform_int(rng, n - range); - q->lower_bound = start; - q->upper_bound = start + range; - q->sample_size = 100; - q->rng = rng; - auto res = extension->query(q); + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + q.rng = rng; + q.sample_size = 1000; + + auto res = extension->query(&q); auto r = res.get(); total += r.size(); usleep(1); @@ -45,15 +45,14 @@ void query_thread(Ext *extension, size_t n) { fprintf(stderr, "%ld\n", total); gsl_rng_free(rng); - delete q; } -void insert_thread(Ext *extension, size_t n, gsl_rng *rng) { +void insert_thread(Ext *extension, size_t start, std::vector *records) { size_t reccnt = 0; Rec r; - for (size_t i=0; isize(); i++) { + r.key = (*records)[i]; + r.value = i; while (!extension->insert(r)) { usleep(1); @@ -65,22 +64,27 @@ void insert_thread(Ext *extension, size_t n, gsl_rng *rng) { int main(int argc, char **argv) { - if (argc < 3) { - fprintf(stderr, "insert_query_tput reccnt query_threads\n"); + if (argc < 5) { + fprintf(stderr, "insert_query_tput reccnt query_threads datafile queryfile\n"); exit(EXIT_FAILURE); } size_t n = atol(argv[1]); size_t qthread_cnt = atol(argv[2]); + std::string d_fname = std::string(argv[3]); + std::string q_fname = std::string(argv[4]); auto extension = new Ext(1000, 12000, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; Rec r; for (size_t i=0; iinsert(r)) { @@ -95,9 +99,9 @@ int main(int argc, char **argv) { std::vector qthreads(qthread_cnt); TIMER_START(); - std::thread i_thrd(insert_thread, extension, n - warmup, rng); + std::thread i_thrd(insert_thread, extension, warmup, &data); for (size_t i=0; i Date: Wed, 31 Jan 2024 21:17:57 -0500 Subject: BTree benchmark --- benchmarks/btree_insert_query_tput.cpp | 116 +++++++++++++++++++++++++++++++++ benchmarks/include/data-proc.h | 4 +- 2 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 benchmarks/btree_insert_query_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/btree_insert_query_tput.cpp b/benchmarks/btree_insert_query_tput.cpp new file mode 100644 index 0000000..9c8fe6a --- /dev/null +++ b/benchmarks/btree_insert_query_tput.cpp @@ -0,0 +1,116 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "query/irs.h" +#include "include/data-proc.h" +#include "psu-ds/BTree.h" +#include + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::irs::Parms QP; + +std::atomic inserts_done = false; + +std::mutex g_btree_lock; + +void query_thread(BenchBTree *tree, std::vector *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + + std::vector result; + g_btree_lock.lock(); + tree->range_sample(q.lower_bound, q.upper_bound, 1000, result, rng); + g_btree_lock.unlock(); + + total += result.size(); + usleep(1); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(BenchBTree *tree, size_t start, std::vector *records) { + size_t reccnt = 0; + for (size_t i=start; isize(); i++) { + btree_record r; + r.key = (*records)[i]; + r.value = i; + + g_btree_lock.lock(); + tree->insert(r); + g_btree_lock.unlock(); + } + + inserts_done.store(true); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "btree_insert_query_tput reccnt query_threads datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t qthread_cnt = atol(argv[2]); + std::string d_fname = std::string(argv[3]); + std::string q_fname = std::string(argv[4]); + + auto tree = new BenchBTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; iinsert(r); + } + + TIMER_INIT(); + + std::vector qthreads(qthread_cnt); + + TIMER_START(); + std::thread i_thrd(insert_thread, tree, warmup, &data); + for (size_t i=0; i Date: Wed, 31 Jan 2024 22:35:16 -0500 Subject: IRS bench (replication of existing one) --- benchmarks/irs_bench.cpp | 125 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 benchmarks/irs_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp new file mode 100644 index 0000000..ffedcf2 --- /dev/null +++ b/benchmarks/irs_bench.cpp @@ -0,0 +1,125 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "include/data-proc.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::irs::Query Q; +typedef de::DynamicExtension Ext; +typedef de::irs::Parms QP; + +void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; irng = rng; + q->sample_size = 1000; + + auto res = extension->query(&q); + auto r = res.get(); + total += r.size(); + } + + fprintf(stderr, "%ld\n", total); +} + +size_t g_deleted_records = 0; + +double delete_proportion = 0; + +void insert_records(Ext *extension, size_t start, + size_t stop, + std::vector &records, + std::vector &to_delete, + size_t &delete_idx, + gsl_rng *rng) { + size_t reccnt = 0; + Rec r; + for (size_t i=start; iinsert(r)) { + usleep(1); + } + + if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + r.key = records[to_delete[delete_idx]]; + r.value = (int64_t) (to_delete[delete_idx]); + while (!extension->erase(r)) { + usleep(1); + } + delete_idx++; + g_deleted_records++; + } + } +} + +int main(int argc, char **argv) { + + if (argc < 4) { + fprintf(stderr, "insert_query_tput reccnt datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries, rng); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "T\t%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3 From fca660859bd8133cff53592b17abf4c8a51fc2c0 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 31 Jan 2024 23:01:29 -0500 Subject: updated btree benchmark --- benchmarks/btree_insert_query_tput.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'benchmarks') diff --git a/benchmarks/btree_insert_query_tput.cpp b/benchmarks/btree_insert_query_tput.cpp index 9c8fe6a..f838f80 100644 --- a/benchmarks/btree_insert_query_tput.cpp +++ b/benchmarks/btree_insert_query_tput.cpp @@ -56,6 +56,10 @@ void insert_thread(BenchBTree *tree, size_t start, std::vector *records g_btree_lock.lock(); tree->insert(r); g_btree_lock.unlock(); + + if (i % 100000 == 0) { + fprintf(stderr, "Inserted %ld records\n", i); + } } inserts_done.store(true); -- cgit v1.2.3 From db4806d9dd9757273a14e6c3ea92e5a087239145 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 5 Feb 2024 15:17:25 -0500 Subject: Set up tombstone deletes properly --- benchmarks/irs_bench.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index ffedcf2..6de8681 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -20,7 +20,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { @@ -30,7 +30,7 @@ void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { q->rng = rng; q->sample_size = 1000; - auto res = extension->query(&q); + auto res = extension->query(q); auto r = res.get(); total += r.size(); } @@ -39,14 +39,14 @@ void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { } size_t g_deleted_records = 0; - -double delete_proportion = 0; +double delete_proportion = 0.05; void insert_records(Ext *extension, size_t start, size_t stop, std::vector &records, std::vector &to_delete, size_t &delete_idx, + bool delete_records, gsl_rng *rng) { size_t reccnt = 0; Rec r; @@ -58,7 +58,7 @@ void insert_records(Ext *extension, size_t start, usleep(1); } - if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { r.key = records[to_delete[delete_idx]]; r.value = (int64_t) (to_delete[delete_idx]); while (!extension->erase(r)) { @@ -95,16 +95,16 @@ int main(int argc, char **argv) { auto queries = read_range_queries(q_fname, .001); /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; + size_t warmup = .3 * n; size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, rng); + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); @@ -116,7 +116,7 @@ int main(int argc, char **argv) { auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "T\t%ld\t%ld\n", insert_throughput, query_latency); + fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); gsl_rng_free(rng); delete extension; -- cgit v1.2.3 From 2c5d549b3618b9ea72e6eece4cb4f3da5a6811a8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 13:42:34 -0500 Subject: Fully realized shard concept interface --- benchmarks/insert_query_tput.cpp | 2 +- benchmarks/insert_tail_latency.cpp | 2 +- benchmarks/insertion_tput.cpp | 2 +- benchmarks/irs_bench.cpp | 2 +- benchmarks/query_workload_bench.cpp | 2 +- benchmarks/reconstruction_interference.cpp | 2 +- benchmarks/static_dynamic_comp.cpp | 2 +- benchmarks/watermark_testing.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index 40a5f8d..ce05264 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -19,7 +19,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::irs::Query Q; +typedef de::irs::Query Q; typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp index 1640ce5..bdc4536 100644 --- a/benchmarks/insert_tail_latency.cpp +++ b/benchmarks/insert_tail_latency.cpp @@ -18,7 +18,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rc::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; std::atomic total_latency = 0; diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp index 785b933..b4428f6 100644 --- a/benchmarks/insertion_tput.cpp +++ b/benchmarks/insertion_tput.cpp @@ -14,7 +14,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rq::Query Q; +typedef de::rq::Query Q; typedef de::DynamicExtension Ext; diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 6de8681..ddb4220 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -19,7 +19,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::irs::Query Q; +typedef de::irs::Query Q; typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; diff --git a/benchmarks/query_workload_bench.cpp b/benchmarks/query_workload_bench.cpp index 114f780..db8f61a 100644 --- a/benchmarks/query_workload_bench.cpp +++ b/benchmarks/query_workload_bench.cpp @@ -18,7 +18,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rc::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; size_t g_insert_size = 50000; diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp index c4c8c1b..57eb923 100644 --- a/benchmarks/reconstruction_interference.cpp +++ b/benchmarks/reconstruction_interference.cpp @@ -16,7 +16,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rc::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; volatile std::atomic queries_done; diff --git a/benchmarks/static_dynamic_comp.cpp b/benchmarks/static_dynamic_comp.cpp index 2d8f041..5a89d88 100644 --- a/benchmarks/static_dynamic_comp.cpp +++ b/benchmarks/static_dynamic_comp.cpp @@ -21,7 +21,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; typedef de::TrieSpline TS; -typedef de::rc::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; typedef de::MutableBuffer Buffer; diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp index e016aa4..caba8ff 100644 --- a/benchmarks/watermark_testing.cpp +++ b/benchmarks/watermark_testing.cpp @@ -14,7 +14,7 @@ typedef de::Record Rec; typedef de::ISAMTree ISAM; -typedef de::rq::Query Q; +typedef de::rq::Query Q; typedef de::DynamicExtension Ext; -- cgit v1.2.3 From 26a43e9b76e0041d192379b1343aeace4587dcc9 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 8 Feb 2024 12:23:25 -0500 Subject: Fixed benchmark memory leak --- benchmarks/query_workload_bench.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'benchmarks') diff --git a/benchmarks/query_workload_bench.cpp b/benchmarks/query_workload_bench.cpp index db8f61a..d79daf2 100644 --- a/benchmarks/query_workload_bench.cpp +++ b/benchmarks/query_workload_bench.cpp @@ -86,6 +86,8 @@ void insert_thread(Ext *extension, size_t n) { auto insert_lat = TIMER_RESULT(); fprintf(stdout, "I\t%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, k); } + + gsl_rng_free(rng); } void parallel_bench(Ext *extension, gsl_rng *rng, size_t n) { -- cgit v1.2.3