From 0a9e79416df03a9e0a3d2cf171cf90028a644d6d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 15 Jan 2024 17:21:11 -0500 Subject: Benchmarking programs --- benchmarks/alex_rq_bench.cpp | 205 ------------------------- benchmarks/alias_wss_bench.cpp | 57 ------- benchmarks/btree_irs_bench.cpp | 91 ----------- benchmarks/btree_rq_bench.cpp | 90 ----------- benchmarks/insert_query_tput.cpp | 7 +- benchmarks/insertion_tput.cpp | 6 +- benchmarks/isam_irs_bench.cpp | 64 -------- benchmarks/isam_rq_bench.cpp | 59 -------- benchmarks/mtree_knn_bench.cpp | 83 ---------- benchmarks/old-bench/alex_rq_bench.cpp | 205 +++++++++++++++++++++++++ benchmarks/old-bench/alias_wss_bench.cpp | 57 +++++++ benchmarks/old-bench/btree_irs_bench.cpp | 91 +++++++++++ benchmarks/old-bench/btree_rq_bench.cpp | 90 +++++++++++ benchmarks/old-bench/isam_irs_bench.cpp | 64 ++++++++ benchmarks/old-bench/isam_rq_bench.cpp | 59 ++++++++ benchmarks/old-bench/mtree_knn_bench.cpp | 83 ++++++++++ benchmarks/old-bench/pgm_pl_bench.cpp | 67 +++++++++ benchmarks/old-bench/pgm_rq_bench.cpp | 67 +++++++++ benchmarks/old-bench/test.cpp | 7 + benchmarks/old-bench/triespline_rq_bench.cpp | 66 ++++++++ benchmarks/old-bench/upgm_pl_bench.cpp | 212 ++++++++++++++++++++++++++ benchmarks/old-bench/upgm_rq_bench.cpp | 217 +++++++++++++++++++++++++++ benchmarks/old-bench/vptree_knn_bench.cpp | 58 +++++++ benchmarks/pgm_pl_bench.cpp | 67 --------- benchmarks/pgm_rq_bench.cpp | 67 --------- benchmarks/reconstruction_interference.cpp | 110 ++++++++++++++ benchmarks/test.cpp | 7 - benchmarks/triespline_rq_bench.cpp | 66 -------- benchmarks/upgm_pl_bench.cpp | 212 -------------------------- benchmarks/upgm_rq_bench.cpp | 217 --------------------------- benchmarks/vptree_knn_bench.cpp | 58 ------- 31 files changed, 1462 insertions(+), 1347 deletions(-) delete mode 100644 benchmarks/alex_rq_bench.cpp delete mode 100644 benchmarks/alias_wss_bench.cpp delete mode 100644 benchmarks/btree_irs_bench.cpp delete mode 100644 benchmarks/btree_rq_bench.cpp delete mode 100644 benchmarks/isam_irs_bench.cpp delete mode 100644 benchmarks/isam_rq_bench.cpp delete mode 100644 benchmarks/mtree_knn_bench.cpp create mode 100644 benchmarks/old-bench/alex_rq_bench.cpp create mode 100644 benchmarks/old-bench/alias_wss_bench.cpp create mode 100644 benchmarks/old-bench/btree_irs_bench.cpp create mode 100644 benchmarks/old-bench/btree_rq_bench.cpp create mode 100644 benchmarks/old-bench/isam_irs_bench.cpp create mode 100644 benchmarks/old-bench/isam_rq_bench.cpp create mode 100644 benchmarks/old-bench/mtree_knn_bench.cpp create mode 100644 benchmarks/old-bench/pgm_pl_bench.cpp create mode 100644 benchmarks/old-bench/pgm_rq_bench.cpp create mode 100644 benchmarks/old-bench/test.cpp create mode 100644 benchmarks/old-bench/triespline_rq_bench.cpp create mode 100644 benchmarks/old-bench/upgm_pl_bench.cpp create mode 100644 benchmarks/old-bench/upgm_rq_bench.cpp create mode 100644 benchmarks/old-bench/vptree_knn_bench.cpp delete mode 100644 benchmarks/pgm_pl_bench.cpp delete mode 100644 benchmarks/pgm_rq_bench.cpp create mode 100644 benchmarks/reconstruction_interference.cpp delete mode 100644 benchmarks/test.cpp delete mode 100644 benchmarks/triespline_rq_bench.cpp delete mode 100644 benchmarks/upgm_pl_bench.cpp delete mode 100644 benchmarks/upgm_rq_bench.cpp delete mode 100644 benchmarks/vptree_knn_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/alex_rq_bench.cpp b/benchmarks/alex_rq_bench.cpp deleted file mode 100644 index f75afa6..0000000 --- a/benchmarks/alex_rq_bench.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#include "alex.h" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef alex::Alex Alex; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::pair *insert_vec = new std::pair[count]; - Alex *alex = new Alex(); - - size_t cnt = 0; - record rec; - while (cnt < count && next_record(file, rec)) { - insert_vec[cnt] = {rec.key, rec.value}; - cnt++; - } - - std::sort(insert_vec, insert_vec + count); - - alex->bulk_load(insert_vec, count); - delete[] insert_vec; - - return alex; -} - - -static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: alex_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.8; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries(qfilename, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); - - fprintf(stderr, "Size: %ld\n", alex->size()); - size_t insert_cnt = record_count - warmup_cnt; - - alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = alex->model_size() + alex->data_size(); - - fprintf(stderr, "Size: %ld\n", alex->size()); - fprintf(stdout, "%ld\t", memory_usage); - - alex_rq_bench(*alex, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - delete alex; - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/alias_wss_bench.cpp b/benchmarks/alias_wss_bench.cpp deleted file mode 100644 index a3a43f2..0000000 --- a/benchmarks/alias_wss_bench.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * benchmarks/alias_wss_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 4) { - fprintf(stderr, "Usage: sampling_tput [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - bool use_osm = (argc == 5) ? atoi(argv[4]) : 0; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - - auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_wss, warmup_cnt, delete_prop, to_delete); - - size_t insert_cnt = record_count - warmup_cnt; - - std::vector> queries(1); - queries[0].rng = g_rng; - queries[0].sample_size = 1000; - - insert_tput_bench(de_wss, datafile, insert_cnt, delete_prop, to_delete); - query_latency_bench>(de_wss, queries, 1000); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/btree_irs_bench.cpp b/benchmarks/btree_irs_bench.cpp deleted file mode 100644 index 862fc6b..0000000 --- a/benchmarks/btree_irs_bench.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "include/bench.h" -#include "ds/BTree.h" - -static void btree_sample_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=10) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector sample_set; - sample_set.reserve(queries[0].sample_size); - - for (int i=0; i(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - - - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: btree_irs_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .001); - - for (auto &q: queries) { - q.rng = g_rng; - q.sample_size = 1000; - } - - auto btree = TreeMap(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); - memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); - fprintf(stdout, "%ld\t", memory_usage); - - btree_sample_bench(btree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/btree_rq_bench.cpp b/benchmarks/btree_rq_bench.cpp deleted file mode 100644 index d92b45d..0000000 --- a/benchmarks/btree_rq_bench.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "include/bench.h" -#include "ds/BTree.h" - -static void btree_rq_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; ikey <= queries[j].upper_bound) { - result_set.emplace_back(*ptr); - ptr++; - } - result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - - - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: btree_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .0001); - - auto btree = TreeMap(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); - memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); - fprintf(stdout, "%ld\t", memory_usage); - - btree_rq_bench(btree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index fe85e68..09179b0 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -27,7 +27,9 @@ void insert_thread(Ext *extension, size_t n, size_t k) { TIMER_START(); for (int64_t j=0; jinsert(r); + while (!extension->insert(r)) { + _mm_pause(); + } } TIMER_STOP(); auto insert_lat = TIMER_RESULT(); @@ -58,13 +60,14 @@ void query_thread(Ext *extension, double selectivity, size_t k) { TIMER_STOP(); auto query_lat = TIMER_RESULT(); fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; } } int main(int argc, char **argv) { /* the closeout routine takes _forever_ ... so we'll just leak the memory */ - auto extension = new Ext(10000, 2, 1, 0, 2); + auto extension = new Ext(1000, 10000, 2); size_t n = 10000000; size_t per_trial = 1000; double selectivity = .001; diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp index 5959173..5498f93 100644 --- a/benchmarks/insertion_tput.cpp +++ b/benchmarks/insertion_tput.cpp @@ -21,7 +21,7 @@ typedef de::DynamicExtension Ext; int main(int argc, char **argv) { - auto extension = new Ext(10000, 2, 1); + auto extension = new Ext(1000, 10000, 2); size_t n = 1000000000; size_t per_trial = 1000; @@ -31,7 +31,9 @@ int main(int argc, char **argv) { TIMER_START(); for (int64_t j=0; jinsert(r); + while (!extension->insert(r)) { + _mm_pause(); + } } TIMER_STOP(); auto insert_lat = TIMER_RESULT(); diff --git a/benchmarks/isam_irs_bench.cpp b/benchmarks/isam_irs_bench.cpp deleted file mode 100644 index 96525f0..0000000 --- a/benchmarks/isam_irs_bench.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: isam_irs_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .001); - - for (auto &q: queries) { - q.rng = g_rng; - q.sample_size = 1000; - } - - auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_irs, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_irs.get_memory_usage()); - query_latency_bench>(de_irs, queries); - fprintf(stdout, "\n"); - - auto ts = de_irs.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::irs_query_parms, de::IRSQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/isam_rq_bench.cpp b/benchmarks/isam_rq_bench.cpp deleted file mode 100644 index bb5626e..0000000 --- a/benchmarks/isam_rq_bench.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: isam_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - auto queries = read_range_queries>(qfilename, .0001); - - auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage()); - query_latency_bench>(de_isam_rq, queries); - fprintf(stdout, "\n"); - - auto ts = de_isam_rq.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::ISAMRangeQueryParms, de::ISAMRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/mtree_knn_bench.cpp deleted file mode 100644 index 9d4cc57..0000000 --- a/benchmarks/mtree_knn_bench.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "include/bench.h" -#include "mtree.h" - -static void mtree_knn_bench(MTree &tree, std::vector> queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i results; - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; jdata); - itr++; - } - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: mtree_knn_bench [k]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - size_t k = (argc == 6) ? atol(argv[5]) : 10; - - init_bench_env(record_count, true); - auto queries = read_knn_queries>(qfilename, k); - - auto mtree = MTree(); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = 0.1 * record_count; - warmup(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true); - // fprintf(stdout, "%ld\t", mtree.get_memory_usage()); - - mtree_knn_bench(mtree, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/old-bench/alex_rq_bench.cpp b/benchmarks/old-bench/alex_rq_bench.cpp new file mode 100644 index 0000000..f75afa6 --- /dev/null +++ b/benchmarks/old-bench/alex_rq_bench.cpp @@ -0,0 +1,205 @@ +#include "alex.h" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef alex::Alex Alex; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::pair *insert_vec = new std::pair[count]; + Alex *alex = new Alex(); + + size_t cnt = 0; + record rec; + while (cnt < count && next_record(file, rec)) { + insert_vec[cnt] = {rec.key, rec.value}; + cnt++; + } + + std::sort(insert_vec, insert_vec + count); + + alex->bulk_load(insert_vec, count); + delete[] insert_vec; + + return alex; +} + + +static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: alex_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.8; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries(qfilename, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + + fprintf(stderr, "Size: %ld\n", alex->size()); + size_t insert_cnt = record_count - warmup_cnt; + + alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = alex->model_size() + alex->data_size(); + + fprintf(stderr, "Size: %ld\n", alex->size()); + fprintf(stdout, "%ld\t", memory_usage); + + alex_rq_bench(*alex, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + delete alex; + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/alias_wss_bench.cpp b/benchmarks/old-bench/alias_wss_bench.cpp new file mode 100644 index 0000000..a3a43f2 --- /dev/null +++ b/benchmarks/old-bench/alias_wss_bench.cpp @@ -0,0 +1,57 @@ +/* + * benchmarks/alias_wss_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 4) { + fprintf(stderr, "Usage: sampling_tput [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + bool use_osm = (argc == 5) ? atoi(argv[4]) : 0; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + + auto de_wss = ExtendedWSS(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_wss, warmup_cnt, delete_prop, to_delete); + + size_t insert_cnt = record_count - warmup_cnt; + + std::vector> queries(1); + queries[0].rng = g_rng; + queries[0].sample_size = 1000; + + insert_tput_bench(de_wss, datafile, insert_cnt, delete_prop, to_delete); + query_latency_bench>(de_wss, queries, 1000); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/btree_irs_bench.cpp b/benchmarks/old-bench/btree_irs_bench.cpp new file mode 100644 index 0000000..862fc6b --- /dev/null +++ b/benchmarks/old-bench/btree_irs_bench.cpp @@ -0,0 +1,91 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_sample_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=10) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector sample_set; + sample_set.reserve(queries[0].sample_size); + + for (int i=0; i(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); + fprintf(stdout, "%ld\t", memory_usage); + + btree_sample_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/btree_rq_bench.cpp b/benchmarks/old-bench/btree_rq_bench.cpp new file mode 100644 index 0000000..d92b45d --- /dev/null +++ b/benchmarks/old-bench/btree_rq_bench.cpp @@ -0,0 +1,90 @@ +#include "include/bench.h" +#include "ds/BTree.h" + +static void btree_rq_bench(TreeMap &tree, std::vector> queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; ikey <= queries[j].upper_bound) { + result_set.emplace_back(*ptr); + ptr++; + } + result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + + + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: btree_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto btree = TreeMap(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, btree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + memory_usage += btree.get_stats().leaves * tlx::btree_default_traits::leaf_slots * sizeof(btree_record); + fprintf(stdout, "%ld\t", memory_usage); + + btree_rq_bench(btree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/isam_irs_bench.cpp b/benchmarks/old-bench/isam_irs_bench.cpp new file mode 100644 index 0000000..96525f0 --- /dev/null +++ b/benchmarks/old-bench/isam_irs_bench.cpp @@ -0,0 +1,64 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_irs_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .001); + + for (auto &q: queries) { + q.rng = g_rng; + q.sample_size = 1000; + } + + auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_irs, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_irs.get_memory_usage()); + query_latency_bench>(de_irs, queries); + fprintf(stdout, "\n"); + + auto ts = de_irs.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::irs_query_parms, de::IRSQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/isam_rq_bench.cpp b/benchmarks/old-bench/isam_rq_bench.cpp new file mode 100644 index 0000000..bb5626e --- /dev/null +++ b/benchmarks/old-bench/isam_rq_bench.cpp @@ -0,0 +1,59 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: isam_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries>(qfilename, .0001); + + auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage()); + query_latency_bench>(de_isam_rq, queries); + fprintf(stdout, "\n"); + + auto ts = de_isam_rq.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::ISAMRangeQueryParms, de::ISAMRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/mtree_knn_bench.cpp b/benchmarks/old-bench/mtree_knn_bench.cpp new file mode 100644 index 0000000..9d4cc57 --- /dev/null +++ b/benchmarks/old-bench/mtree_knn_bench.cpp @@ -0,0 +1,83 @@ +#include "include/bench.h" +#include "mtree.h" + +static void mtree_knn_bench(MTree &tree, std::vector> queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i results; + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; jdata); + itr++; + } + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: mtree_knn_bench [k]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + size_t k = (argc == 6) ? atol(argv[5]) : 10; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, k); + + auto mtree = MTree(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = 0.1 * record_count; + warmup(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true); + // fprintf(stdout, "%ld\t", mtree.get_memory_usage()); + + mtree_knn_bench(mtree, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/pgm_pl_bench.cpp b/benchmarks/old-bench/pgm_pl_bench.cpp new file mode 100644 index 0000000..f798861 --- /dev/null +++ b/benchmarks/old-bench/pgm_pl_bench.cpp @@ -0,0 +1,67 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: pgm_pl_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 1000; + size_t scale_factor = 6; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.1; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedPGM_PL(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_lookup_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::PGMPointLookupParms, de::PGMPointLookup>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/pgm_rq_bench.cpp b/benchmarks/old-bench/pgm_rq_bench.cpp new file mode 100644 index 0000000..e25d29f --- /dev/null +++ b/benchmarks/old-bench/pgm_rq_bench.cpp @@ -0,0 +1,67 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: pgm_rq_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 8; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.5; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedPGMRQ(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_range_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::pgm_range_query_parms, de::PGMRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/test.cpp b/benchmarks/old-bench/test.cpp new file mode 100644 index 0000000..75bffe3 --- /dev/null +++ b/benchmarks/old-bench/test.cpp @@ -0,0 +1,7 @@ +#include "alex.h" + + +int main(int argc, char **argv) { + alex::Alex test; + +} diff --git a/benchmarks/old-bench/triespline_rq_bench.cpp b/benchmarks/old-bench/triespline_rq_bench.cpp new file mode 100644 index 0000000..967c3b0 --- /dev/null +++ b/benchmarks/old-bench/triespline_rq_bench.cpp @@ -0,0 +1,66 @@ +/* + * benchmarks/triespline_rq_bench.cpp + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: triespline_rq_bench [osm_data]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + size_t buffer_cap = 12000; + size_t scale_factor = 8; + double delete_prop = atof(argv[3]); + double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; + std::string query_file = std::string(argv[4]); + bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; + + double insert_batch = 0.5; + + init_bench_env(record_count, true, use_osm); + + auto de = ExtendedTSRQ(buffer_cap, scale_factor, max_delete_prop); + auto queries = read_range_queries>(query_file, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de.get_memory_usage()); + query_latency_bench>(de, queries, 1); + fprintf(stdout, "\n"); + + auto ts = de.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Rec, de::ts_range_query_parms, de::TrieSplineRangeQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/upgm_pl_bench.cpp b/benchmarks/old-bench/upgm_pl_bench.cpp new file mode 100644 index 0000000..e0445b2 --- /dev/null +++ b/benchmarks/old-bench/upgm_pl_bench.cpp @@ -0,0 +1,212 @@ +#include "pgm/pgm_index_dynamic.hpp" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef pgm::DynamicPGMIndex> PGM; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void pgm_pl_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; ifirst == queries[j].lower_bound) { + result_set.push_back({ptr->first, ptr->second}); + } + result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: upgm_pl_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + double insert_batch = 0.1; + + init_bench_env(record_count, true); + auto queries = read_range_queries(qfilename, .0001); + + std::vector> data; + PGM pgm(data.begin(), data.end()); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = pgm.size_in_bytes(); + fprintf(stdout, "%ld\t", memory_usage); + + pgm_pl_bench(pgm, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/upgm_rq_bench.cpp b/benchmarks/old-bench/upgm_rq_bench.cpp new file mode 100644 index 0000000..940a9e6 --- /dev/null +++ b/benchmarks/old-bench/upgm_rq_bench.cpp @@ -0,0 +1,217 @@ +#include "pgm/pgm_index_dynamic.hpp" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef pgm::DynamicPGMIndex> PGM; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void pgm_rq_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + //std::vector result_set; + size_t tot = 0; + + for (int i=0; ifirst <= queries[j].upper_bound) { + ++tot; + //result_set.push_back({ptr->first, ptr->second}); + ++ptr; + } + assert(tot > 0); + //result_set.clear(); + } + auto stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: upgm_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + double insert_batch = 0.5; + + init_bench_env(record_count, true); + auto queries = read_range_queries(qfilename, .0001); + + std::vector> data; + PGM pgm(data.begin(), data.end()); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = pgm.size_in_bytes(); + fprintf(stdout, "%ld\t", memory_usage); + + pgm_rq_bench(pgm, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/old-bench/vptree_knn_bench.cpp b/benchmarks/old-bench/vptree_knn_bench.cpp new file mode 100644 index 0000000..d8247e4 --- /dev/null +++ b/benchmarks/old-bench/vptree_knn_bench.cpp @@ -0,0 +1,58 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: vptree_knn_bench [k]\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + size_t k = (argc == 6) ? atol(argv[5]) : 10; + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, k); + + auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = 0.1 * record_count; + warmup(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); + + query_latency_bench>(de_vp_knn, queries); + fprintf(stdout, "\n"); + + auto ts = de_vp_knn.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Word2VecRec, de::KNNQueryParms, de::KNNQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/pgm_pl_bench.cpp b/benchmarks/pgm_pl_bench.cpp deleted file mode 100644 index f798861..0000000 --- a/benchmarks/pgm_pl_bench.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: pgm_pl_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 1000; - size_t scale_factor = 6; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.1; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedPGM_PL(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_lookup_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::PGMPointLookupParms, de::PGMPointLookup>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/pgm_rq_bench.cpp b/benchmarks/pgm_rq_bench.cpp deleted file mode 100644 index e25d29f..0000000 --- a/benchmarks/pgm_rq_bench.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: pgm_rq_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 8; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.5; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedPGMRQ(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_range_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::pgm_range_query_parms, de::PGMRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp new file mode 100644 index 0000000..a843c71 --- /dev/null +++ b/benchmarks/reconstruction_interference.cpp @@ -0,0 +1,110 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; + +void query_thread(Ext *extension, double selectivity, size_t k) { + TIMER_INIT(); + + size_t reccnt = extension->get_record_count(); + size_t range = reccnt * selectivity; + + auto q = new de::rq::Parms(); + + TIMER_START(); + for (int64_t i=0; ilower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; +} + +Ext *build_structure(size_t n) { + auto extension = new Ext(1000, 10000, 2); + + size_t i=0; + Rec r; + do { + r.key = rand() % n; + r.value = i; + if (extension->insert(r)) { + i++; + } else { + _mm_pause(); + } + } while (i < n); + + extension->await_next_epoch(); + return extension; +} + +void query_benchmark(double selectivity, size_t k, Ext *extension) { + TIMER_INIT(); + + size_t query_thrd_cnt = 4; + std::vector thrds(query_thrd_cnt); + + TIMER_START(); + for (size_t i=0; iget_record_count(), query_lat, k, query_thrd_cnt); +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + size_t n = 10000000; + + size_t per_trial = 1000; + double selectivity = .001; + + /* build initial structure */ + auto extension = build_structure(n); + + /* benchmark queries w/o any interference from reconstructions */ + query_benchmark(selectivity, per_trial, extension); + + fprintf(stderr, "Running interference test...\n"); + + /* trigger a worst-case reconstruction and benchmark the queries */ + std::thread q_thrd(query_benchmark, selectivity, per_trial, extension); + auto s = extension->create_static_structure(); + fprintf(stderr, "Construction complete\n"); + q_thrd.join(); + + delete extension; + delete s; + + fflush(stderr); +} + diff --git a/benchmarks/test.cpp b/benchmarks/test.cpp deleted file mode 100644 index 75bffe3..0000000 --- a/benchmarks/test.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "alex.h" - - -int main(int argc, char **argv) { - alex::Alex test; - -} diff --git a/benchmarks/triespline_rq_bench.cpp b/benchmarks/triespline_rq_bench.cpp deleted file mode 100644 index 967c3b0..0000000 --- a/benchmarks/triespline_rq_bench.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * benchmarks/triespline_rq_bench.cpp - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: triespline_rq_bench [osm_data]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - size_t buffer_cap = 12000; - size_t scale_factor = 8; - double delete_prop = atof(argv[3]); - double max_delete_prop = (delete_prop > 0) ? delete_prop : 1; - std::string query_file = std::string(argv[4]); - bool use_osm = (argc == 6) ? atoi(argv[5]) : 0; - - double insert_batch = 0.5; - - init_bench_env(record_count, true, use_osm); - - auto de = ExtendedTSRQ(buffer_cap, scale_factor, max_delete_prop); - auto queries = read_range_queries>(query_file, .0001); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, de, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de.get_memory_usage()); - query_latency_bench>(de, queries, 1); - fprintf(stdout, "\n"); - - auto ts = de.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Rec, de::ts_range_query_parms, de::TrieSplineRangeQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/upgm_pl_bench.cpp b/benchmarks/upgm_pl_bench.cpp deleted file mode 100644 index e0445b2..0000000 --- a/benchmarks/upgm_pl_bench.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "pgm/pgm_index_dynamic.hpp" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef pgm::DynamicPGMIndex> PGM; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void pgm_pl_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; ifirst == queries[j].lower_bound) { - result_set.push_back({ptr->first, ptr->second}); - } - result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: upgm_pl_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - double insert_batch = 0.1; - - init_bench_env(record_count, true); - auto queries = read_range_queries(qfilename, .0001); - - std::vector> data; - PGM pgm(data.begin(), data.end()); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = pgm.size_in_bytes(); - fprintf(stdout, "%ld\t", memory_usage); - - pgm_pl_bench(pgm, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/upgm_rq_bench.cpp b/benchmarks/upgm_rq_bench.cpp deleted file mode 100644 index 940a9e6..0000000 --- a/benchmarks/upgm_rq_bench.cpp +++ /dev/null @@ -1,217 +0,0 @@ -#include "pgm/pgm_index_dynamic.hpp" -#include "include/standalone_utility.h" - -typedef uint64_t key_type; -typedef uint64_t value_type; - -typedef pgm::DynamicPGMIndex> PGM; - -struct record { - key_type key; - value_type value; -}; - -struct query { - key_type lower_bound; - key_type upper_bound; -}; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); -} - - - -static void pgm_rq_bench(PGM &pgm, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - //std::vector result_set; - size_t tot = 0; - - for (int i=0; ifirst <= queries[j].upper_bound) { - ++tot; - //result_set.push_back({ptr->first, ptr->second}); - ++ptr; - } - assert(tot > 0); - //result_set.clear(); - } - auto stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(stop - start).count(); - } - - size_t latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", latency); -} - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: upgm_rq_bench \n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - - double insert_batch = 0.5; - - init_bench_env(record_count, true); - auto queries = read_range_queries(qfilename, .0001); - - std::vector> data; - PGM pgm(data.begin(), data.end()); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - warmup(datafile, pgm, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - pgm_rq_insert(pgm, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = pgm.size_in_bytes(); - fprintf(stdout, "%ld\t", memory_usage); - - pgm_rq_bench(pgm, queries); - fprintf(stdout, "\n"); - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp deleted file mode 100644 index d8247e4..0000000 --- a/benchmarks/vptree_knn_bench.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include "include/bench.h" - -int main(int argc, char **argv) -{ - if (argc < 5) { - fprintf(stderr, "Usage: vptree_knn_bench [k]\n"); - exit(EXIT_FAILURE); - } - - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); - size_t k = (argc == 6) ? atol(argv[5]) : 10; - - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - - init_bench_env(record_count, true); - auto queries = read_knn_queries>(qfilename, k); - - auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); - - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - - std::vector to_delete; - - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = 0.1 * record_count; - warmup(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); - - size_t insert_cnt = record_count - warmup_cnt; - - insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); - fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); - - query_latency_bench>(de_vp_knn, queries); - fprintf(stdout, "\n"); - - auto ts = de_vp_knn.create_static_structure(); - - fprintf(stdout, "%ld\t", ts->get_memory_usage()); - static_latency_bench, Word2VecRec, de::KNNQueryParms, de::KNNQuery>( - ts, queries, 1 - ); - fprintf(stdout, "\n"); - - delete ts; - - delete_bench_env(); - fflush(stdout); - fflush(stderr); - - exit(EXIT_SUCCESS); -} -- cgit v1.2.3