From 4a1dde3148e0e84b47c884bc0bb69c60678b4558 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:09:07 -0400 Subject: Benchmark update+reorganization The Alex benchmark isn't updated yet. --- benchmarks/vldb/alex_bench.cpp | 205 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 benchmarks/vldb/alex_bench.cpp (limited to 'benchmarks/vldb/alex_bench.cpp') diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp new file mode 100644 index 0000000..f75afa6 --- /dev/null +++ b/benchmarks/vldb/alex_bench.cpp @@ -0,0 +1,205 @@ +#include "alex.h" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef alex::Alex Alex; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::pair *insert_vec = new std::pair[count]; + Alex *alex = new Alex(); + + size_t cnt = 0; + record rec; + while (cnt < count && next_record(file, rec)) { + insert_vec[cnt] = {rec.key, rec.value}; + cnt++; + } + + std::sort(insert_vec, insert_vec + count); + + alex->bulk_load(insert_vec, count); + delete[] insert_vec; + + return alex; +} + + +static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: alex_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.8; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries(qfilename, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + + fprintf(stderr, "Size: %ld\n", alex->size()); + size_t insert_cnt = record_count - warmup_cnt; + + alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = alex->model_size() + alex->data_size(); + + fprintf(stderr, "Size: %ld\n", alex->size()); + fprintf(stdout, "%ld\t", memory_usage); + + alex_rq_bench(*alex, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + delete alex; + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} -- cgit v1.2.3 From c61164545f4c113fb17eb993e393bbf97373cfb3 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 29 Apr 2024 14:43:10 -0400 Subject: Alex benchmark --- benchmarks/vldb/alex_bench.cpp | 231 +++++++++++++++-------------------------- 1 file changed, 85 insertions(+), 146 deletions(-) (limited to 'benchmarks/vldb/alex_bench.cpp') diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index f75afa6..76df410 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -1,5 +1,10 @@ +#define ENABLE_TIMER + #include "alex.h" -#include "include/standalone_utility.h" + +#include "file_util.h" +#include "psu-util/progress.h" +#include "psu-util/timer.h" typedef uint64_t key_type; typedef uint64_t value_type; @@ -16,190 +21,124 @@ struct query { key_type upper_bound; }; -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::pair *insert_vec = new std::pair[count]; - Alex *alex = new Alex(); +static void insert_records(Alex *structure, size_t start, size_t stop, + std::vector &records, std::vector &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { - size_t cnt = 0; - record rec; - while (cnt < count && next_record(file, rec)) { - insert_vec[cnt] = {rec.key, rec.value}; - cnt++; - } + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i].key, records[i].value); - std::sort(insert_vec, insert_vec + count); + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { - alex->bulk_load(insert_vec, count); - delete[] insert_vec; + structure->erase_one(records[i].key); + delete_idx++; + g_deleted_records++; + } + } - return alex; + psudb::progress_update(1, "Insert Progress"); } +size_t g_global_cnt = 0; -static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i &queries) { + for (size_t i=0; ifind(queries[i].lower_bound); + while (ptr != alex->end() && ptr.key() <= queries[i].upper_bound) { + cnt++; + ptr++; } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - fprintf(stdout, "%ld\t", throughput); + g_global_cnt += cnt; + } } +Alex *warmup_alex(std::vector records, size_t cnt) { + if (cnt >= records.size()) { + fprintf(stderr, "[E] Requesting warmup with more records than are available.\n"); + exit(EXIT_FAILURE); + } + auto alex = new Alex(); + std::pair *insert_vec = new std::pair[cnt]; -static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i(stop - start).count(); + for (size_t i=0; ibulk_load(insert_vec, cnt); + delete[] insert_vec; - fprintf(stdout, "%ld\t", latency); + return alex; } int main(int argc, char **argv) { - if (argc < 5) { - fprintf(stderr, "Usage: alex_rq_bench \n"); + if (argc < 4) { + usage(argv[0]); exit(EXIT_FAILURE); } - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - double insert_batch = 0.8; + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(qfilename, .0001); + auto queries = read_range_queries(q_fname, .001); - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - std::vector to_delete; + size_t warmup = .1 * n; + size_t delete_idx = 0; - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + auto alex = warmup_alex(data, warmup); - fprintf(stderr, "Size: %ld\n", alex->size()); - size_t insert_cnt = record_count - warmup_cnt; + TIMER_INIT(); - alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = alex->model_size() + alex->data_size(); + TIMER_START(); + insert_records(alex, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); - fprintf(stderr, "Size: %ld\n", alex->size()); - fprintf(stdout, "%ld\t", memory_usage); + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - alex_rq_bench(*alex, queries); - fprintf(stdout, "\n"); + TIMER_START(); + run_queries(alex, queries); + TIMER_STOP(); - delete_bench_env(); - delete alex; + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = alex->model_size() + alex->data_size() - (alex->size() * sizeof(record)); + + fprintf(stdout, "%ld\t%ld\t%lld\t%ld\n", insert_throughput, query_latency, ext_size, g_global_cnt); fflush(stdout); + + gsl_rng_free(rng); fflush(stderr); + delete alex; + exit(EXIT_SUCCESS); } -- cgit v1.2.3 From ef2ec17c21cb331c37f25501394b009282604fcf Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:06:20 -0400 Subject: Adjusted selectivity for range benches down to .0001 --- benchmarks/vldb/alex_bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks/vldb/alex_bench.cpp') diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index 76df410..ba687f3 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -107,7 +107,7 @@ int main(int argc, char **argv) } } - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); size_t warmup = .1 * n; -- cgit v1.2.3