From 3dd16320d38f6312e7f5d7164b6bb7a4e14790fa Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 9 Feb 2024 15:35:32 -0500 Subject: Benchmark updates --- benchmarks/irs_bench.cpp | 4 +- benchmarks/pgm_bench.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++++++ benchmarks/ts_bench.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+), 2 deletions(-) create mode 100644 benchmarks/pgm_bench.cpp create mode 100644 benchmarks/ts_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index ddb4220..49b1630 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -17,7 +17,7 @@ #include "psu-util/timer.h" -typedef de::Record Rec; +typedef de::Record Rec; typedef de::ISAMTree ISAM; typedef de::irs::Query Q; typedef de::DynamicExtension Ext; @@ -73,7 +73,7 @@ void insert_records(Ext *extension, size_t start, int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "insert_query_tput reccnt datafile queryfile\n"); + fprintf(stderr, "irs_bench reccnt datafile queryfile\n"); exit(EXIT_FAILURE); } diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp new file mode 100644 index 0000000..72d3b52 --- /dev/null +++ b/benchmarks/pgm_bench.cpp @@ -0,0 +1,123 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/PGM.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "include/data-proc.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::PGM S; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::rc::Parms QP; + +void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; iquery(q); + auto r = res.get(); + total += r.size(); + } + + fprintf(stderr, "%ld\n", total); +} + +size_t g_deleted_records = 0; +double delete_proportion = 0.05; + +void insert_records(Ext *extension, size_t start, + size_t stop, + std::vector &records, + std::vector &to_delete, + size_t &delete_idx, + bool delete_records, + gsl_rng *rng) { + size_t reccnt = 0; + Rec r; + for (size_t i=start; iinsert(r)) { + usleep(1); + } + + if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + r.key = records[to_delete[delete_idx]]; + r.value = (int64_t) (to_delete[delete_idx]); + while (!extension->erase(r)) { + usleep(1); + } + delete_idx++; + g_deleted_records++; + } + } +} + +int main(int argc, char **argv) { + + if (argc < 4) { + fprintf(stderr, "pgm_bench reccnt datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries, rng); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp new file mode 100644 index 0000000..3df3371 --- /dev/null +++ b/benchmarks/ts_bench.cpp @@ -0,0 +1,123 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "include/data-proc.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::TrieSpline TS; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::rc::Parms QP; + +void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; iquery(q); + auto r = res.get(); + total += r.size(); + } + + fprintf(stderr, "%ld\n", total); +} + +size_t g_deleted_records = 0; +double delete_proportion = 0.05; + +void insert_records(Ext *extension, size_t start, + size_t stop, + std::vector &records, + std::vector &to_delete, + size_t &delete_idx, + bool delete_records, + gsl_rng *rng) { + size_t reccnt = 0; + Rec r; + for (size_t i=start; iinsert(r)) { + usleep(1); + } + + if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + r.key = records[to_delete[delete_idx]]; + r.value = (int64_t) (to_delete[delete_idx]); + while (!extension->erase(r)) { + usleep(1); + } + delete_idx++; + g_deleted_records++; + } + } +} + +int main(int argc, char **argv) { + + if (argc < 4) { + fprintf(stderr, "ts_bench reccnt datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries, rng); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3 From 60c30fcfd2c4a085aa87127bfe9cae51b352d13e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 20 Feb 2024 16:10:32 -0500 Subject: Updated watermark benchmark to shuffle data --- benchmarks/watermark_testing.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp index caba8ff..5fa0c0d 100644 --- a/benchmarks/watermark_testing.cpp +++ b/benchmarks/watermark_testing.cpp @@ -5,15 +5,17 @@ #define ENABLE_TIMER #include "framework/DynamicExtension.h" -#include "shard/ISAMTree.h" +#include "shard/TrieSpline.h" #include "query/rangequery.h" #include "framework/interface/Record.h" #include "psu-util/timer.h" +#include -typedef de::Record Rec; -typedef de::ISAMTree ISAM; +typedef uint64_t K; +typedef de::Record Rec; +typedef de::TrieSpline ISAM; typedef de::rq::Query Q; typedef de::DynamicExtension Ext; @@ -23,7 +25,14 @@ int main(int argc, char **argv) { std::vector hwms = {5000l, 10000l, 20000l, 50000l}; std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9}; - size_t n = 1000000; + size_t n = 1000000000; + + std::vector keys(n); + for (K i=0; iinsert(r)) { _mm_pause(); } -- cgit v1.2.3 From 69f36c9b4f5df19f09156689b333afe29a017eed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 23 Feb 2024 15:40:49 -0500 Subject: Benchmark updates --- benchmarks/CMak | 0 benchmarks/include/benchmark_types.h | 50 ++++++ benchmarks/include/btree-util.h | 27 --- benchmarks/include/data-proc.h | 258 --------------------------- benchmarks/include/file_util.h | 130 ++++++++++++++ benchmarks/include/standard_benchmarks.h | 212 ++++++++++++++++++++++ benchmarks/irs_bench.cpp | 73 ++------ benchmarks/old-bench/include/bench_utility.h | 2 - benchmarks/pgm_bench.cpp | 64 ++----- benchmarks/ts_bench.cpp | 64 ++----- benchmarks/vptree_bench.cpp | 89 +++++++++ benchmarks/watermark_testing_knn.cpp | 61 +++++++ 12 files changed, 583 insertions(+), 447 deletions(-) create mode 100644 benchmarks/CMak create mode 100644 benchmarks/include/benchmark_types.h delete mode 100644 benchmarks/include/btree-util.h delete mode 100644 benchmarks/include/data-proc.h create mode 100644 benchmarks/include/file_util.h create mode 100644 benchmarks/include/standard_benchmarks.h create mode 100644 benchmarks/vptree_bench.cpp create mode 100644 benchmarks/watermark_testing_knn.cpp (limited to 'benchmarks') diff --git a/benchmarks/CMak b/benchmarks/CMak new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h new file mode 100644 index 0000000..85e9565 --- /dev/null +++ b/benchmarks/include/benchmark_types.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include "psu-ds/BTree.h" +#include "mtree.h" +#include "framework/interface/Record.h" + +/* TLX BTree definitions*/ +template +struct btree_record { + K key; + V value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +template +struct btree_key_extract { + static const K &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree, btree_key_extract> BenchBTree; + + +/*MTree Definitions*/ + +const size_t W2V_SIZE = 300; +typedef de::EuclidPoint Word2VecRec; + +struct euclidean_distance { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double dist = 0; + for (size_t i=0; i MTree; + diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h deleted file mode 100644 index 571c073..0000000 --- a/benchmarks/include/btree-util.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include "psu-ds/BTree.h" - -struct btree_record { - int64_t key; - int64_t value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const int64_t &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - - diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h deleted file mode 100644 index 444cb94..0000000 --- a/benchmarks/include/data-proc.h +++ /dev/null @@ -1,258 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "psu-ds/BTree.h" - -#pragma once - -typedef int64_t key_type; -typedef int64_t value_type; -typedef uint64_t weight_type; - -static gsl_rng *g_rng; -static bool g_osm_data; - -struct btree_record { - key_type key; - value_type value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const key_type &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - -static key_type g_min_key = UINT64_MAX; -static key_type g_max_key = 0; - -static size_t g_max_record_cnt = 0; -static size_t g_reccnt = 0; - -static constexpr unsigned int DEFAULT_SEED = 0; - -static unsigned int get_random_seed() -{ - unsigned int seed = 0; - std::fstream urandom; - urandom.open("/dev/urandom", std::ios::in|std::ios::binary); - urandom.read((char *) &seed, sizeof(seed)); - urandom.close(); - - return seed; -} - -static key_type osm_to_key(const char *key_field) { - double tmp_key = (atof(key_field) + 180) * 10e6; - return (key_type) tmp_key; -} - -static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) -{ - g_rng = gsl_rng_alloc(type); - gsl_rng_set(g_rng, seed); -} - -static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) -{ - unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; - init_bench_rng(seed, gsl_rng_mt19937); - g_osm_data = osm_correction; - g_max_record_cnt = max_reccnt; - g_reccnt = 0; -} - -static void delete_bench_env() -{ - gsl_rng_free(g_rng); -} - - -template -static std::vector read_lookup_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.target_key = start; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_range_queries(std::string &fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.lower_bound = start; - q.upper_bound = stop; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_knn_queries(std::string fname, size_t k) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - char *line = NULL; - size_t len = 0; - - while (getline(&line, &len, qf) > 0) { - char *token; - QP query; - size_t idx = 0; - - token = strtok(line, " "); - do { - query.point.data[idx++] = atof(token); - } while ((token = strtok(NULL, " "))); - - query.k = k; - queries.emplace_back(query); - } - - free(line); - fclose(qf); - - return queries; -} - -/* - * NOTE: The QP type must have lower_bound and upper_bound attributes, which - * this function will initialize. Any other query parameter attributes must - * be manually initialized after the call. - */ -template -static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - for (size_t i=0; i<300; i++) { - std::string dimension; - - std::getline(line_stream, dimension, ' '); - record.data[i] = atof(dimension.c_str()); - } - - g_reccnt++; - - return true; - } - - return false; - -} - -template -static bool next_record(std::fstream &file, R &record, bool binary=false) -{ - static value_type value = 1; - if (g_reccnt >= g_max_record_cnt) return false; - - if (binary) { - if (file.good()) { - decltype(R::key) key; - - file.read((char*) &key, sizeof(key)); - record.key = key; - record.value = value; - value++; - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - return true; - } - - return false; - } - - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - std::string key_field; - std::string value_field; - std::string weight_field; - - std::getline(line_stream, value_field, '\t'); - std::getline(line_stream, key_field, '\t'); - std::getline(line_stream, weight_field, '\t'); - - record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record.value = atol(value_field.c_str()); - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - g_reccnt++; - - return true; - } - - return false; -} - -template -static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { - vec.clear(); - - size_t cnt = 0; - while (cnt < n) { - if (to_delete.size() == 0) { - return false; - } - - auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); - vec.emplace_back(to_delete[i]); - to_delete.erase(to_delete.begin() + i); - } -td: - return true; -} - -static std::vector read_sosd_file(std::string &fname, size_t n) { - std::fstream file; - file.open(fname, std::ios::in | std::ios::binary); - - std::vector records(n); - for (size_t i=0; i +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/interface/Record.h" +#include "query/irs.h" + +#pragma once + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +template +static std::vector read_sosd_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector records(n); + for (size_t i=0; i +static std::vector read_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + std::vector records; + records.reserve(n); + + for (size_t i=0; i + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include +#include +#include + +#include "framework/DynamicExtension.h" +#include "framework/interface/Query.h" +#include "psu-util/progress.h" +#include "benchmark_types.h" + +static size_t g_deleted_records = 0; +static double delete_proportion = 0.05; + +template +static void run_queries(DE *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; iquery(q); + auto r = res.get(); + total += r.size(); + } +} + + +template +static void insert_records(DE *extension, size_t start, size_t stop, + std::vector &records, std::vector &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } + + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { + + while (!extension->erase(records[to_delete[delete_idx]])) { + usleep(1); + } + + delete_idx++; + g_deleted_records++; + } + } + + psudb::progress_update(1, "Insert Progress"); +} + +template +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, gsl_rng *rng, std::vector &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + psudb::progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + + return continue_benchmark; +} + +template +static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 49b1630..9895295 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -4,76 +4,32 @@ #define ENABLE_TIMER -#include - #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" #include "query/irs.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" #include #include "psu-util/timer.h" +#include "include/standard_benchmarks.h" typedef de::Record Rec; -typedef de::ISAMTree ISAM; -typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; +typedef de::ISAMTree Shard; +typedef de::irs::Query Q; +typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; irng = rng; - q->sample_size = 1000; - - auto res = extension->query(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "irs_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -84,7 +40,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(q_fname, .001); + for (auto q : queries) { + q.sample_size = 1000; + q.rng = rng; + } /* warmup structure w/ 10% of records */ size_t warmup = .3 * n; size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h index e33b93d..f495f18 100644 --- a/benchmarks/old-bench/include/bench_utility.h +++ b/benchmarks/old-bench/include/bench_utility.h @@ -79,8 +79,6 @@ struct cosine_similarity { } }; -typedef tlx::BTree TreeMap; -typedef mt::mtree MTree; template static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp index 72d3b52..3643abb 100644 --- a/benchmarks/pgm_bench.cpp +++ b/benchmarks/pgm_bench.cpp @@ -10,7 +10,8 @@ #include "shard/PGM.h" #include "query/rangecount.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" #include @@ -18,60 +19,19 @@ typedef de::Record Rec; -typedef de::PGM S; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::PGM Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; iquery(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "pgm_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -82,7 +42,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp index 3df3371..3dc619e 100644 --- a/benchmarks/ts_bench.cpp +++ b/benchmarks/ts_bench.cpp @@ -10,7 +10,8 @@ #include "shard/TrieSpline.h" #include "query/rangecount.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" #include @@ -18,60 +19,19 @@ typedef de::Record Rec; -typedef de::TrieSpline TS; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::TrieSpline PGM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; iquery(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "ts_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -82,7 +42,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vptree_bench.cpp new file mode 100644 index 0000000..f4c7d0e --- /dev/null +++ b/benchmarks/vptree_bench.cpp @@ -0,0 +1,89 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(100, 1000, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 10); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries, rng); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/watermark_testing_knn.cpp b/benchmarks/watermark_testing_knn.cpp new file mode 100644 index 0000000..7cea594 --- /dev/null +++ b/benchmarks/watermark_testing_knn.cpp @@ -0,0 +1,61 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + +constexpr size_t D = 100; + +typedef de::EuclidPoint Rec; +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; + +int main(int argc, char **argv) { + std::vector hwms = {1000l, 2000l, 4000l, 10000l}; + std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9}; + + size_t n = 1000000; + + std::vector records(n); + for (size_t i=0; iinsert(records[i])) { + _mm_pause(); + } + } + TIMER_STOP(); + + auto insert_time = TIMER_RESULT(); + double insert_throughput = (double) n / (double) insert_time * 1e9; + + fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput); + extension->print_scheduler_statistics(); + + fflush(stdout); + delete extension; + } + } +} + -- cgit v1.2.3 From 481df63c0152e1b643ec0bd16500c4aca0716404 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 14 Mar 2024 14:47:23 -0400 Subject: Benchmarking tweaks --- benchmarks/insert_query_tput.cpp | 59 ++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 26 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp index ce05264..29043af 100644 --- a/benchmarks/insert_query_tput.cpp +++ b/benchmarks/insert_query_tput.cpp @@ -10,7 +10,8 @@ #include "shard/ISAMTree.h" #include "query/irs.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" +#include #include @@ -25,6 +26,8 @@ typedef de::irs::Parms QP; std::atomic inserts_done = false; +struct timespec delay = {0, 500}; + void query_thread(Ext *extension, std::vector *queries) { gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); size_t total = 0; @@ -39,7 +42,7 @@ void query_thread(Ext *extension, std::vector *queries) { auto res = extension->query(&q); auto r = res.get(); total += r.size(); - usleep(1); + nanosleep(&delay, nullptr); } fprintf(stderr, "%ld\n", total); @@ -47,47 +50,39 @@ void query_thread(Ext *extension, std::vector *queries) { gsl_rng_free(rng); } -void insert_thread(Ext *extension, size_t start, std::vector *records) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; isize(); i++) { - r.key = (*records)[i]; - r.value = i; - - while (!extension->insert(r)) { - usleep(1); +void insert_thread(Ext *extension, size_t start, size_t stop, std::vector *records) { + fprintf(stderr, "%ld\t%ld\n", start, stop); + for (size_t i=start; iinsert((*records)[i])) { + nanosleep(&delay, nullptr); } } - - inserts_done.store(true); } int main(int argc, char **argv) { - if (argc < 5) { - fprintf(stderr, "insert_query_tput reccnt query_threads datafile queryfile\n"); + if (argc < 6) { + fprintf(stderr, "Usage:\n"); + fprintf(stderr, "%s reccnt insert_threads query_threads datafile queryfile\n", argv[0]); exit(EXIT_FAILURE); } size_t n = atol(argv[1]); - size_t qthread_cnt = atol(argv[2]); - std::string d_fname = std::string(argv[3]); - std::string q_fname = std::string(argv[4]); + size_t ithread_cnt = atol(argv[2]); + size_t qthread_cnt = atol(argv[3]); + std::string d_fname = std::string(argv[4]); + std::string q_fname = std::string(argv[5]); auto extension = new Ext(1000, 12000, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); auto queries = read_range_queries(q_fname, .001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; - Rec r; for (size_t i=0; iinsert(r)) { + while (!extension->insert(data[i])) { usleep(1); } } @@ -96,14 +91,26 @@ int main(int argc, char **argv) { TIMER_INIT(); + std::vector ithreads(ithread_cnt); std::vector qthreads(qthread_cnt); TIMER_START(); - std::thread i_thrd(insert_thread, extension, warmup, &data); + size_t start = warmup; + size_t per_thread = (n - warmup) / ithread_cnt; + for (size_t i=0; i Date: Fri, 22 Mar 2024 14:04:40 -0400 Subject: FSTrie testing and debugging --- benchmarks/string_insertion_tput.cpp | 92 ++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 benchmarks/string_insertion_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp new file mode 100644 index 0000000..d205175 --- /dev/null +++ b/benchmarks/string_insertion_tput.cpp @@ -0,0 +1,92 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + + +typedef de::Record Rec; +typedef de::FSTrie Trie; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; //, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; + +std::vector strings; + +void insert_thread(int64_t start, int64_t end, Ext *extension) { + for (uint64_t i=start; iinsert(r)) { + _mm_pause(); + } + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +int main(int argc, char **argv) { + size_t n = 100000000; + + std::vector counts = {1 , 2, 4, 8}; //, 16, 32, 64}; + // + read_data("benchmarks/data/ursa-genome.txt", n); + + fprintf(stderr, "Finished reading from file.\n"); + + for (auto thread_count : counts) { + + auto extension = new Ext(1000, 12000, 8); + + size_t per_thread = n / thread_count; + + std::thread threads[thread_count]; + + TIMER_INIT(); + TIMER_START(); + for (size_t i=0; iget_record_count(), + thread_count, tput); + + delete extension; + } + + fflush(stderr); +} + -- cgit v1.2.3 From b1b5ab106122e6917f6b34452be95e617506f05d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 25 Mar 2024 12:54:17 -0400 Subject: Updates for build on OpenBSD Necessary updates to get the codebase building under OpenBSD 7.5 with clang. This is a minimal set of changes to get building to work, which includes disabling several things that aren't directly compatable. More work will be necessary to get full functionality. In particular, Triespline, PGM, and the reference M-tree do not currently build on OpenBSD with clang due to GNU dependencies or other gcc specific features. --- benchmarks/include/benchmark_types.h | 4 +++- benchmarks/include/standard_benchmarks.h | 7 +++++++ benchmarks/watermark_testing.cpp | 6 +++++- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index 85e9565..fcdeac6 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -2,7 +2,6 @@ #include #include "psu-ds/BTree.h" -#include "mtree.h" #include "framework/interface/Record.h" /* TLX BTree definitions*/ @@ -46,5 +45,8 @@ struct euclidean_distance { } }; +#ifdef _GNU_SOURCE +#include "mtree.h" typedef mt::mtree MTree; +#endif diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index a42cdd6..5fc549d 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -102,8 +102,10 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) { if constexpr (std::is_same_v) { de_index.erase_one(delete_vec[delete_idx++].key); + #ifdef _GNU_SOURCE } else if constexpr (std::is_same_v) { de_index.remove(delete_vec[delete_idx++]); + #endif } else { de_index.erase(delete_vec[delete_idx++]); } @@ -111,11 +113,16 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn } // insert the record; + #ifdef _GNU_SOURCE if constexpr (std::is_same_v) { de_index.add(insert_vec[i]); } else { de_index.insert(insert_vec[i]); } + #else + de_index.insert(insert_vec[i]); + #endif + applied_inserts++; } auto insert_stop = std::chrono::high_resolution_clock::now(); diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp index 5fa0c0d..c56fc63 100644 --- a/benchmarks/watermark_testing.cpp +++ b/benchmarks/watermark_testing.cpp @@ -12,6 +12,7 @@ #include "psu-util/timer.h" #include +#include typedef uint64_t K; typedef de::Record Rec; @@ -32,7 +33,10 @@ int main(int argc, char **argv) { keys[i] = i; } - std::random_shuffle(keys.begin(), keys.end()); + std::random_device rd; + std::mt19937 g(rd()); + + std::shuffle(keys.begin(), keys.end(), g); TIMER_INIT(); -- cgit v1.2.3 From 3fb7bdce1a7b8a4a27a96c1a3ba759ad6c00b8ba Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 25 Mar 2024 13:51:16 -0400 Subject: Clean up extraneous file --- benchmarks/CMak | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 benchmarks/CMak (limited to 'benchmarks') diff --git a/benchmarks/CMak b/benchmarks/CMak deleted file mode 100644 index e69de29..0000000 -- cgit v1.2.3 From 7e7fd9f7339eee2f1ae974c662a447532dfb1b1a Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Tue, 26 Mar 2024 16:35:12 -0400 Subject: Updated FSTrie benchmark and some minor fixes --- benchmarks/string_insertion_tput.cpp | 73 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 28 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index d205175..e41e996 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -9,7 +9,7 @@ #include "framework/DynamicExtension.h" #include "shard/FSTrie.h" -#include "query/rangequery.h" +#include "query/pointlookup.h" #include "framework/interface/Record.h" #include "psu-util/timer.h" @@ -18,8 +18,8 @@ typedef de::Record Rec; typedef de::FSTrie Trie; -typedef de::rq::Query Q; -typedef de::DynamicExtension Ext; //, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; std::vector strings; @@ -47,45 +47,62 @@ void read_data(std::string fname, size_t n=10000000) { } } +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + int main(int argc, char **argv) { - size_t n = 100000000; - std::vector counts = {1 , 2, 4, 8}; //, 16, 32, 64}; - // - read_data("benchmarks/data/ursa-genome.txt", n); + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } - fprintf(stderr, "Finished reading from file.\n"); + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); - for (auto thread_count : counts) { + read_data(fname, n); - auto extension = new Ext(1000, 12000, 8); + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } - size_t per_thread = n / thread_count; + auto extension = new Ext(1000, 12000, 8); - std::thread threads[thread_count]; + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), extension); + TIMER_STOP(); - TIMER_INIT(); - TIMER_START(); - for (size_t i=0; i parms; + parms.search_key = strings[j]; - TIMER_STOP(); + auto res = extension->query(&parms); + auto ans = res.get(); - auto total_time = TIMER_RESULT(); + assert(ans[0].value == j); + } + TIMER_STOP(); - double tput = (double) n / (double) total_time * 1e9; + auto query_time = TIMER_RESULT(); - fprintf(stdout, "%ld\t%d\t%lf\n", extension->get_record_count(), - thread_count, tput); - delete extension; - } + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = total_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", extension->get_record_count(), + i_tput, q_lat); + + + delete extension; fflush(stderr); } -- cgit v1.2.3 From 1209553e9b44c355f38736fa53d4130ffff937f0 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 11 Apr 2024 12:23:29 -0400 Subject: trie_bench: Added static query latency --- benchmarks/string_insertion_tput.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index e41e996..5a46a80 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -93,13 +93,31 @@ int main(int argc, char **argv) { TIMER_STOP(); auto query_time = TIMER_RESULT(); + + auto shard = extension->create_static_structure(); + TIMER_START(); + for (size_t i=0; i parms; + parms.search_key = strings[j]; + + auto res = Q::query(shard, nullptr, &parms); + } + TIMER_STOP(); + + auto shard_query_time = TIMER_RESULT(); double i_tput = (double) n / (double) total_time * 1e9; - size_t q_lat = total_time / m; + size_t q_lat = query_time / m; + size_t s_q_lat = shard_query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\n", extension->get_record_count(), + i_tput, q_lat, s_q_lat); + + + - fprintf(stdout, "%ld\t\t%lf\t%ld\n", extension->get_record_count(), - i_tput, q_lat); delete extension; -- cgit v1.2.3 From 0cf96983011bc05a2ed275d3588e41aa4fe3c7a1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 10:22:21 -0400 Subject: Added a dynamic trie benchmark --- benchmarks/poplar_trie.cpp | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 benchmarks/poplar_trie.cpp (limited to 'benchmarks') diff --git a/benchmarks/poplar_trie.cpp b/benchmarks/poplar_trie.cpp new file mode 100644 index 0000000..6a04cb9 --- /dev/null +++ b/benchmarks/poplar_trie.cpp @@ -0,0 +1,98 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "poplar.hpp" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + +std::vector strings; + +typedef poplar::plain_bonsai_map Trie; + +void insert_thread(int64_t start, int64_t end, Trie * trie) { + for (uint64_t i=start; iupdate(strings[i]); + *res = i+1; + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); + + read_data(fname, n); + + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } + + auto trie = new Trie(); + + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), trie); + TIMER_STOP(); + + auto total_time = TIMER_RESULT(); + + size_t m = 100; + TIMER_START(); + for (size_t i=0; ifind(strings[j]); + if (*res != j-1) { + fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str()); + } + assert(*(res)+1 == j); + } + TIMER_STOP(); + + auto query_time = TIMER_RESULT(); + + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(), + i_tput, q_lat); + + delete trie; + + fflush(stderr); +} + -- cgit v1.2.3 From 428658bc76b5b9eec46d3b7e415b5d114ddd3f79 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 12:50:26 -0400 Subject: Print size statistics --- benchmarks/poplar_trie.cpp | 6 ++++-- benchmarks/string_insertion_tput.cpp | 18 +++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/poplar_trie.cpp b/benchmarks/poplar_trie.cpp index 6a04cb9..c85e718 100644 --- a/benchmarks/poplar_trie.cpp +++ b/benchmarks/poplar_trie.cpp @@ -75,10 +75,10 @@ int main(int argc, char **argv) { size_t j = rand() % strings.size(); auto res = trie->find(strings[j]); - if (*res != j-1) { + if (*res != (j+1)) { fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str()); } - assert(*(res)+1 == j); + //assert(*(res)+1 == j); } TIMER_STOP(); @@ -91,6 +91,8 @@ int main(int argc, char **argv) { fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(), i_tput, q_lat); + trie->show_stats(std::cerr, 1); + delete trie; fflush(stderr); diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 5a46a80..4923b09 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -88,6 +88,10 @@ int main(int argc, char **argv) { auto res = extension->query(&parms); auto ans = res.get(); + if (ans[0].value != j) { + fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].c_str()); + } + assert(ans[0].value == j); } TIMER_STOP(); @@ -103,6 +107,10 @@ int main(int argc, char **argv) { parms.search_key = strings[j]; auto res = Q::query(shard, nullptr, &parms); + + if (res[0].rec.value != j) { + fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].c_str()); + } } TIMER_STOP(); @@ -112,15 +120,11 @@ int main(int argc, char **argv) { size_t q_lat = query_time / m; size_t s_q_lat = shard_query_time / m; - fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\n", extension->get_record_count(), - i_tput, q_lat, s_q_lat); - - - - - + fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), + i_tput, q_lat, s_q_lat, extension->get_memory_usage(), shard->get_memory_usage()); delete extension; + delete shard; fflush(stderr); } -- cgit v1.2.3 From b25beb13773072c3b143842b45a7c32a1108f347 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 14:00:27 -0400 Subject: Updated FSTrie to use const char * instead of std::string Note: this requires the caller to manage the memory of the strings --- benchmarks/string_insertion_tput.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 4923b09..f4a519a 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -16,16 +16,16 @@ #include "psu-util/progress.h" -typedef de::Record Rec; +typedef de::Record Rec; typedef de::FSTrie Trie; typedef de::pl::Query Q; typedef de::DynamicExtension Ext; -std::vector strings; +std::vector> strings; void insert_thread(int64_t start, int64_t end, Ext *extension) { for (uint64_t i=start; iinsert(r)) { _mm_pause(); } @@ -41,7 +41,7 @@ void read_data(std::string fname, size_t n=10000000) { size_t i=0; std::string line; while (i < n && std::getline(file, line, '\n')) { - strings.emplace_back(line); + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); i++; psudb::progress_update((double) i / (double) n, "Reading file:"); } @@ -82,14 +82,13 @@ int main(int argc, char **argv) { TIMER_START(); for (size_t i=0; i parms; - parms.search_key = strings[j]; + de::pl::Parms parms = {strings[j].get()}; auto res = extension->query(&parms); auto ans = res.get(); if (ans[0].value != j) { - fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].c_str()); + fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].get()); } assert(ans[0].value == j); @@ -103,13 +102,12 @@ int main(int argc, char **argv) { TIMER_START(); for (size_t i=0; i parms; - parms.search_key = strings[j]; + de::pl::Parms parms = {strings[j].get()}; auto res = Q::query(shard, nullptr, &parms); if (res[0].rec.value != j) { - fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].c_str()); + fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].get()); } } TIMER_STOP(); -- cgit v1.2.3 From 7c2f43ff039795576bc0014c367b893fbbaceca4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 14:39:33 -0400 Subject: Benchmark updates --- benchmarks/include/standard_benchmarks.h | 18 +++++++++++++++++- benchmarks/irs_bench.cpp | 15 +++++++++++++-- benchmarks/pgm_bench.cpp | 15 +++++++++++++-- benchmarks/ts_bench.cpp | 21 ++++++++++++++++----- benchmarks/vptree_bench.cpp | 15 +++++++++++++-- 5 files changed, 72 insertions(+), 12 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 5fc549d..fe53d62 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -21,7 +21,7 @@ static size_t g_deleted_records = 0; static double delete_proportion = 0.05; template -static void run_queries(DE *extension, std::vector &queries, gsl_rng *rng) { +static void run_queries(DE *extension, std::vector &queries) { size_t total; for (size_t i=0; i &queries, gsl_rng *rng) { } +template +static void run_static_queries(S *shard, std::vector &queries) { + size_t total; + for (size_t i=0; i static void insert_records(DE *extension, size_t start, size_t stop, std::vector &records, std::vector &to_delete, diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 9895295..84cd8b2 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -72,12 +72,23 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp index 3643abb..ac3ed1b 100644 --- a/benchmarks/pgm_bench.cpp +++ b/benchmarks/pgm_bench.cpp @@ -69,12 +69,23 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp index 3dc619e..93e7616 100644 --- a/benchmarks/ts_bench.cpp +++ b/benchmarks/ts_bench.cpp @@ -19,9 +19,9 @@ typedef de::Record Rec; -typedef de::TrieSpline PGM; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::TrieSpline Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; void usage(char *progname) { @@ -69,12 +69,23 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vptree_bench.cpp index f4c7d0e..1219076 100644 --- a/benchmarks/vptree_bench.cpp +++ b/benchmarks/vptree_bench.cpp @@ -75,12 +75,23 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; -- cgit v1.2.3 From 34fd8ad935e6359d20a5d6c949e67495d0842f8f Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 14:40:19 -0400 Subject: More trie baseline tests --- benchmarks/cedar_trie.cpp | 97 +++++++++++++++++++++++++++++++ benchmarks/hat_trie.cpp | 98 +++++++++++++++++++++++++++++++ benchmarks/louds_insertion_tput.cpp | 112 ++++++++++++++++++++++++++++++++++++ 3 files changed, 307 insertions(+) create mode 100644 benchmarks/cedar_trie.cpp create mode 100644 benchmarks/hat_trie.cpp create mode 100644 benchmarks/louds_insertion_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/cedar_trie.cpp b/benchmarks/cedar_trie.cpp new file mode 100644 index 0000000..7499ce7 --- /dev/null +++ b/benchmarks/cedar_trie.cpp @@ -0,0 +1,97 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include +#include + +#include "cedar.h" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + +std::vector strings; + +typedef cedar::da Trie; + +void insert_thread(int64_t start, int64_t end, Trie * trie) { + for (uint64_t i=start; iupdate(strings[i].c_str(), strings[i].size(), i+1); + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); + + read_data(fname, n); + + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } + + auto trie = new Trie(); + + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), trie); + TIMER_STOP(); + + auto total_time = TIMER_RESULT(); + + size_t m = 100; + TIMER_START(); + for (size_t i=0; iexactMatchSearch(strings[j].c_str()); + //assert(*(res)+1 == j); + } + TIMER_STOP(); + + auto query_time = TIMER_RESULT(); + + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(), + i_tput, q_lat); + + fprintf(stdout, "%ld\n", trie->total_size()); + + delete trie; + + fflush(stderr); +} + diff --git a/benchmarks/hat_trie.cpp b/benchmarks/hat_trie.cpp new file mode 100644 index 0000000..3b4c7d3 --- /dev/null +++ b/benchmarks/hat_trie.cpp @@ -0,0 +1,98 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "htrie_map.h" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + +std::vector strings; + +typedef tsl::htrie_map Trie; + +void insert_thread(int64_t start, int64_t end, Trie * trie) { + for (uint64_t i=start; iinsert(strings[i].c_str(), i+1); + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); + + read_data(fname, n); + + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } + + auto trie = new Trie(); + + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), trie); + TIMER_STOP(); + + auto total_time = TIMER_RESULT(); + + size_t m = 100; + TIMER_START(); + for (size_t i=0; ifind(strings[j]); + if (*res != (j+1)) { + fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str()); + } + //assert(*(res)+1 == j); + } + TIMER_STOP(); + + auto query_time = TIMER_RESULT(); + + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(), + i_tput, q_lat); + + + delete trie; + + fflush(stderr); +} + diff --git a/benchmarks/louds_insertion_tput.cpp b/benchmarks/louds_insertion_tput.cpp new file mode 100644 index 0000000..d772f3b --- /dev/null +++ b/benchmarks/louds_insertion_tput.cpp @@ -0,0 +1,112 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "framework/DynamicExtension.h" +#include "shard/LoudsPatricia.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + + +typedef de::Record Rec; +typedef de::LoudsPatricia Trie; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; + +std::vector> strings; + +void insert_thread(int64_t start, int64_t end, Ext *extension) { + for (uint64_t i=start; iinsert(r)) { + _mm_pause(); + } + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); + + read_data(fname, n); + + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } + + std::vector scale_factors = {2, 4, 6, 8, 10, 12}; + std::vector buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000}; + + for (auto &sf : scale_factors) { + for (auto &bf_sz : buffer_sizes) { + + auto extension = new Ext(bf_sz, bf_sz, sf); + + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), extension); + TIMER_STOP(); + + auto total_time = TIMER_RESULT(); + + size_t m = 100; + TIMER_START(); + for (size_t i=0; i parms = {strings[j].get()}; + + auto res = extension->query(&parms); + auto ans = res.get(); + } + TIMER_STOP(); + + auto query_time = TIMER_RESULT(); + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; + + fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), + bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); + + delete extension; + + fflush(stderr); + } + } +} + -- cgit v1.2.3 From ae189be91ec135629c7ad62888cd98183494c075 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 15:43:57 -0400 Subject: Adjusted benchmark parameters --- benchmarks/irs_bench.cpp | 2 +- benchmarks/pgm_bench.cpp | 2 +- benchmarks/ts_bench.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 84cd8b2..976adf9 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -19,7 +19,7 @@ typedef de::Record Rec; typedef de::ISAMTree Shard; typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; void usage(char *progname) { diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp index ac3ed1b..e0baab4 100644 --- a/benchmarks/pgm_bench.cpp +++ b/benchmarks/pgm_bench.cpp @@ -21,7 +21,7 @@ typedef de::Record Rec; typedef de::PGM Shard; typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; void usage(char *progname) { diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp index 93e7616..3d44ac5 100644 --- a/benchmarks/ts_bench.cpp +++ b/benchmarks/ts_bench.cpp @@ -21,7 +21,7 @@ typedef de::Record Rec; typedef de::TrieSpline Shard; typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; void usage(char *progname) { -- cgit v1.2.3 From 8479f3ce863dfb6d3b20ff4678fa6fe92ee86b52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 16:50:18 -0400 Subject: Fixed some benchmarking bugs --- benchmarks/include/standard_benchmarks.h | 6 ++++++ benchmarks/irs_bench.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index fe53d62..83e3aaa 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -40,6 +40,12 @@ static void run_static_queries(S *shard, std::vector &queries) { auto q = &queries[i]; auto state = Q::get_query_state(shard, q); + + std::vector shards = {shard}; + std::vector states = {state}; + + Q::process_query_states(q, states, nullptr); + auto res = Q::query(shard, state, q); total += res.size(); diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 976adf9..36d88f6 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -50,7 +50,7 @@ int main(int argc, char **argv) { } /* read in the range queries and add sample size and rng for sampling */ auto queries = read_range_queries(q_fname, .001); - for (auto q : queries) { + for (auto &q : queries) { q.sample_size = 1000; q.rng = rng; } -- cgit v1.2.3 From 438feac7e56fee425d9c6f1a43298ff9dc5b71d1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 17:38:16 -0400 Subject: Properly implemented support for iteratively decomposable problems --- benchmarks/include/standard_benchmarks.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 83e3aaa..f5af558 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -22,20 +22,17 @@ static double delete_proportion = 0.05; template static void run_queries(DE *extension, std::vector &queries) { - size_t total; for (size_t i=0; iquery(q); auto r = res.get(); - total += r.size(); } } template static void run_static_queries(S *shard, std::vector &queries) { - size_t total; for (size_t i=0; i &queries) { std::vector states = {state}; Q::process_query_states(q, states, nullptr); - auto res = Q::query(shard, state, q); - - total += res.size(); } } -- cgit v1.2.3 From 0bb5b46ec2b64be17f6269631915e62d02e315e4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 12:27:43 -0400 Subject: Added plain BSM and MDSP BSM benchmark --- benchmarks/include/file_util.h | 16 ++++ benchmarks/include/standard_benchmarks.h | 23 +++++- benchmarks/include/triespline_bsm.h | 134 +++++++++++++++++++++++++++++++ benchmarks/ts_bsm_bench.cpp | 70 ++++++++++++++++ benchmarks/ts_mdsp_bench.cpp | 70 ++++++++++++++++ 5 files changed, 311 insertions(+), 2 deletions(-) create mode 100644 benchmarks/include/triespline_bsm.h create mode 100644 benchmarks/ts_bsm_bench.cpp create mode 100644 benchmarks/ts_mdsp_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 2a3300a..ea8b42e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -97,6 +97,22 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { return records; } +template +static std::vector> read_sosd_file_pair(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector> records(n); + for (size_t i=0; i +template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; iquery(q); - auto r = res.get(); + if constexpr (!BSM) { + auto r = res.get(); + } } } @@ -47,6 +50,22 @@ static void run_static_queries(S *shard, std::vector &queries) { } +/* + * Insert records into a standard Bentley-Saxe extension. Deletes are not + * supported. + */ +template +static void insert_records(psudb::bsm::BentleySaxe *extension, + size_t start, size_t stop, std::vector &records) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i]); + } + + psudb::progress_update(1, "Insert Progress"); +} template diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h new file mode 100644 index 0000000..9613a62 --- /dev/null +++ b/benchmarks/include/triespline_bsm.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include "ts/builder.h" + +template +class BSMTrieSpline { +private: + typedef std::pair R; + +public: + struct RangeQueryParameters { + K lower_bound; + K upper_bound; + }; + +public: + static BSMTrieSpline *build(std::vector &records) { + std::sort(records.begin(), records.end()); + return new BSMTrieSpline(records); + } + + static BSMTrieSpline *build_presorted(std::vector &records) { + return new BSMTrieSpline(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMTrieSpline::RangeQueryParameters*) q; + + size_t idx = lower_bound(parms->lower_bound); + + while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + rs.emplace_back(m_data[idx++]); + } + + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb) { + rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + return std::move(rsa); + } + + size_t record_count() { + return m_data.size(); + } + + + ~BSMTrieSpline() = default; + + +private: + std::vector m_data; + K m_max_key; + K m_min_key; + ts::TrieSpline m_ts; + + BSMTrieSpline(std::vector &records) { + m_data = std::move(records); + m_min_key = m_data[0].first; + m_max_key = m_data[m_data.size() - 1].first; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 1) { + m_ts = bldr.Finalize(); + } + } + + size_t lower_bound(K key) { + if (m_data.size() == 0) { + return 1; + } else if (m_data.size() == 1) { + if (m_data[0].first < key) { + return 1; + } else { + return 0; + } + } + + auto bound = m_ts.GetSearchBound(key); + size_t idx = bound.begin; + + if (idx >= m_data.size()) { + return m_data.size(); + } + + // If the region to search is less than some pre-specified + // amount, perform a linear scan to locate the record. + if (bound.end - bound.begin < 256) { + while (idx < bound.end && m_data[idx].first < key) { + idx++; + } + } else { + // Otherwise, perform a binary search + idx = bound.begin; + size_t max = bound.end; + + while (idx < max) { + size_t mid = (idx + max) / 2; + if (key > m_data[mid].first) { + idx = mid + 1; + } else { + max = mid; + } + } + } + + if (idx == m_data.size()) { + return m_data.size(); + } + + if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) { + return idx-1; + } + + return idx; + } +}; diff --git a/benchmarks/ts_bsm_bench.cpp b/benchmarks/ts_bsm_bench.cpp new file mode 100644 index 0000000..366abce --- /dev/null +++ b/benchmarks/ts_bsm_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "include/triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "include/standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/ts_mdsp_bench.cpp b/benchmarks/ts_mdsp_bench.cpp new file mode 100644 index 0000000..5e5001d --- /dev/null +++ b/benchmarks/ts_mdsp_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "include/triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "include/standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3 From 4a1dde3148e0e84b47c884bc0bb69c60678b4558 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:09:07 -0400 Subject: Benchmark update+reorganization The Alex benchmark isn't updated yet. --- benchmarks/include/benchmark_types.h | 5 +- benchmarks/include/standard_benchmarks.h | 69 +++++++++-- benchmarks/irs_bench.cpp | 97 --------------- benchmarks/pgm_bench.cpp | 94 -------------- benchmarks/string_insertion_tput.cpp | 32 ++--- benchmarks/ts_bench.cpp | 94 -------------- benchmarks/ts_bsm_bench.cpp | 70 ----------- benchmarks/ts_mdsp_bench.cpp | 70 ----------- benchmarks/vldb/alex_bench.cpp | 205 +++++++++++++++++++++++++++++++ benchmarks/vldb/btree_bench.cpp | 90 ++++++++++++++ benchmarks/vldb/dynamic_pgm_bench.cpp | 77 ++++++++++++ benchmarks/vldb/irs_bench.cpp | 97 +++++++++++++++ benchmarks/vldb/mtree_bench.cpp | 80 ++++++++++++ benchmarks/vldb/pgm_bench.cpp | 94 ++++++++++++++ benchmarks/vldb/ts_bench.cpp | 94 ++++++++++++++ benchmarks/vldb/ts_bsm_bench.cpp | 70 +++++++++++ benchmarks/vldb/ts_mdsp_bench.cpp | 70 +++++++++++ benchmarks/vldb/vptree_bench.cpp | 100 +++++++++++++++ benchmarks/vptree_bench.cpp | 100 --------------- 19 files changed, 1047 insertions(+), 561 deletions(-) delete mode 100644 benchmarks/irs_bench.cpp delete mode 100644 benchmarks/pgm_bench.cpp delete mode 100644 benchmarks/ts_bench.cpp delete mode 100644 benchmarks/ts_bsm_bench.cpp delete mode 100644 benchmarks/ts_mdsp_bench.cpp create mode 100644 benchmarks/vldb/alex_bench.cpp create mode 100644 benchmarks/vldb/btree_bench.cpp create mode 100644 benchmarks/vldb/dynamic_pgm_bench.cpp create mode 100644 benchmarks/vldb/irs_bench.cpp create mode 100644 benchmarks/vldb/mtree_bench.cpp create mode 100644 benchmarks/vldb/pgm_bench.cpp create mode 100644 benchmarks/vldb/ts_bench.cpp create mode 100644 benchmarks/vldb/ts_bsm_bench.cpp create mode 100644 benchmarks/vldb/ts_mdsp_bench.cpp create mode 100644 benchmarks/vldb/vptree_bench.cpp delete mode 100644 benchmarks/vptree_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index fcdeac6..13964e8 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -3,8 +3,9 @@ #include #include "psu-ds/BTree.h" #include "framework/interface/Record.h" +#include "pgm/pgm_index_dynamic.hpp" -/* TLX BTree definitions*/ +/* BTree definitions*/ template struct btree_record { K key; @@ -50,3 +51,5 @@ struct euclidean_distance { typedef mt::mtree MTree; #endif +typedef pgm::DynamicPGMIndex> PGM; + diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 74bf93f..aaef679 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -14,6 +14,7 @@ #include "framework/DynamicExtension.h" #include "framework/interface/Query.h" +#include "query/irs.h" #include "psu-util/progress.h" #include "benchmark_types.h" #include "psu-util/bentley-saxe.h" @@ -24,15 +25,41 @@ static double delete_proportion = 0.05; template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; iquery(q); - if constexpr (!BSM) { - auto r = res.get(); + if constexpr (std::is_same_v) { + std::vector result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + } else if constexpr (std::is_same_v) { + size_t tot = 0; + auto ptr = extension->find(queries[i].lower_bound); + while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { + tot++; + ++ptr; + } + } else { + auto res = extension->query(&queries[i]); + if constexpr (!BSM) { + auto r = res.get(); + } } } } +template +static void run_btree_queries(BenchBTree *btree, std::vector> &queries) { + std::vector sample_set; + sample_set.reserve(queries[0].sample_size); + + for (size_t i=0; irange_sample(queries[i].lower_bound, queries[i].upper_bound, queries[i].sample_size, sample_set, queries[i].rng); + } +} + template static void run_static_queries(S *shard, std::vector &queries) { @@ -68,26 +95,42 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, } -template -static void insert_records(DE *extension, size_t start, size_t stop, +template +static void insert_records(DE *structure, size_t start, size_t stop, std::vector &records, std::vector &to_delete, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); size_t reccnt = 0; for (size_t i=start; iinsert(records[i])) { - psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); - usleep(1); + + if constexpr (std::is_same_v) { + structure->insert(records[i]); + } else if constexpr (std::is_same_v) { + structure->add(records[i]); + } else if constexpr (std::is_same_v) { + structure->insert_or_assign(records[i].key, records[i].value); + } else { + while (!structure->insert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } } if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - while (!extension->erase(records[to_delete[delete_idx]])) { - usleep(1); + if constexpr (std::is_same_v) { + structure->erase_one(records[to_delete[delete_idx]].key); + } else if constexpr (std::is_same_v) { + structure->remove(records[to_delete[delete_idx]]); + } else if constexpr (std::is_same_v) { + structure->erase(records[to_delete[delete_idx]].key); + } else { + while (!structure->erase(records[to_delete[delete_idx]])) { + usleep(1); + } } - delete_idx++; g_deleted_records++; } diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp deleted file mode 100644 index 36d88f6..0000000 --- a/benchmarks/irs_bench.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include "framework/DynamicExtension.h" -#include "shard/ISAMTree.h" -#include "query/irs.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" - -#include - -#include "psu-util/timer.h" -#include "include/standard_benchmarks.h" - - -typedef de::Record Rec; -typedef de::ISAMTree Shard; -typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; -typedef de::irs::Parms QP; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new Ext(12000, 12001, 8, 0, 64); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file(d_fname, n); - std::vector to_delete(n * delete_proportion); - size_t j=0; - for (size_t i=0; i(q_fname, .001); - for (auto &q : queries) { - q.sample_size = 1000; - q.rng = rng; - } - - /* warmup structure w/ 10% of records */ - size_t warmup = .3 * n; - size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); - - extension->await_next_epoch(); - - TIMER_INIT(); - - TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - auto shard = extension->create_static_structure(); - - TIMER_START(); - run_static_queries(shard, queries); - TIMER_STOP(); - - auto static_latency = TIMER_RESULT() / queries.size(); - - auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); - auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage(); - - fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp deleted file mode 100644 index e0baab4..0000000 --- a/benchmarks/pgm_bench.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "framework/DynamicExtension.h" -#include "shard/PGM.h" -#include "query/rangecount.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include "include/standard_benchmarks.h" - -#include - -#include "psu-util/timer.h" - - -typedef de::Record Rec; -typedef de::PGM Shard; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; -typedef de::rc::Parms QP; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new Ext(12000, 12001, 8, 0, 64); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file(d_fname, n); - std::vector to_delete(n * delete_proportion); - size_t j=0; - for (size_t i=0; i(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); - - extension->await_next_epoch(); - - TIMER_INIT(); - - TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - auto shard = extension->create_static_structure(); - - TIMER_START(); - run_static_queries(shard, queries); - TIMER_STOP(); - - auto static_latency = TIMER_RESULT() / queries.size(); - - auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); - auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); - - fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index f4a519a..8fa7f44 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -69,7 +69,13 @@ int main(int argc, char **argv) { fprintf(stderr, "Finished reading from file.\n"); } - auto extension = new Ext(1000, 12000, 8); + std::vector scale_factors = {2, 4, 6, 8, 10, 12}; + std::vector buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000}; + + for (auto &sf : scale_factors) { + for (auto &bf_sz : buffer_sizes) { + + auto extension = new Ext(bf_sz, bf_sz, sf); TIMER_INIT(); TIMER_START(); @@ -97,33 +103,15 @@ int main(int argc, char **argv) { auto query_time = TIMER_RESULT(); - - auto shard = extension->create_static_structure(); - TIMER_START(); - for (size_t i=0; i parms = {strings[j].get()}; - - auto res = Q::query(shard, nullptr, &parms); - - if (res[0].rec.value != j) { - fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].get()); - } - } - TIMER_STOP(); - - auto shard_query_time = TIMER_RESULT(); - double i_tput = (double) n / (double) total_time * 1e9; size_t q_lat = query_time / m; - size_t s_q_lat = shard_query_time / m; - fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), - i_tput, q_lat, s_q_lat, extension->get_memory_usage(), shard->get_memory_usage()); + fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), + bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); delete extension; - delete shard; + }} fflush(stderr); } diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp deleted file mode 100644 index 3d44ac5..0000000 --- a/benchmarks/ts_bench.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "framework/DynamicExtension.h" -#include "shard/TrieSpline.h" -#include "query/rangecount.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include "include/standard_benchmarks.h" - -#include - -#include "psu-util/timer.h" - - -typedef de::Record Rec; -typedef de::TrieSpline Shard; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; -typedef de::rc::Parms QP; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new Ext(12000, 12001, 8, 0, 64); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file(d_fname, n); - std::vector to_delete(n * delete_proportion); - size_t j=0; - for (size_t i=0; i(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); - - extension->await_next_epoch(); - - TIMER_INIT(); - - TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - auto shard = extension->create_static_structure(); - - TIMER_START(); - run_static_queries(shard, queries); - TIMER_STOP(); - - auto static_latency = TIMER_RESULT() / queries.size(); - - auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); - auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); - - fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/ts_bsm_bench.cpp b/benchmarks/ts_bsm_bench.cpp deleted file mode 100644 index 366abce..0000000 --- a/benchmarks/ts_bsm_bench.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "include/triespline_bsm.h" -#include "psu-util/bentley-saxe.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include "query/rangecount.h" -#include "psu-util/timer.h" -#include "include/standard_benchmarks.h" - -typedef std::pair Rec; -typedef de::Record FRec; - -typedef BSMTrieSpline Shard; -typedef de::rc::Parms QP; -typedef psudb::bsm::BentleySaxe Ext; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new psudb::bsm::BentleySaxe(); - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file_pair(d_fname, n); - auto queries = read_range_queries(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - insert_records(extension, 0, warmup, data); - - TIMER_INIT(); - - TIMER_START(); - insert_records(extension, warmup, data.size(), data); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/ts_mdsp_bench.cpp b/benchmarks/ts_mdsp_bench.cpp deleted file mode 100644 index 5e5001d..0000000 --- a/benchmarks/ts_mdsp_bench.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "include/triespline_bsm.h" -#include "psu-util/bentley-saxe.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include "query/rangecount.h" -#include "psu-util/timer.h" -#include "include/standard_benchmarks.h" - -typedef std::pair Rec; -typedef de::Record FRec; - -typedef BSMTrieSpline Shard; -typedef de::rc::Parms QP; -typedef psudb::bsm::BentleySaxe Ext; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new psudb::bsm::BentleySaxe(); - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file_pair(d_fname, n); - auto queries = read_range_queries(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - insert_records(extension, 0, warmup, data); - - TIMER_INIT(); - - TIMER_START(); - insert_records(extension, warmup, data.size(), data); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp new file mode 100644 index 0000000..f75afa6 --- /dev/null +++ b/benchmarks/vldb/alex_bench.cpp @@ -0,0 +1,205 @@ +#include "alex.h" +#include "include/standalone_utility.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef alex::Alex Alex; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::pair *insert_vec = new std::pair[count]; + Alex *alex = new Alex(); + + size_t cnt = 0; + record rec; + while (cnt < count && next_record(file, rec)) { + insert_vec[cnt] = {rec.key, rec.value}; + cnt++; + } + + std::sort(insert_vec, insert_vec + count); + + alex->bulk_load(insert_vec, count); + delete[] insert_vec; + + return alex; +} + + +static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + size_t BATCH=1000; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i(insert_stop - insert_start).count(); + } + + progress_update(1.0, "inserting:"); + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); +} + + + +static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) +{ + char progbuf[25]; + sprintf(progbuf, "sampling:"); + + size_t batch_size = 100; + size_t batches = trial_cnt / batch_size; + size_t total_time = 0; + + std::vector result_set; + + for (int i=0; i(stop - start).count(); + } + + size_t latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", latency); +} + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: alex_rq_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + bool use_osm = false; + + double insert_batch = 0.8; + + init_bench_env(record_count, true, use_osm); + auto queries = read_range_queries(qfilename, .0001); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + + fprintf(stderr, "Size: %ld\n", alex->size()); + size_t insert_cnt = record_count - warmup_cnt; + + alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); + size_t memory_usage = alex->model_size() + alex->data_size(); + + fprintf(stderr, "Size: %ld\n", alex->size()); + fprintf(stdout, "%ld\t", memory_usage); + + alex_rq_bench(*alex, queries); + fprintf(stdout, "\n"); + + delete_bench_env(); + delete alex; + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp new file mode 100644 index 0000000..12107c6 --- /dev/null +++ b/benchmarks/vldb/btree_bench.cpp @@ -0,0 +1,90 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "benchmark_types.h" + +#include + +#include "psu-util/timer.h" +#include "standard_benchmarks.h" +#include "psu-ds/BTree.h" + +typedef btree_record Rec; + +typedef de::ISAMTree Shard; +typedef de::irs::Query Q; +typedef de::irs::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto btree = BenchBTree(); + + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + for (auto &q : queries) { + q.sample_size = 1000; + q.rng = rng; + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .3 * n; + size_t delete_idx = 0; + insert_records(&btree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + TIMER_START(); + insert_records(&btree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_btree_queries(&btree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto btree_size = btree.get_stats().inner_nodes * psudb::btree_default_traits::inner_slots * (sizeof(int64_t) + sizeof(void*)); + + /* account for memory wasted on gaps in the structure */ + btree_size += btree.get_stats().leaves * psudb::btree_default_traits::leaf_slots * sizeof(Rec); + btree_size -= btree.size() * sizeof(Rec); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, btree_size); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp new file mode 100644 index 0000000..249bc92 --- /dev/null +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -0,0 +1,77 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::rc::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + std::vector> tmp_data; + PGM pgm(tmp_data.begin(), tmp_data.end()); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(&pgm, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + TIMER_START(); + insert_records(&pgm, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(&pgm, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = pgm.size_in_bytes(); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp new file mode 100644 index 0000000..ca1e555 --- /dev/null +++ b/benchmarks/vldb/irs_bench.cpp @@ -0,0 +1,97 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" + +#include + +#include "psu-util/timer.h" +#include "standard_benchmarks.h" + + +typedef de::Record Rec; +typedef de::ISAMTree Shard; +typedef de::irs::Query Q; +typedef de::DynamicExtension Ext; +typedef de::irs::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + for (auto &q : queries) { + q.sample_size = 1000; + q.rng = rng; + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .3 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp new file mode 100644 index 0000000..35f56be --- /dev/null +++ b/benchmarks/vldb/mtree_bench.cpp @@ -0,0 +1,80 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "query/knn.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto mtree = new MTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 10); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(mtree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(mtree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete mtree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp new file mode 100644 index 0000000..f63ec8e --- /dev/null +++ b/benchmarks/vldb/pgm_bench.cpp @@ -0,0 +1,94 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/PGM.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::PGM Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::rc::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp new file mode 100644 index 0000000..a84635f --- /dev/null +++ b/benchmarks/vldb/ts_bench.cpp @@ -0,0 +1,94 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::TrieSpline Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::rc::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp new file mode 100644 index 0000000..706433d --- /dev/null +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp new file mode 100644 index 0000000..4c5bf1e --- /dev/null +++ b/benchmarks/vldb/ts_mdsp_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "standard_benchmarks.h" + +typedef std::pair Rec; +typedef de::Record FRec; + +typedef BSMTrieSpline Shard; +typedef de::rc::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp new file mode 100644 index 0000000..613c556 --- /dev/null +++ b/benchmarks/vldb/vptree_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(100, 1000, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 10); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vptree_bench.cpp deleted file mode 100644 index 1219076..0000000 --- a/benchmarks/vptree_bench.cpp +++ /dev/null @@ -1,100 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include "framework/DynamicExtension.h" -#include "shard/VPTree.h" -#include "query/knn.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include "include/standard_benchmarks.h" - -#include - -#include "psu-util/timer.h" - - -typedef Word2VecRec Rec; - -typedef de::VPTree Shard; -typedef de::knn::Query Q; -typedef de::DynamicExtension Ext; -typedef de::knn::Parms QP; - -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new Ext(100, 1000, 8, 0, 64); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - fprintf(stderr, "[I] Reading data file...\n"); - auto data = read_vector_file(d_fname, n); - - fprintf(stderr, "[I] Generating delete vector\n"); - std::vector to_delete(n * delete_proportion); - size_t j=0; - for (size_t i=0; i(q_fname, 10); - - fprintf(stderr, "[I] Warming up structure...\n"); - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); - - extension->await_next_epoch(); - - TIMER_INIT(); - - fprintf(stderr, "[I] Running Insertion Benchmark\n"); - TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - fprintf(stderr, "[I] Running Query Benchmark\n"); - TIMER_START(); - run_queries(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - auto shard = extension->create_static_structure(); - - TIMER_START(); - run_static_queries(shard, queries); - TIMER_STOP(); - - auto static_latency = TIMER_RESULT() / queries.size(); - - auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); - auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); - - fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - -- cgit v1.2.3 From 4d6a164f85352ca9f297b28bd1b677c8fc6ab4f3 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:31:03 -0400 Subject: TS Parameter sweep benchmark --- benchmarks/vldb/ts_parmsweep.cpp | 124 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 benchmarks/vldb/ts_parmsweep.cpp (limited to 'benchmarks') diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp new file mode 100644 index 0000000..fd71e11 --- /dev/null +++ b/benchmarks/vldb/ts_parmsweep.cpp @@ -0,0 +1,124 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::TrieSpline Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext2; +typedef de::rc::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .001); + + const std::vector policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING}; + const std::vector buffer_sizes = {1000, 4000, 8000, 12000, 15000, 20000}; + const std::vector scale_factors = {2, 4, 6, 8, 10, 12}; + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext(bs, bs, sf, 0, 64); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext2(bs, bs, sf, 0, 64); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + gsl_rng_free(rng); + fflush(stderr); +} + -- cgit v1.2.3 From e65edcf4030afdaef6955f366fd29d7518aa7352 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:31:14 -0400 Subject: Fixed usage printf() in benchmarks --- benchmarks/vldb/btree_bench.cpp | 2 +- benchmarks/vldb/dynamic_pgm_bench.cpp | 2 +- benchmarks/vldb/irs_bench.cpp | 2 +- benchmarks/vldb/pgm_bench.cpp | 2 +- benchmarks/vldb/ts_bench.cpp | 2 +- benchmarks/vldb/ts_bsm_bench.cpp | 2 +- benchmarks/vldb/ts_mdsp_bench.cpp | 2 +- benchmarks/vldb/vptree_bench.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp index 12107c6..6d1be9f 100644 --- a/benchmarks/vldb/btree_bench.cpp +++ b/benchmarks/vldb/btree_bench.cpp @@ -23,7 +23,7 @@ typedef de::irs::Query Q; typedef de::irs::Parms QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp index 249bc92..8d2f4dd 100644 --- a/benchmarks/vldb/dynamic_pgm_bench.cpp +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -19,7 +19,7 @@ typedef de::Record Rec; typedef de::rc::Parms QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp index ca1e555..dabe79e 100644 --- a/benchmarks/vldb/irs_bench.cpp +++ b/benchmarks/vldb/irs_bench.cpp @@ -23,7 +23,7 @@ typedef de::DynamicExtension QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp index f63ec8e..498ef8f 100644 --- a/benchmarks/vldb/pgm_bench.cpp +++ b/benchmarks/vldb/pgm_bench.cpp @@ -25,7 +25,7 @@ typedef de::DynamicExtension QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp index a84635f..5a4cc13 100644 --- a/benchmarks/vldb/ts_bench.cpp +++ b/benchmarks/vldb/ts_bench.cpp @@ -25,7 +25,7 @@ typedef de::DynamicExtension QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp index 706433d..941e3da 100644 --- a/benchmarks/vldb/ts_bsm_bench.cpp +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -22,7 +22,7 @@ typedef de::rc::Parms QP; typedef psudb::bsm::BentleySaxe Ext; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp index 4c5bf1e..44c641d 100644 --- a/benchmarks/vldb/ts_mdsp_bench.cpp +++ b/benchmarks/vldb/ts_mdsp_bench.cpp @@ -22,7 +22,7 @@ typedef de::rc::Parms QP; typedef psudb::bsm::BentleySaxe Ext; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp index 613c556..b17a57b 100644 --- a/benchmarks/vldb/vptree_bench.cpp +++ b/benchmarks/vldb/vptree_bench.cpp @@ -24,7 +24,7 @@ typedef de::DynamicExtension QP; void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile", progname); + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); } int main(int argc, char **argv) { -- cgit v1.2.3 From fb08023dea10c9a7673ceb2c8ef67718937717b8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:24:27 -0400 Subject: benchmarks/file_util: removed dependency on framework in prep for ALEX --- benchmarks/include/file_util.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ea8b42e..8dc1b5f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -8,9 +8,6 @@ #include #include -#include "framework/interface/Record.h" -#include "query/irs.h" - #pragma once template @@ -81,7 +78,7 @@ static std::vector read_knn_queries(std::string fname, size_t k) { return queries; } -template +template static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); -- cgit v1.2.3 From d710125be2958f0b76c2601c357966fd74263c87 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:24:50 -0400 Subject: BSMTriespline: updated to use a rangecount query --- benchmarks/include/triespline_bsm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 9613a62..4276074 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -34,6 +34,7 @@ public: /* return an empty result set if q is invalid */ if (q == nullptr) { + rs.push_back({0, 0}); return rs; } @@ -41,15 +42,18 @@ public: size_t idx = lower_bound(parms->lower_bound); + size_t cnt = 0; while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { - rs.emplace_back(m_data[idx++]); + cnt++; } + rs.push_back({cnt, 0}); + return std::move(rs); } std::vector query_merge(std::vector &rsa, std::vector &rsb) { - rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -57,10 +61,8 @@ public: return m_data.size(); } - ~BSMTrieSpline() = default; - private: std::vector m_data; K m_max_key; -- cgit v1.2.3 From 909ba1e59ce654db3ea9294201dec2bc826b0b72 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:25:17 -0400 Subject: Added vptree parmsweep benchmark and fixed some CMake issues --- benchmarks/vldb/vptree_parmsweep.cpp | 129 +++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 benchmarks/vldb/vptree_parmsweep.cpp (limited to 'benchmarks') diff --git a/benchmarks/vldb/vptree_parmsweep.cpp b/benchmarks/vldb/vptree_parmsweep.cpp new file mode 100644 index 0000000..2cbd521 --- /dev/null +++ b/benchmarks/vldb/vptree_parmsweep.cpp @@ -0,0 +1,129 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::DynamicExtension Ext2; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_vector_file(d_fname, n); + + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 10); + + + const std::vector policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING}; + const std::vector buffer_sizes = {100, 400, 800, 1200, 1500, 2000}; + const std::vector scale_factors = {2, 4, 6, 8, 10, 12}; + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext(bs, bs, sf, 0, 64); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext2(bs, bs, sf, 0, 64); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + gsl_rng_free(rng); + fflush(stderr); +} + -- cgit v1.2.3 From e801222023330cf36602d37be64091565172bd2d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 29 Apr 2024 13:56:23 -0400 Subject: Updated dynamic PGM benchmark to use index size --- benchmarks/vldb/dynamic_pgm_bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp index 8d2f4dd..580fa93 100644 --- a/benchmarks/vldb/dynamic_pgm_bench.cpp +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -67,7 +67,7 @@ int main(int argc, char **argv) { auto query_latency = TIMER_RESULT() / queries.size(); - auto ext_size = pgm.size_in_bytes(); + auto ext_size = pgm.index_size_in_bytes(); fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size); -- cgit v1.2.3 From c61164545f4c113fb17eb993e393bbf97373cfb3 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 29 Apr 2024 14:43:10 -0400 Subject: Alex benchmark --- benchmarks/vldb/alex_bench.cpp | 231 +++++++++++++++-------------------------- 1 file changed, 85 insertions(+), 146 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index f75afa6..76df410 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -1,5 +1,10 @@ +#define ENABLE_TIMER + #include "alex.h" -#include "include/standalone_utility.h" + +#include "file_util.h" +#include "psu-util/progress.h" +#include "psu-util/timer.h" typedef uint64_t key_type; typedef uint64_t value_type; @@ -16,190 +21,124 @@ struct query { key_type upper_bound; }; -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::pair *insert_vec = new std::pair[count]; - Alex *alex = new Alex(); +static void insert_records(Alex *structure, size_t start, size_t stop, + std::vector &records, std::vector &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { - size_t cnt = 0; - record rec; - while (cnt < count && next_record(file, rec)) { - insert_vec[cnt] = {rec.key, rec.value}; - cnt++; - } + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i].key, records[i].value); - std::sort(insert_vec, insert_vec + count); + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { - alex->bulk_load(insert_vec, count); - delete[] insert_vec; + structure->erase_one(records[i].key); + delete_idx++; + g_deleted_records++; + } + } - return alex; + psudb::progress_update(1, "Insert Progress"); } +size_t g_global_cnt = 0; -static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector &to_delete, bool binary=false) { - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - size_t BATCH=1000; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i &queries) { + for (size_t i=0; ifind(queries[i].lower_bound); + while (ptr != alex->end() && ptr.key() <= queries[i].upper_bound) { + cnt++; + ptr++; } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); - } - - progress_update(1.0, "inserting:"); - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - fprintf(stdout, "%ld\t", throughput); + g_global_cnt += cnt; + } } +Alex *warmup_alex(std::vector records, size_t cnt) { + if (cnt >= records.size()) { + fprintf(stderr, "[E] Requesting warmup with more records than are available.\n"); + exit(EXIT_FAILURE); + } + auto alex = new Alex(); + std::pair *insert_vec = new std::pair[cnt]; -static void alex_rq_bench(Alex &alex, std::vector queries, size_t trial_cnt=1) -{ - char progbuf[25]; - sprintf(progbuf, "sampling:"); - - size_t batch_size = 100; - size_t batches = trial_cnt / batch_size; - size_t total_time = 0; - - std::vector result_set; - - for (int i=0; i(stop - start).count(); + for (size_t i=0; ibulk_load(insert_vec, cnt); + delete[] insert_vec; - fprintf(stdout, "%ld\t", latency); + return alex; } int main(int argc, char **argv) { - if (argc < 5) { - fprintf(stderr, "Usage: alex_rq_bench \n"); + if (argc < 4) { + usage(argv[0]); exit(EXIT_FAILURE); } - std::string filename = std::string(argv[1]); - size_t record_count = atol(argv[2]); - double delete_prop = atof(argv[3]); - std::string qfilename = std::string(argv[4]); + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - size_t buffer_cap = 12000; - size_t scale_factor = 6; - double max_delete_prop = delete_prop; - bool use_osm = false; - double insert_batch = 0.8; + auto data = read_sosd_file(d_fname, n); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(qfilename, .0001); + auto queries = read_range_queries(q_fname, .001); - std::fstream datafile; - datafile.open(filename, std::ios::in | std::ios::binary); - std::vector to_delete; + size_t warmup = .1 * n; + size_t delete_idx = 0; - // warm up the tree with initial_insertions number of initially inserted - // records - size_t warmup_cnt = insert_batch * record_count; - auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true); + auto alex = warmup_alex(data, warmup); - fprintf(stderr, "Size: %ld\n", alex->size()); - size_t insert_cnt = record_count - warmup_cnt; + TIMER_INIT(); - alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true); - size_t memory_usage = alex->model_size() + alex->data_size(); + TIMER_START(); + insert_records(alex, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); - fprintf(stderr, "Size: %ld\n", alex->size()); - fprintf(stdout, "%ld\t", memory_usage); + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - alex_rq_bench(*alex, queries); - fprintf(stdout, "\n"); + TIMER_START(); + run_queries(alex, queries); + TIMER_STOP(); - delete_bench_env(); - delete alex; + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = alex->model_size() + alex->data_size() - (alex->size() * sizeof(record)); + + fprintf(stdout, "%ld\t%ld\t%lld\t%ld\n", insert_throughput, query_latency, ext_size, g_global_cnt); fflush(stdout); + + gsl_rng_free(rng); fflush(stderr); + delete alex; + exit(EXIT_SUCCESS); } -- cgit v1.2.3 From 47a386b50d904d3f1b7ce3cfc13c29ea96dd1e43 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Apr 2024 14:18:08 -0400 Subject: Added VPTree BSM benchmark --- benchmarks/include/standard_benchmarks.h | 39 +++- benchmarks/include/triespline_bsm.h | 3 +- benchmarks/include/vptree_bsm.h | 317 +++++++++++++++++++++++++++++++ benchmarks/vldb/mtree_bench.cpp | 2 +- benchmarks/vldb/vptree_bench.cpp | 6 +- 5 files changed, 362 insertions(+), 5 deletions(-) create mode 100644 benchmarks/include/vptree_bsm.h (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index aaef679..76b5f7c 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -34,6 +34,14 @@ static void run_queries(DE *extension, std::vector &queries) { result.emplace_back(itr->data); itr++; } + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (auto &r : result) { + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + #endif } else if constexpr (std::is_same_v) { size_t tot = 0; auto ptr = extension->find(queries[i].lower_bound); @@ -44,7 +52,26 @@ static void run_queries(DE *extension, std::vector &queries) { } else { auto res = extension->query(&queries[i]); if constexpr (!BSM) { - auto r = res.get(); + auto result = res.get(); + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=result.size()-1; i>=0; i--) { + auto &r = result[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif + } else { + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif } } } @@ -73,6 +100,16 @@ static void run_static_queries(S *shard, std::vector &queries) { Q::process_query_states(q, states, nullptr); auto res = Q::query(shard, state, q); + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i].rec; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif } } diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 4276074..dfddc43 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -2,6 +2,7 @@ #include #include +#include #include "ts/builder.h" template @@ -52,7 +53,7 @@ public: return std::move(rs); } - std::vector query_merge(std::vector &rsa, std::vector &rsb) { + std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { rsa[0].first += rsb[0].first; return std::move(rsa); } diff --git a/benchmarks/include/vptree_bsm.h b/benchmarks/include/vptree_bsm.h new file mode 100644 index 0000000..2f12fcb --- /dev/null +++ b/benchmarks/include/vptree_bsm.h @@ -0,0 +1,317 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "psu-ds/PriorityQueue.h" +#include "framework/interface/Record.h" + +template +class BSMVPTree { +public: + struct KNNQueryParms { + R point; + size_t k; + }; + +public: + static BSMVPTree *build(std::vector &records) { + return new BSMVPTree(records); + } + + static BSMVPTree *build_presorted(std::vector &records) { + return new BSMVPTree(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMVPTree::KNNQueryParms*) q; + auto pq = psudb::PriorityQueue>(parms->k, &parms->point); + + if (parms->k >= m_data.size()) { + for (size_t i=0; i::max(); + internal_search(m_root, parms->point, parms->k, pq, &farthest); + } + + size_t i=0; + while (pq.size() > 0) { + rs.push_back(*pq.peek().data); + pq.pop(); + } + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb, void* parms) { + KNNQueryParms *p = (KNNQueryParms *) parms; + R rec = p->point; + size_t k = p->k; + + std::vector output; + + psudb::PriorityQueue> pq(k, &rec); + + for (size_t i=0; icalc_distance(rec); + double cur_dist = rsa[i].calc_distance(rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&rsa[i]); + } + } + } + + for (size_t i=0; icalc_distance(rec); + double cur_dist = rsb[i].calc_distance(rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&rsb[i]); + } + } + } + + while (pq.size() > 0) { + output.emplace_back(*pq.peek().data); + pq.pop(); + } + + return std::move(output); + } + + size_t record_count() { + return m_data.size(); + } + + ~BSMVPTree() { + delete m_root; + } + +private: + + struct vp_ptr { + R *ptr; + double dist; + }; + + struct vpnode { + size_t start; + size_t stop; + bool leaf; + + double radius; + vpnode *inside; + vpnode *outside; + + vpnode() : start(0), stop(0), leaf(false), radius(0.0), inside(nullptr), outside(nullptr) {} + + ~vpnode() { + delete inside; + delete outside; + } + }; + + std::vector m_data; + std::vector m_ptrs; + vpnode *m_root; + + size_t m_node_cnt; + + BSMVPTree(std::vector &records) { + m_data = std::move(records); + m_node_cnt = 0; + + for (size_t i=0; i stop) { + return nullptr; + } + + /* base-case: create a leaf node */ + if (stop - start <= LEAFSZ) { + vpnode *node = new vpnode(); + node->start = start; + node->stop = stop; + node->leaf = true; + + m_node_cnt++; + return node; + } + + /* + * select a random element to be the root of the + * subtree + */ + auto i = start + gsl_rng_uniform_int(rng, stop - start + 1); + swap(start, i); + + /* for efficiency, we'll pre-calculate the distances between each point and the root */ + for (size_t i=start+1; i<=stop; i++) { + m_ptrs[i].dist = m_ptrs[start].ptr->calc_distance(*m_ptrs[i].ptr); + } + + /* + * partition elements based on their distance from the start, + * with those elements with distance falling below the median + * distance going into the left sub-array and those above + * the median in the right. This is easily done using QuickSelect. + */ + auto mid = (start + 1 + stop) / 2; + quickselect(start + 1, stop, mid, m_ptrs[start].ptr, rng); + + /* Create a new node based on this partitioning */ + vpnode *node = new vpnode(); + node->start = start; + + /* store the radius of the circle used for partitioning the node. */ + node->radius = m_ptrs[start].ptr->calc_distance(*m_ptrs[mid].ptr); + m_ptrs[start].dist = node->radius; + + /* recursively construct the left and right subtrees */ + node->inside = build_subtree(start + 1, mid-1, rng); + node->outside = build_subtree(mid, stop, rng); + + m_node_cnt++; + + return node; + } + + void quickselect(size_t start, size_t stop, size_t k, R *p, gsl_rng *rng) { + if (start == stop) return; + + auto pivot = partition(start, stop, p, rng); + + if (k < pivot) { + quickselect(start, pivot - 1, k, p, rng); + } else if (k > pivot) { + quickselect(pivot + 1, stop, k, p, rng); + } + } + + size_t partition(size_t start, size_t stop, R *p, gsl_rng *rng) { + auto pivot = start + gsl_rng_uniform_int(rng, stop - start); + + swap(pivot, stop); + + size_t j = start; + for (size_t i=start; i> &pq, double *farthest) { + + if (node == nullptr) return; + + if (node->leaf) { + for (size_t i=node->start; i<=node->stop; i++) { + double d = point.calc_distance(*m_ptrs[i].ptr); + if (d < *farthest) { + if (pq.size() == k) { + pq.pop(); + } + + pq.push(m_ptrs[i].ptr); + if (pq.size() == k) { + *farthest = point.calc_distance(*pq.peek().data); + } + } + } + + return; + } + + double d = point.calc_distance(*m_ptrs[node->start].ptr); + + if (d < *farthest) { + if (pq.size() == k) { + auto t = pq.peek().data; + pq.pop(); + } + pq.push(m_ptrs[node->start].ptr); + if (pq.size() == k) { + *farthest = point.calc_distance(*pq.peek().data); + } + } + + if (d < node->radius) { + if (d - (*farthest) <= node->radius) { + internal_search(node->inside, point, k, pq, farthest); + } + + if (d + (*farthest) >= node->radius) { + internal_search(node->outside, point, k, pq, farthest); + } + } else { + if (d + (*farthest) >= node->radius) { + internal_search(node->outside, point, k, pq, farthest); + } + + if (d - (*farthest) <= node->radius) { + internal_search(node->inside, point, k, pq, farthest); + } + } + } +}; diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp index 35f56be..60425da 100644 --- a/benchmarks/vldb/mtree_bench.cpp +++ b/benchmarks/vldb/mtree_bench.cpp @@ -46,7 +46,7 @@ int main(int argc, char **argv) { } } fprintf(stderr, "[I] Reading Queries\n"); - auto queries = read_knn_queries(q_fname, 10); + auto queries = read_knn_queries(q_fname, 1000); fprintf(stderr, "[I] Warming up structure...\n"); /* warmup structure w/ 10% of records */ diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp index b17a57b..0b98a52 100644 --- a/benchmarks/vldb/vptree_bench.cpp +++ b/benchmarks/vldb/vptree_bench.cpp @@ -38,7 +38,7 @@ int main(int argc, char **argv) { std::string d_fname = std::string(argv[2]); std::string q_fname = std::string(argv[3]); - auto extension = new Ext(100, 1000, 8, 0, 64); + auto extension = new Ext(1400, 1400, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); fprintf(stderr, "[I] Reading data file...\n"); @@ -53,7 +53,7 @@ int main(int argc, char **argv) { } } fprintf(stderr, "[I] Reading Queries\n"); - auto queries = read_knn_queries(q_fname, 10); + auto queries = read_knn_queries(q_fname, 1000); fprintf(stderr, "[I] Warming up structure...\n"); /* warmup structure w/ 10% of records */ @@ -82,6 +82,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); + fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); run_static_queries(shard, queries); TIMER_STOP(); @@ -96,5 +97,6 @@ int main(int argc, char **argv) { gsl_rng_free(rng); delete extension; fflush(stderr); + fflush(stdout); } -- cgit v1.2.3 From 56e7a202b492fa26137b381eea73e4c773df069d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Apr 2024 14:50:53 -0400 Subject: VPTree BSM Benchmark --- benchmarks/vldb/vptree_bsm_bench.cpp | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 benchmarks/vldb/vptree_bsm_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp new file mode 100644 index 0000000..0798ec2 --- /dev/null +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -0,0 +1,74 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "vptree_bsm.h" +#include "file_util.h" +#include "standard_benchmarks.h" +#include "query/knn.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef BSMVPTree Shard; +typedef de::knn::Parms QP; +typedef psudb::bsm::BentleySaxe Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file(d_fname, n); + auto queries = read_knn_queries(q_fname, 1000); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records(extension, 0, warmup, data); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + -- cgit v1.2.3 From 160692f4b9f80c6eba7d18d3221fc1c3e3c3139e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 13:31:24 -0400 Subject: Added error checks to file opening, and generalized key types --- benchmarks/include/file_util.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 8dc1b5f..b5b3417 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -15,6 +15,12 @@ static std::vector read_lookup_queries(std::string fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -34,6 +40,12 @@ static std::vector read_range_queries(std::string &fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -58,6 +70,11 @@ static std::vector read_knn_queries(std::string fname, size_t k) { char *line = NULL; size_t len = 0; + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + while (getline(&line, &len, qf) > 0) { char *token; QP query; @@ -83,10 +100,15 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records(n); for (size_t i=0; i> read_sosd_file_pair(std::string &fname, size std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector> records(n); for (size_t i=0; i read_vector_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records; records.reserve(n); -- cgit v1.2.3 From 5636838a6e64760c291b00107657a90428a0f9e1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 15:59:48 -0400 Subject: File Util: fixed the reading in of undesired queries --- benchmarks/include/file_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index b5b3417..ebcf17e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -49,7 +49,7 @@ static std::vector read_range_queries(std::string &fname, double selectivity size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { + if (start < stop && std::abs(sel - selectivity) < 0.00001) { QP q; q.lower_bound = start; q.upper_bound = stop; -- cgit v1.2.3 From 349cfd5090f586b7ec189b72c00786522199fe34 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:03:19 -0400 Subject: TS BSM Adjustments --- benchmarks/include/standard_benchmarks.h | 39 +++++++++++++++++++++++++++++++- benchmarks/include/triespline_bsm.h | 31 ++++++++++++++++++++----- benchmarks/vldb/ts_bsm_bench.cpp | 19 ++++++++++++---- 3 files changed, 78 insertions(+), 11 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 76b5f7c..1261a4c 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -22,6 +22,19 @@ static size_t g_deleted_records = 0; static double delete_proportion = 0.05; +static volatile size_t total = 0; + +template +static void run_queries(DE *extension, DE *ghost, std::vector &queries) { + for (size_t i=0; i res = extension->query(&queries[i]); + std::vector negres = ghost->query(&queries[i]); + auto result = res[0].first - negres[0].first; + total = result; + } +} + + template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; i &queries) { fflush(stdout); #endif } else { + total = res.size(); #ifdef BENCH_PRINT_RESULTS fprintf(stdout, "\n\n"); for (int i=res.size()-1; i>=0; i--) { @@ -123,7 +137,6 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, size_t start, size_t stop, std::vector &records) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; iinsert(records[i]); } @@ -132,6 +145,30 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, } +template +static void insert_records(psudb::bsm::BentleySaxe *extension, + psudb::bsm::BentleySaxe *ghost, + size_t start, size_t stop, std::vector &records, + std::vector &to_delete, size_t &delete_idx, + gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i]); + + if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + ghost->insert(records[to_delete[delete_idx]]); + delete_idx++; + g_deleted_records++; + } + + } + +} + + template static void insert_records(DE *structure, size_t start, size_t stop, std::vector &records, std::vector &to_delete, diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index dfddc43..eaf1079 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -18,11 +18,19 @@ public: public: static BSMTrieSpline *build(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + std::sort(records.begin(), records.end()); return new BSMTrieSpline(records); } static BSMTrieSpline *build_presorted(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + return new BSMTrieSpline(records); } @@ -44,8 +52,9 @@ public: size_t idx = lower_bound(parms->lower_bound); size_t cnt = 0; - while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + while (idx < m_data.size() && m_data[idx].first <= parms->upper_bound) { cnt++; + idx++; } rs.push_back({cnt, 0}); @@ -54,6 +63,10 @@ public: } std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { + /* initialize rsa on the first merge */ + if (rsa.size() == 0) { + rsa.push_back({0, 0}); + } rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -75,6 +88,10 @@ private: m_min_key = m_data[0].first; m_max_key = m_data[m_data.size() - 1].first; + if (m_data.size() < 50) { + return; + } + auto bldr = ts::Builder(m_min_key, m_max_key, E); for (size_t i=0; i= key) { + return i; + } } + + return m_data.size(); } auto bound = m_ts.GetSearchBound(key); diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp index 941e3da..049fd35 100644 --- a/benchmarks/vldb/ts_bsm_bench.cpp +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -37,26 +37,37 @@ int main(int argc, char **argv) { std::string q_fname = std::string(argv[3]); auto extension = new psudb::bsm::BentleySaxe(); + auto ghost = new psudb::bsm::BentleySaxe(); gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); auto data = read_sosd_file_pair(d_fname, n); - auto queries = read_range_queries(q_fname, .001); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, .0001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; - insert_records(extension, 0, warmup, data); + size_t delete_idx = 0; + insert_records(extension, ghost, 0, warmup, data, to_delete, + delete_idx, rng); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data); + insert_records(extension, ghost, warmup, data.size(), data, + to_delete, delete_idx, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries); + run_queries(extension, ghost, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); -- cgit v1.2.3 From ef2ec17c21cb331c37f25501394b009282604fcf Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:06:20 -0400 Subject: Adjusted selectivity for range benches down to .0001 --- benchmarks/vldb/alex_bench.cpp | 2 +- benchmarks/vldb/btree_bench.cpp | 2 +- benchmarks/vldb/dynamic_pgm_bench.cpp | 2 +- benchmarks/vldb/irs_bench.cpp | 2 +- benchmarks/vldb/pgm_bench.cpp | 2 +- benchmarks/vldb/ts_bench.cpp | 5 +++-- benchmarks/vldb/ts_mdsp_bench.cpp | 2 +- 7 files changed, 9 insertions(+), 8 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index 76df410..ba687f3 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -107,7 +107,7 @@ int main(int argc, char **argv) } } - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); size_t warmup = .1 * n; diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp index 6d1be9f..673da33 100644 --- a/benchmarks/vldb/btree_bench.cpp +++ b/benchmarks/vldb/btree_bench.cpp @@ -50,7 +50,7 @@ int main(int argc, char **argv) { } } /* read in the range queries and add sample size and rng for sampling */ - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); for (auto &q : queries) { q.sample_size = 1000; q.rng = rng; diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp index 580fa93..15b130f 100644 --- a/benchmarks/vldb/dynamic_pgm_bench.cpp +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) { to_delete[j++] = i; } } - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp index dabe79e..f68de63 100644 --- a/benchmarks/vldb/irs_bench.cpp +++ b/benchmarks/vldb/irs_bench.cpp @@ -49,7 +49,7 @@ int main(int argc, char **argv) { } } /* read in the range queries and add sample size and rng for sampling */ - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); for (auto &q : queries) { q.sample_size = 1000; q.rng = rng; diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp index 498ef8f..cec95df 100644 --- a/benchmarks/vldb/pgm_bench.cpp +++ b/benchmarks/vldb/pgm_bench.cpp @@ -50,7 +50,7 @@ int main(int argc, char **argv) { to_delete[j++] = i; } } - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp index 5a4cc13..81a430a 100644 --- a/benchmarks/vldb/ts_bench.cpp +++ b/benchmarks/vldb/ts_bench.cpp @@ -3,6 +3,7 @@ */ #define ENABLE_TIMER +#define TS_TEST #include @@ -39,7 +40,7 @@ int main(int argc, char **argv) { std::string d_fname = std::string(argv[2]); std::string q_fname = std::string(argv[3]); - auto extension = new Ext(12000, 12001, 8, 0, 64); + auto extension = new Ext(8000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); auto data = read_sosd_file(d_fname, n); @@ -50,7 +51,7 @@ int main(int argc, char **argv) { to_delete[j++] = i; } } - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp index 44c641d..cc0cd99 100644 --- a/benchmarks/vldb/ts_mdsp_bench.cpp +++ b/benchmarks/vldb/ts_mdsp_bench.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) { gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); auto data = read_sosd_file_pair(d_fname, n); - auto queries = read_range_queries(q_fname, .001); + auto queries = read_range_queries(q_fname, .0001); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; -- cgit v1.2.3 From e198d64ca87f6fc05e8d62efdf720f7b2e8a8004 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 3 May 2024 09:58:13 -0400 Subject: Switched to using framework-BSM mode for Bentley-Saxe benchmarks --- benchmarks/vldb/ts_bsm_bench.cpp | 52 +++++++++++++++++++++++------------- benchmarks/vldb/vptree_bsm_bench.cpp | 46 ++++++++++++++++++++++++------- 2 files changed, 70 insertions(+), 28 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp index 049fd35..4511350 100644 --- a/benchmarks/vldb/ts_bsm_bench.cpp +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -3,23 +3,27 @@ */ #define ENABLE_TIMER +#define TS_TEST #include -#include "triespline_bsm.h" -#include "psu-util/bentley-saxe.h" +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" #include "framework/interface/Record.h" #include "file_util.h" -#include "query/rangecount.h" -#include "psu-util/timer.h" #include "standard_benchmarks.h" -typedef std::pair Rec; -typedef de::Record FRec; +#include -typedef BSMTrieSpline Shard; -typedef de::rc::Parms QP; -typedef psudb::bsm::BentleySaxe Ext; +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::TrieSpline Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; +typedef de::rc::Parms QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -36,11 +40,10 @@ int main(int argc, char **argv) { std::string d_fname = std::string(argv[2]); std::string q_fname = std::string(argv[3]); - auto extension = new psudb::bsm::BentleySaxe(); - auto ghost = new psudb::bsm::BentleySaxe(); - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file_pair(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(extension, ghost, 0, warmup, data, to_delete, - delete_idx, rng); + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, ghost, warmup, data.size(), data, - to_delete, delete_idx, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, ghost, queries); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp index 0798ec2..8e6f795 100644 --- a/benchmarks/vldb/vptree_bsm_bench.cpp +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -4,10 +4,12 @@ #define ENABLE_TIMER -#include "vptree_bsm.h" +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" #include "file_util.h" #include "standard_benchmarks.h" -#include "query/knn.h" #include @@ -16,9 +18,10 @@ typedef Word2VecRec Rec; -typedef BSMVPTree Shard; +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; typedef de::knn::Parms QP; -typedef psudb::bsm::BentleySaxe Ext; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -35,23 +38,36 @@ int main(int argc, char **argv) { std::string d_fname = std::string(argv[2]); std::string q_fname = std::string(argv[3]); - auto extension = new Ext(); + auto extension = new Ext(1, 1400, 2, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); fprintf(stderr, "[I] Reading data file...\n"); auto data = read_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 1000); fprintf(stderr, "[I] Warming up structure...\n"); /* warmup structure w/ 10% of records */ size_t warmup = .1 * n; - insert_records(extension, 0, warmup, data); + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); TIMER_INIT(); fprintf(stderr, "[I] Running Insertion Benchmark\n"); TIMER_START(); - insert_records(extension, warmup, data.size(), data); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); @@ -59,12 +75,24 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries(extension, queries); + run_queries(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; -- cgit v1.2.3 From 675cf7f7558ebaef15f398d90cc3d1d91457b219 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 3 May 2024 11:01:47 -0400 Subject: FST benchmarks --- benchmarks/include/file_util.h | 45 ++++++++++++++++- benchmarks/vldb/fst_bench.cpp | 100 ++++++++++++++++++++++++++++++++++++++ benchmarks/vldb/fst_bsm_bench.cpp | 100 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 benchmarks/vldb/fst_bench.cpp create mode 100644 benchmarks/vldb/fst_bsm_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -7,8 +9,10 @@ #include #include #include +#include + +#include "psu-util/progress.h" -#pragma once template static std::vector read_lookup_queries(std::string fname, double selectivity) { @@ -35,6 +39,20 @@ static std::vector read_lookup_queries(std::string fname, double selectivity return queries; } +template +static std::vector generate_string_lookup_queries(std::vector> &strings, size_t cnt, gsl_rng *rng) { + std::vector queries; + + for (size_t i=0; i static std::vector read_range_queries(std::string &fname, double selectivity) { std::vector queries; @@ -173,3 +191,28 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } + + +static std::vector>read_string_file(std::string fname, size_t n=10000000) { + + std::fstream file; + file.open(fname, std::ios::in); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector> strings; + strings.reserve(n); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } + + return strings; +} diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp new file mode 100644 index 0000000..276a922 --- /dev/null +++ b/benchmarks/vldb/fst_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::FSTrie Shard; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; +typedef de::pl::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries(strings, 1000, rng); + + std::vector data; + for (size_t i=0; i to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp new file mode 100644 index 0000000..15a441a --- /dev/null +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::FSTrie Shard; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; +typedef de::pl::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries(strings, 1000, rng); + + std::vector data; + for (size_t i=0; i to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3 From 01729c8772f3e25bce18f0b1fbfeee308b4c4d9f Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 8 May 2024 13:11:32 -0400 Subject: VPTree BSM: Added extra tab to keep numbers from running together --- benchmarks/vldb/vptree_bsm_bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp index 8e6f795..4a7fcb6 100644 --- a/benchmarks/vldb/vptree_bsm_bench.cpp +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -92,7 +92,7 @@ int main(int argc, char **argv) { auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); - fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + fprintf(stdout, "%ld\t%ld\t\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); gsl_rng_free(rng); delete extension; -- cgit v1.2.3 From a23bc3341923509be9b2f587ece8cd5a650f6386 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 8 May 2024 13:20:44 -0400 Subject: TSParmsweep: enabled forcing a full buffer scan --- benchmarks/vldb/ts_parmsweep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp index fd71e11..2c9412a 100644 --- a/benchmarks/vldb/ts_parmsweep.cpp +++ b/benchmarks/vldb/ts_parmsweep.cpp @@ -18,7 +18,7 @@ typedef de::Record Rec; typedef de::TrieSpline Shard; -typedef de::rc::Query Q; +typedef de::rc::Query Q; typedef de::DynamicExtension Ext; typedef de::DynamicExtension Ext2; typedef de::rc::Parms QP; -- cgit v1.2.3 From 265610435e1164a9acc39ca02ea1139acd37c46c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 9 May 2024 14:10:29 -0400 Subject: Added benchmarks for BigANN --- benchmarks/include/benchmark_types.h | 13 ++++ benchmarks/include/file_util.h | 76 +++++++++++++++++++++++ benchmarks/include/standard_benchmarks.h | 19 ++++-- benchmarks/vldb/btree_bench.cpp | 2 +- benchmarks/vldb/irs_bench.cpp | 2 +- benchmarks/vldb/mtree_bench_alt.cpp | 80 ++++++++++++++++++++++++ benchmarks/vldb/vptree_bench_alt.cpp | 102 +++++++++++++++++++++++++++++++ benchmarks/vldb/vptree_bsm_bench_alt.cpp | 92 ++++++++++++++++++++++++++++ 8 files changed, 379 insertions(+), 7 deletions(-) create mode 100644 benchmarks/vldb/mtree_bench_alt.cpp create mode 100644 benchmarks/vldb/vptree_bench_alt.cpp create mode 100644 benchmarks/vldb/vptree_bsm_bench_alt.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index 13964e8..51fc52d 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -35,6 +35,9 @@ typedef psudb::BTree, btree_key_extract< const size_t W2V_SIZE = 300; typedef de::EuclidPoint Word2VecRec; +const size_t ANNSize = 128; +typedef de::EuclidPoint ANNRec; + struct euclidean_distance { double operator()(const Word2VecRec &first, const Word2VecRec &second) const { double dist = 0; @@ -44,11 +47,21 @@ struct euclidean_distance { return std::sqrt(dist); } + + double operator()(const ANNRec &first, const ANNRec &second) const { + double dist = 0; + for (size_t i=0; i MTree; +typedef mt::mtree MTree_alt; #endif typedef pgm::DynamicPGMIndex> PGM; diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 586b44f..41eb18c 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -80,6 +80,46 @@ static std::vector read_range_queries(std::string &fname, double selectivity return queries; } + +template +static std::vector read_binary_knn_queries(std::string fname, size_t k, size_t n) { + std::vector queries; + queries.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + for (size_t i=0; i static std::vector read_knn_queries(std::string fname, size_t k) { std::vector queries; @@ -192,6 +232,42 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } +template +static std::vector read_binary_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector records; + records.reserve(n); + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + R rec; + for (size_t i=0; i>read_string_file(std::string fname, size_t n=10000000) { diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 1261a4c..b805c08 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -55,7 +55,16 @@ static void run_queries(DE *extension, std::vector &queries) { r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); } #endif - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { + std::vector result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + }else if constexpr (std::is_same_v) { size_t tot = 0; auto ptr = extension->find(queries[i].lower_bound); while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { @@ -180,7 +189,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v) { structure->insert(records[i]); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { structure->add(records[i]); } else if constexpr (std::is_same_v) { structure->insert_or_assign(records[i].key, records[i].value); @@ -196,7 +205,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v) { structure->erase_one(records[to_delete[delete_idx]].key); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { structure->remove(records[to_delete[delete_idx]]); } else if constexpr (std::is_same_v) { structure->erase(records[to_delete[delete_idx]].key); @@ -255,7 +264,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if constexpr (std::is_same_v) { de_index.erase_one(delete_vec[delete_idx++].key); #ifdef _GNU_SOURCE - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { de_index.remove(delete_vec[delete_idx++]); #endif } else { @@ -266,7 +275,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn // insert the record; #ifdef _GNU_SOURCE - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v || std::is_same_v) { de_index.add(insert_vec[i]); } else { de_index.insert(insert_vec[i]); diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp index 673da33..fa72831 100644 --- a/benchmarks/vldb/btree_bench.cpp +++ b/benchmarks/vldb/btree_bench.cpp @@ -57,7 +57,7 @@ int main(int argc, char **argv) { } /* warmup structure w/ 10% of records */ - size_t warmup = .3 * n; + size_t warmup = .1 * n; size_t delete_idx = 0; insert_records(&btree, 0, warmup, data, to_delete, delete_idx, false, rng); diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp index f68de63..e062e80 100644 --- a/benchmarks/vldb/irs_bench.cpp +++ b/benchmarks/vldb/irs_bench.cpp @@ -56,7 +56,7 @@ int main(int argc, char **argv) { } /* warmup structure w/ 10% of records */ - size_t warmup = .3 * n; + size_t warmup = .1 * n; size_t delete_idx = 0; insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp new file mode 100644 index 0000000..6b08df7 --- /dev/null +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -0,0 +1,80 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "query/knn.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto mtree = new MTree_alt(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(mtree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(mtree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete mtree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp new file mode 100644 index 0000000..b09ee7d --- /dev/null +++ b/benchmarks/vldb/vptree_bench_alt.cpp @@ -0,0 +1,102 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1400, 1400, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp new file mode 100644 index 0000000..63baf8b --- /dev/null +++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp @@ -0,0 +1,92 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1, 1400, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t\t%ld\n", insert_throughput, query_latency, ext_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + -- cgit v1.2.3 From ab0ab297959fcca370e80670e17f90a780607a80 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 10 May 2024 18:35:30 -0400 Subject: MTree structure size --- benchmarks/vldb/mtree_bench.cpp | 4 +++- benchmarks/vldb/mtree_bench_alt.cpp | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp index 60425da..cc2f41f 100644 --- a/benchmarks/vldb/mtree_bench.cpp +++ b/benchmarks/vldb/mtree_bench.cpp @@ -71,7 +71,9 @@ int main(int argc, char **argv) { auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size()); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size); gsl_rng_free(rng); delete mtree; diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp index 6b08df7..50c6117 100644 --- a/benchmarks/vldb/mtree_bench_alt.cpp +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -71,7 +71,9 @@ int main(int argc, char **argv) { auto query_latency = TIMER_RESULT() / queries.size(); - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size()); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size); gsl_rng_free(rng); delete mtree; -- cgit v1.2.3 From c611e8e56ebe72e09127fff4fb14a08dc3fcb698 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sat, 11 May 2024 12:45:25 -0400 Subject: Added program to sample the binary knn files --- benchmarks/bigann_sample.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 benchmarks/bigann_sample.cpp (limited to 'benchmarks') diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp new file mode 100644 index 0000000..aa12f91 --- /dev/null +++ b/benchmarks/bigann_sample.cpp @@ -0,0 +1,55 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "file_util.h" +#include "benchmark_types.h" + +#include + +typedef ANNRec Rec; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile sampcnt\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + size_t m = atol(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + auto data = read_binary_vector_file(d_fname, n); + + std::vector to_delete(m); + + std::unordered_map> filter; + double ratio = (double) data.size() / (double) m; + size_t j=0; + for (size_t i=0; i Date: Tue, 14 May 2024 16:00:17 -0400 Subject: Poplar Trie: updated benchmark to standard format --- benchmarks/poplar_trie.cpp | 4 ++- benchmarks/string_insertion_tput.cpp | 57 +++++++++++++++++------------------- 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'benchmarks') diff --git a/benchmarks/poplar_trie.cpp b/benchmarks/poplar_trie.cpp index c85e718..6c47465 100644 --- a/benchmarks/poplar_trie.cpp +++ b/benchmarks/poplar_trie.cpp @@ -61,10 +61,12 @@ int main(int argc, char **argv) { } auto trie = new Trie(); + size_t warmup = strings.size()*.1; + insert_thread(0, warmup, trie); TIMER_INIT(); TIMER_START(); - insert_thread(0, strings.size(), trie); + insert_thread(warmup, strings.size(), trie); TIMER_STOP(); auto total_time = TIMER_RESULT(); diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 8fa7f44..c439cb3 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "framework/DynamicExtension.h" #include "shard/FSTrie.h" @@ -21,7 +22,6 @@ typedef de::FSTrie Trie; typedef de::pl::Query Q; typedef de::DynamicExtension Ext; -std::vector> strings; void insert_thread(int64_t start, int64_t end, Ext *extension) { for (uint64_t i=start; i>read_strings(std::string fname, size_t n=10000000) { + std::vector> strings; strings.reserve(n); std::fstream file; @@ -45,6 +46,8 @@ void read_data(std::string fname, size_t n=10000000) { i++; psudb::progress_update((double) i / (double) n, "Reading file:"); } + + return strings; } void usage(char *name) { @@ -74,44 +77,38 @@ int main(int argc, char **argv) { for (auto &sf : scale_factors) { for (auto &bf_sz : buffer_sizes) { + auto extension = new Ext(bf_sz, bf_sz, sf); - auto extension = new Ext(bf_sz, bf_sz, sf); - - TIMER_INIT(); - TIMER_START(); - insert_thread(0, strings.size(), extension); - TIMER_STOP(); - - auto total_time = TIMER_RESULT(); + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), extension); + TIMER_STOP(); - size_t m = 100; - TIMER_START(); - for (size_t i=0; i parms = {strings[j].get()}; + auto total_time = TIMER_RESULT(); - auto res = extension->query(&parms); - auto ans = res.get(); - - if (ans[0].value != j) { - fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].get()); - } + size_t m = 100; + TIMER_START(); + for (size_t i=0; i parms = {strings[j].get()}; - assert(ans[0].value == j); - } - TIMER_STOP(); + auto res = extension->query(&parms); + auto ans = res.get(); + } + TIMER_STOP(); - auto query_time = TIMER_RESULT(); - - double i_tput = (double) n / (double) total_time * 1e9; - size_t q_lat = query_time / m; + auto query_time = TIMER_RESULT(); + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); - delete extension; + delete extension; - }} + } + } fflush(stderr); } -- cgit v1.2.3 From c49543e5c23af6bee35c7164ba433fc663c79041 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 14 May 2024 16:04:43 -0400 Subject: Removed patricia trie stuff --- benchmarks/louds_insertion_tput.cpp | 112 ------------------------------------ 1 file changed, 112 deletions(-) delete mode 100644 benchmarks/louds_insertion_tput.cpp (limited to 'benchmarks') diff --git a/benchmarks/louds_insertion_tput.cpp b/benchmarks/louds_insertion_tput.cpp deleted file mode 100644 index d772f3b..0000000 --- a/benchmarks/louds_insertion_tput.cpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include -#include - -#include "framework/DynamicExtension.h" -#include "shard/LoudsPatricia.h" -#include "query/pointlookup.h" -#include "framework/interface/Record.h" - -#include "psu-util/timer.h" -#include "psu-util/progress.h" - - -typedef de::Record Rec; -typedef de::LoudsPatricia Trie; -typedef de::pl::Query Q; -typedef de::DynamicExtension Ext; - -std::vector> strings; - -void insert_thread(int64_t start, int64_t end, Ext *extension) { - for (uint64_t i=start; iinsert(r)) { - _mm_pause(); - } - } -} - -void read_data(std::string fname, size_t n=10000000) { - strings.reserve(n); - - std::fstream file; - file.open(fname, std::ios::in); - - size_t i=0; - std::string line; - while (i < n && std::getline(file, line, '\n')) { - strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); - i++; - psudb::progress_update((double) i / (double) n, "Reading file:"); - } -} - -void usage(char *name) { - fprintf(stderr, "Usage:\n%s datafile record_count\n", name); -} - -int main(int argc, char **argv) { - - if (argc < 3) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - std::string fname = std::string(argv[1]); - size_t n = atol(argv[2]); - - read_data(fname, n); - - if (strings.size() == 0) { - fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); - } else { - fprintf(stderr, "Finished reading from file.\n"); - } - - std::vector scale_factors = {2, 4, 6, 8, 10, 12}; - std::vector buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000}; - - for (auto &sf : scale_factors) { - for (auto &bf_sz : buffer_sizes) { - - auto extension = new Ext(bf_sz, bf_sz, sf); - - TIMER_INIT(); - TIMER_START(); - insert_thread(0, strings.size(), extension); - TIMER_STOP(); - - auto total_time = TIMER_RESULT(); - - size_t m = 100; - TIMER_START(); - for (size_t i=0; i parms = {strings[j].get()}; - - auto res = extension->query(&parms); - auto ans = res.get(); - } - TIMER_STOP(); - - auto query_time = TIMER_RESULT(); - - double i_tput = (double) n / (double) total_time * 1e9; - size_t q_lat = query_time / m; - - fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), - bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); - - delete extension; - - fflush(stderr); - } - } -} - -- cgit v1.2.3 From 096d2e3be15361af90257b69dae4b24f751dcab8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 14 May 2024 16:18:16 -0400 Subject: Moved thread scalability bench to vldb folder --- benchmarks/insert_query_tput.cpp | 128 ------------------------------- benchmarks/vldb/thread_scaling_bench.cpp | 128 +++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 128 deletions(-) delete mode 100644 benchmarks/insert_query_tput.cpp create mode 100644 benchmarks/vldb/thread_scaling_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp deleted file mode 100644 index 29043af..0000000 --- a/benchmarks/insert_query_tput.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "framework/DynamicExtension.h" -#include "shard/ISAMTree.h" -#include "query/irs.h" -#include "framework/interface/Record.h" -#include "include/file_util.h" -#include - -#include - -#include "psu-util/timer.h" - - -typedef de::Record Rec; -typedef de::ISAMTree ISAM; -typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; -typedef de::irs::Parms QP; - -std::atomic inserts_done = false; - -struct timespec delay = {0, 500}; - -void query_thread(Ext *extension, std::vector *queries) { - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - size_t total = 0; - - while (!inserts_done.load()) { - auto q_idx = gsl_rng_uniform_int(rng, queries->size()); - - auto q = (*queries)[q_idx]; - q.rng = rng; - q.sample_size = 1000; - - auto res = extension->query(&q); - auto r = res.get(); - total += r.size(); - nanosleep(&delay, nullptr); - } - - fprintf(stderr, "%ld\n", total); - - gsl_rng_free(rng); -} - -void insert_thread(Ext *extension, size_t start, size_t stop, std::vector *records) { - fprintf(stderr, "%ld\t%ld\n", start, stop); - for (size_t i=start; iinsert((*records)[i])) { - nanosleep(&delay, nullptr); - } - } -} - -int main(int argc, char **argv) { - - if (argc < 6) { - fprintf(stderr, "Usage:\n"); - fprintf(stderr, "%s reccnt insert_threads query_threads datafile queryfile\n", argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - size_t ithread_cnt = atol(argv[2]); - size_t qthread_cnt = atol(argv[3]); - std::string d_fname = std::string(argv[4]); - std::string q_fname = std::string(argv[5]); - - auto extension = new Ext(1000, 12000, 8, 0, 64); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file(d_fname, n); - auto queries = read_range_queries(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - for (size_t i=0; iinsert(data[i])) { - usleep(1); - } - } - - extension->await_next_epoch(); - - TIMER_INIT(); - - std::vector ithreads(ithread_cnt); - std::vector qthreads(qthread_cnt); - - TIMER_START(); - size_t start = warmup; - size_t per_thread = (n - warmup) / ithread_cnt; - for (size_t i=0; i + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::irs::Query Q; +typedef de::DynamicExtension Ext; +typedef de::irs::Parms QP; + +std::atomic inserts_done = false; + +struct timespec delay = {0, 500}; + +void query_thread(Ext *extension, std::vector *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + q.rng = rng; + q.sample_size = 1000; + + auto res = extension->query(&q); + auto r = res.get(); + total += r.size(); + nanosleep(&delay, nullptr); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(Ext *extension, size_t start, size_t stop, std::vector *records) { + fprintf(stderr, "%ld\t%ld\n", start, stop); + for (size_t i=start; iinsert((*records)[i])) { + nanosleep(&delay, nullptr); + } + } +} + +int main(int argc, char **argv) { + + if (argc < 6) { + fprintf(stderr, "Usage:\n"); + fprintf(stderr, "%s reccnt insert_threads query_threads datafile queryfile\n", argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t ithread_cnt = atol(argv[2]); + size_t qthread_cnt = atol(argv[3]); + std::string d_fname = std::string(argv[4]); + std::string q_fname = std::string(argv[5]); + + auto extension = new Ext(1000, 12000, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; iinsert(data[i])) { + usleep(1); + } + } + + extension->await_next_epoch(); + + TIMER_INIT(); + + std::vector ithreads(ithread_cnt); + std::vector qthreads(qthread_cnt); + + TIMER_START(); + size_t start = warmup; + size_t per_thread = (n - warmup) / ithread_cnt; + for (size_t i=0; i Date: Tue, 14 May 2024 16:27:42 -0400 Subject: Added btree thread scaling benchmark --- benchmarks/btree_insert_query_tput.cpp | 120 ------------------------- benchmarks/vldb/btree_thread_scaling_bench.cpp | 120 +++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 120 deletions(-) delete mode 100644 benchmarks/btree_insert_query_tput.cpp create mode 100644 benchmarks/vldb/btree_thread_scaling_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/btree_insert_query_tput.cpp b/benchmarks/btree_insert_query_tput.cpp deleted file mode 100644 index f838f80..0000000 --- a/benchmarks/btree_insert_query_tput.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include - -#include "query/irs.h" -#include "include/data-proc.h" -#include "psu-ds/BTree.h" -#include - -#include - -#include "psu-util/timer.h" - - -typedef de::Record Rec; -typedef de::irs::Parms QP; - -std::atomic inserts_done = false; - -std::mutex g_btree_lock; - -void query_thread(BenchBTree *tree, std::vector *queries) { - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - size_t total = 0; - - while (!inserts_done.load()) { - auto q_idx = gsl_rng_uniform_int(rng, queries->size()); - - auto q = (*queries)[q_idx]; - - std::vector result; - g_btree_lock.lock(); - tree->range_sample(q.lower_bound, q.upper_bound, 1000, result, rng); - g_btree_lock.unlock(); - - total += result.size(); - usleep(1); - } - - fprintf(stderr, "%ld\n", total); - - gsl_rng_free(rng); -} - -void insert_thread(BenchBTree *tree, size_t start, std::vector *records) { - size_t reccnt = 0; - for (size_t i=start; isize(); i++) { - btree_record r; - r.key = (*records)[i]; - r.value = i; - - g_btree_lock.lock(); - tree->insert(r); - g_btree_lock.unlock(); - - if (i % 100000 == 0) { - fprintf(stderr, "Inserted %ld records\n", i); - } - } - - inserts_done.store(true); -} - -int main(int argc, char **argv) { - - if (argc < 5) { - fprintf(stderr, "btree_insert_query_tput reccnt query_threads datafile queryfile\n"); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - size_t qthread_cnt = atol(argv[2]); - std::string d_fname = std::string(argv[3]); - std::string q_fname = std::string(argv[4]); - - auto tree = new BenchBTree(); - gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file(d_fname, n); - auto queries = read_range_queries(q_fname, .001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - for (size_t i=0; iinsert(r); - } - - TIMER_INIT(); - - std::vector qthreads(qthread_cnt); - - TIMER_START(); - std::thread i_thrd(insert_thread, tree, warmup, &data); - for (size_t i=0; i + +#include "query/irs.h" +#include "benchmark_types.h" +#include "file_util.h" +#include + +#include + +#include "psu-util/timer.h" + + +typedef btree_record Rec; +typedef de::irs::Parms QP; + +std::atomic inserts_done = false; + +std::mutex g_btree_lock; + +void query_thread(BenchBTree *tree, std::vector *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + + std::vector result; + g_btree_lock.lock(); + tree->range_sample(q.lower_bound, q.upper_bound, 1000, result, rng); + g_btree_lock.unlock(); + + total += result.size(); + usleep(1); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(BenchBTree *tree, size_t start, std::vector *records) { + size_t reccnt = 0; + for (size_t i=start; isize(); i++) { + btree_record r; + r.key = (*records)[i].key; + r.value = i; + + g_btree_lock.lock(); + tree->insert(r); + g_btree_lock.unlock(); + + if (i % 100000 == 0) { + fprintf(stderr, "Inserted %ld records\n", i); + } + } + + inserts_done.store(true); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "btree_insert_query_tput reccnt query_threads datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t qthread_cnt = atol(argv[2]); + std::string d_fname = std::string(argv[3]); + std::string q_fname = std::string(argv[4]); + + auto tree = new BenchBTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; i r; + r.key = data[i].key; + r.value = data[i].value; + + tree->insert(r); + } + + TIMER_INIT(); + + std::vector qthreads(qthread_cnt); + + TIMER_START(); + std::thread i_thrd(insert_thread, tree, warmup, &data); + for (size_t i=0; i