From 69f36c9b4f5df19f09156689b333afe29a017eed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 23 Feb 2024 15:40:49 -0500 Subject: Benchmark updates --- benchmarks/include/benchmark_types.h | 50 ++++++ benchmarks/include/btree-util.h | 27 ---- benchmarks/include/data-proc.h | 258 ------------------------------- benchmarks/include/file_util.h | 130 ++++++++++++++++ benchmarks/include/standard_benchmarks.h | 212 +++++++++++++++++++++++++ 5 files changed, 392 insertions(+), 285 deletions(-) create mode 100644 benchmarks/include/benchmark_types.h delete mode 100644 benchmarks/include/btree-util.h delete mode 100644 benchmarks/include/data-proc.h create mode 100644 benchmarks/include/file_util.h create mode 100644 benchmarks/include/standard_benchmarks.h (limited to 'benchmarks/include') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h new file mode 100644 index 0000000..85e9565 --- /dev/null +++ b/benchmarks/include/benchmark_types.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include "psu-ds/BTree.h" +#include "mtree.h" +#include "framework/interface/Record.h" + +/* TLX BTree definitions*/ +template +struct btree_record { + K key; + V value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +template +struct btree_key_extract { + static const K &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree, btree_key_extract> BenchBTree; + + +/*MTree Definitions*/ + +const size_t W2V_SIZE = 300; +typedef de::EuclidPoint Word2VecRec; + +struct euclidean_distance { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double dist = 0; + for (size_t i=0; i MTree; + diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h deleted file mode 100644 index 571c073..0000000 --- a/benchmarks/include/btree-util.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include "psu-ds/BTree.h" - -struct btree_record { - int64_t key; - int64_t value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const int64_t &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - - diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h deleted file mode 100644 index 444cb94..0000000 --- a/benchmarks/include/data-proc.h +++ /dev/null @@ -1,258 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "psu-ds/BTree.h" - -#pragma once - -typedef int64_t key_type; -typedef int64_t value_type; -typedef uint64_t weight_type; - -static gsl_rng *g_rng; -static bool g_osm_data; - -struct btree_record { - key_type key; - value_type value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const key_type &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - -static key_type g_min_key = UINT64_MAX; -static key_type g_max_key = 0; - -static size_t g_max_record_cnt = 0; -static size_t g_reccnt = 0; - -static constexpr unsigned int DEFAULT_SEED = 0; - -static unsigned int get_random_seed() -{ - unsigned int seed = 0; - std::fstream urandom; - urandom.open("/dev/urandom", std::ios::in|std::ios::binary); - urandom.read((char *) &seed, sizeof(seed)); - urandom.close(); - - return seed; -} - -static key_type osm_to_key(const char *key_field) { - double tmp_key = (atof(key_field) + 180) * 10e6; - return (key_type) tmp_key; -} - -static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) -{ - g_rng = gsl_rng_alloc(type); - gsl_rng_set(g_rng, seed); -} - -static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) -{ - unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; - init_bench_rng(seed, gsl_rng_mt19937); - g_osm_data = osm_correction; - g_max_record_cnt = max_reccnt; - g_reccnt = 0; -} - -static void delete_bench_env() -{ - gsl_rng_free(g_rng); -} - - -template -static std::vector read_lookup_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.target_key = start; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_range_queries(std::string &fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.lower_bound = start; - q.upper_bound = stop; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_knn_queries(std::string fname, size_t k) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - char *line = NULL; - size_t len = 0; - - while (getline(&line, &len, qf) > 0) { - char *token; - QP query; - size_t idx = 0; - - token = strtok(line, " "); - do { - query.point.data[idx++] = atof(token); - } while ((token = strtok(NULL, " "))); - - query.k = k; - queries.emplace_back(query); - } - - free(line); - fclose(qf); - - return queries; -} - -/* - * NOTE: The QP type must have lower_bound and upper_bound attributes, which - * this function will initialize. Any other query parameter attributes must - * be manually initialized after the call. - */ -template -static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - for (size_t i=0; i<300; i++) { - std::string dimension; - - std::getline(line_stream, dimension, ' '); - record.data[i] = atof(dimension.c_str()); - } - - g_reccnt++; - - return true; - } - - return false; - -} - -template -static bool next_record(std::fstream &file, R &record, bool binary=false) -{ - static value_type value = 1; - if (g_reccnt >= g_max_record_cnt) return false; - - if (binary) { - if (file.good()) { - decltype(R::key) key; - - file.read((char*) &key, sizeof(key)); - record.key = key; - record.value = value; - value++; - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - return true; - } - - return false; - } - - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - std::string key_field; - std::string value_field; - std::string weight_field; - - std::getline(line_stream, value_field, '\t'); - std::getline(line_stream, key_field, '\t'); - std::getline(line_stream, weight_field, '\t'); - - record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record.value = atol(value_field.c_str()); - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - g_reccnt++; - - return true; - } - - return false; -} - -template -static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { - vec.clear(); - - size_t cnt = 0; - while (cnt < n) { - if (to_delete.size() == 0) { - return false; - } - - auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); - vec.emplace_back(to_delete[i]); - to_delete.erase(to_delete.begin() + i); - } -td: - return true; -} - -static std::vector read_sosd_file(std::string &fname, size_t n) { - std::fstream file; - file.open(fname, std::ios::in | std::ios::binary); - - std::vector records(n); - for (size_t i=0; i +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/interface/Record.h" +#include "query/irs.h" + +#pragma once + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +template +static std::vector read_sosd_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector records(n); + for (size_t i=0; i +static std::vector read_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + std::vector records; + records.reserve(n); + + for (size_t i=0; i + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include +#include +#include + +#include "framework/DynamicExtension.h" +#include "framework/interface/Query.h" +#include "psu-util/progress.h" +#include "benchmark_types.h" + +static size_t g_deleted_records = 0; +static double delete_proportion = 0.05; + +template +static void run_queries(DE *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; iquery(q); + auto r = res.get(); + total += r.size(); + } +} + + +template +static void insert_records(DE *extension, size_t start, size_t stop, + std::vector &records, std::vector &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } + + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { + + while (!extension->erase(records[to_delete[delete_idx]])) { + usleep(1); + } + + delete_idx++; + g_deleted_records++; + } + } + + psudb::progress_update(1, "Insert Progress"); +} + +template +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, gsl_rng *rng, std::vector &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + psudb::progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + + return continue_benchmark; +} + +template +static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} -- cgit v1.2.3 From b1b5ab106122e6917f6b34452be95e617506f05d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 25 Mar 2024 12:54:17 -0400 Subject: Updates for build on OpenBSD Necessary updates to get the codebase building under OpenBSD 7.5 with clang. This is a minimal set of changes to get building to work, which includes disabling several things that aren't directly compatable. More work will be necessary to get full functionality. In particular, Triespline, PGM, and the reference M-tree do not currently build on OpenBSD with clang due to GNU dependencies or other gcc specific features. --- benchmarks/include/benchmark_types.h | 4 +++- benchmarks/include/standard_benchmarks.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index 85e9565..fcdeac6 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -2,7 +2,6 @@ #include #include "psu-ds/BTree.h" -#include "mtree.h" #include "framework/interface/Record.h" /* TLX BTree definitions*/ @@ -46,5 +45,8 @@ struct euclidean_distance { } }; +#ifdef _GNU_SOURCE +#include "mtree.h" typedef mt::mtree MTree; +#endif diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index a42cdd6..5fc549d 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -102,8 +102,10 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) { if constexpr (std::is_same_v) { de_index.erase_one(delete_vec[delete_idx++].key); + #ifdef _GNU_SOURCE } else if constexpr (std::is_same_v) { de_index.remove(delete_vec[delete_idx++]); + #endif } else { de_index.erase(delete_vec[delete_idx++]); } @@ -111,11 +113,16 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn } // insert the record; + #ifdef _GNU_SOURCE if constexpr (std::is_same_v) { de_index.add(insert_vec[i]); } else { de_index.insert(insert_vec[i]); } + #else + de_index.insert(insert_vec[i]); + #endif + applied_inserts++; } auto insert_stop = std::chrono::high_resolution_clock::now(); -- cgit v1.2.3 From 7c2f43ff039795576bc0014c367b893fbbaceca4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 14:39:33 -0400 Subject: Benchmark updates --- benchmarks/include/standard_benchmarks.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 5fc549d..fe53d62 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -21,7 +21,7 @@ static size_t g_deleted_records = 0; static double delete_proportion = 0.05; template -static void run_queries(DE *extension, std::vector &queries, gsl_rng *rng) { +static void run_queries(DE *extension, std::vector &queries) { size_t total; for (size_t i=0; i &queries, gsl_rng *rng) { } +template +static void run_static_queries(S *shard, std::vector &queries) { + size_t total; + for (size_t i=0; i static void insert_records(DE *extension, size_t start, size_t stop, std::vector &records, std::vector &to_delete, -- cgit v1.2.3 From 8479f3ce863dfb6d3b20ff4678fa6fe92ee86b52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 16:50:18 -0400 Subject: Fixed some benchmarking bugs --- benchmarks/include/standard_benchmarks.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'benchmarks/include') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index fe53d62..83e3aaa 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -40,6 +40,12 @@ static void run_static_queries(S *shard, std::vector &queries) { auto q = &queries[i]; auto state = Q::get_query_state(shard, q); + + std::vector shards = {shard}; + std::vector states = {state}; + + Q::process_query_states(q, states, nullptr); + auto res = Q::query(shard, state, q); total += res.size(); -- cgit v1.2.3 From 438feac7e56fee425d9c6f1a43298ff9dc5b71d1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 17:38:16 -0400 Subject: Properly implemented support for iteratively decomposable problems --- benchmarks/include/standard_benchmarks.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 83e3aaa..f5af558 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -22,20 +22,17 @@ static double delete_proportion = 0.05; template static void run_queries(DE *extension, std::vector &queries) { - size_t total; for (size_t i=0; iquery(q); auto r = res.get(); - total += r.size(); } } template static void run_static_queries(S *shard, std::vector &queries) { - size_t total; for (size_t i=0; i &queries) { std::vector states = {state}; Q::process_query_states(q, states, nullptr); - auto res = Q::query(shard, state, q); - - total += res.size(); } } -- cgit v1.2.3 From 0bb5b46ec2b64be17f6269631915e62d02e315e4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 12:27:43 -0400 Subject: Added plain BSM and MDSP BSM benchmark --- benchmarks/include/file_util.h | 16 ++++ benchmarks/include/standard_benchmarks.h | 23 +++++- benchmarks/include/triespline_bsm.h | 134 +++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 2 deletions(-) create mode 100644 benchmarks/include/triespline_bsm.h (limited to 'benchmarks/include') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 2a3300a..ea8b42e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -97,6 +97,22 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { return records; } +template +static std::vector> read_sosd_file_pair(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector> records(n); + for (size_t i=0; i +template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; iquery(q); - auto r = res.get(); + if constexpr (!BSM) { + auto r = res.get(); + } } } @@ -47,6 +50,22 @@ static void run_static_queries(S *shard, std::vector &queries) { } +/* + * Insert records into a standard Bentley-Saxe extension. Deletes are not + * supported. + */ +template +static void insert_records(psudb::bsm::BentleySaxe *extension, + size_t start, size_t stop, std::vector &records) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i]); + } + + psudb::progress_update(1, "Insert Progress"); +} template diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h new file mode 100644 index 0000000..9613a62 --- /dev/null +++ b/benchmarks/include/triespline_bsm.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include "ts/builder.h" + +template +class BSMTrieSpline { +private: + typedef std::pair R; + +public: + struct RangeQueryParameters { + K lower_bound; + K upper_bound; + }; + +public: + static BSMTrieSpline *build(std::vector &records) { + std::sort(records.begin(), records.end()); + return new BSMTrieSpline(records); + } + + static BSMTrieSpline *build_presorted(std::vector &records) { + return new BSMTrieSpline(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMTrieSpline::RangeQueryParameters*) q; + + size_t idx = lower_bound(parms->lower_bound); + + while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + rs.emplace_back(m_data[idx++]); + } + + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb) { + rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + return std::move(rsa); + } + + size_t record_count() { + return m_data.size(); + } + + + ~BSMTrieSpline() = default; + + +private: + std::vector m_data; + K m_max_key; + K m_min_key; + ts::TrieSpline m_ts; + + BSMTrieSpline(std::vector &records) { + m_data = std::move(records); + m_min_key = m_data[0].first; + m_max_key = m_data[m_data.size() - 1].first; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 1) { + m_ts = bldr.Finalize(); + } + } + + size_t lower_bound(K key) { + if (m_data.size() == 0) { + return 1; + } else if (m_data.size() == 1) { + if (m_data[0].first < key) { + return 1; + } else { + return 0; + } + } + + auto bound = m_ts.GetSearchBound(key); + size_t idx = bound.begin; + + if (idx >= m_data.size()) { + return m_data.size(); + } + + // If the region to search is less than some pre-specified + // amount, perform a linear scan to locate the record. + if (bound.end - bound.begin < 256) { + while (idx < bound.end && m_data[idx].first < key) { + idx++; + } + } else { + // Otherwise, perform a binary search + idx = bound.begin; + size_t max = bound.end; + + while (idx < max) { + size_t mid = (idx + max) / 2; + if (key > m_data[mid].first) { + idx = mid + 1; + } else { + max = mid; + } + } + } + + if (idx == m_data.size()) { + return m_data.size(); + } + + if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) { + return idx-1; + } + + return idx; + } +}; -- cgit v1.2.3 From 4a1dde3148e0e84b47c884bc0bb69c60678b4558 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:09:07 -0400 Subject: Benchmark update+reorganization The Alex benchmark isn't updated yet. --- benchmarks/include/benchmark_types.h | 5 ++- benchmarks/include/standard_benchmarks.h | 69 ++++++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 14 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index fcdeac6..13964e8 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -3,8 +3,9 @@ #include #include "psu-ds/BTree.h" #include "framework/interface/Record.h" +#include "pgm/pgm_index_dynamic.hpp" -/* TLX BTree definitions*/ +/* BTree definitions*/ template struct btree_record { K key; @@ -50,3 +51,5 @@ struct euclidean_distance { typedef mt::mtree MTree; #endif +typedef pgm::DynamicPGMIndex> PGM; + diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 74bf93f..aaef679 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -14,6 +14,7 @@ #include "framework/DynamicExtension.h" #include "framework/interface/Query.h" +#include "query/irs.h" #include "psu-util/progress.h" #include "benchmark_types.h" #include "psu-util/bentley-saxe.h" @@ -24,15 +25,41 @@ static double delete_proportion = 0.05; template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; iquery(q); - if constexpr (!BSM) { - auto r = res.get(); + if constexpr (std::is_same_v) { + std::vector result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + } else if constexpr (std::is_same_v) { + size_t tot = 0; + auto ptr = extension->find(queries[i].lower_bound); + while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { + tot++; + ++ptr; + } + } else { + auto res = extension->query(&queries[i]); + if constexpr (!BSM) { + auto r = res.get(); + } } } } +template +static void run_btree_queries(BenchBTree *btree, std::vector> &queries) { + std::vector sample_set; + sample_set.reserve(queries[0].sample_size); + + for (size_t i=0; irange_sample(queries[i].lower_bound, queries[i].upper_bound, queries[i].sample_size, sample_set, queries[i].rng); + } +} + template static void run_static_queries(S *shard, std::vector &queries) { @@ -68,26 +95,42 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, } -template -static void insert_records(DE *extension, size_t start, size_t stop, +template +static void insert_records(DE *structure, size_t start, size_t stop, std::vector &records, std::vector &to_delete, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); size_t reccnt = 0; for (size_t i=start; iinsert(records[i])) { - psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); - usleep(1); + + if constexpr (std::is_same_v) { + structure->insert(records[i]); + } else if constexpr (std::is_same_v) { + structure->add(records[i]); + } else if constexpr (std::is_same_v) { + structure->insert_or_assign(records[i].key, records[i].value); + } else { + while (!structure->insert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } } if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - while (!extension->erase(records[to_delete[delete_idx]])) { - usleep(1); + if constexpr (std::is_same_v) { + structure->erase_one(records[to_delete[delete_idx]].key); + } else if constexpr (std::is_same_v) { + structure->remove(records[to_delete[delete_idx]]); + } else if constexpr (std::is_same_v) { + structure->erase(records[to_delete[delete_idx]].key); + } else { + while (!structure->erase(records[to_delete[delete_idx]])) { + usleep(1); + } } - delete_idx++; g_deleted_records++; } -- cgit v1.2.3 From fb08023dea10c9a7673ceb2c8ef67718937717b8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:24:27 -0400 Subject: benchmarks/file_util: removed dependency on framework in prep for ALEX --- benchmarks/include/file_util.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ea8b42e..8dc1b5f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -8,9 +8,6 @@ #include #include -#include "framework/interface/Record.h" -#include "query/irs.h" - #pragma once template @@ -81,7 +78,7 @@ static std::vector read_knn_queries(std::string fname, size_t k) { return queries; } -template +template static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); -- cgit v1.2.3 From d710125be2958f0b76c2601c357966fd74263c87 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:24:50 -0400 Subject: BSMTriespline: updated to use a rangecount query --- benchmarks/include/triespline_bsm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 9613a62..4276074 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -34,6 +34,7 @@ public: /* return an empty result set if q is invalid */ if (q == nullptr) { + rs.push_back({0, 0}); return rs; } @@ -41,15 +42,18 @@ public: size_t idx = lower_bound(parms->lower_bound); + size_t cnt = 0; while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { - rs.emplace_back(m_data[idx++]); + cnt++; } + rs.push_back({cnt, 0}); + return std::move(rs); } std::vector query_merge(std::vector &rsa, std::vector &rsb) { - rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -57,10 +61,8 @@ public: return m_data.size(); } - ~BSMTrieSpline() = default; - private: std::vector m_data; K m_max_key; -- cgit v1.2.3 From 47a386b50d904d3f1b7ce3cfc13c29ea96dd1e43 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Apr 2024 14:18:08 -0400 Subject: Added VPTree BSM benchmark --- benchmarks/include/standard_benchmarks.h | 39 +++- benchmarks/include/triespline_bsm.h | 3 +- benchmarks/include/vptree_bsm.h | 317 +++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+), 2 deletions(-) create mode 100644 benchmarks/include/vptree_bsm.h (limited to 'benchmarks/include') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index aaef679..76b5f7c 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -34,6 +34,14 @@ static void run_queries(DE *extension, std::vector &queries) { result.emplace_back(itr->data); itr++; } + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (auto &r : result) { + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + #endif } else if constexpr (std::is_same_v) { size_t tot = 0; auto ptr = extension->find(queries[i].lower_bound); @@ -44,7 +52,26 @@ static void run_queries(DE *extension, std::vector &queries) { } else { auto res = extension->query(&queries[i]); if constexpr (!BSM) { - auto r = res.get(); + auto result = res.get(); + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=result.size()-1; i>=0; i--) { + auto &r = result[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", result.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif + } else { + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i]; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif } } } @@ -73,6 +100,16 @@ static void run_static_queries(S *shard, std::vector &queries) { Q::process_query_states(q, states, nullptr); auto res = Q::query(shard, state, q); + + #ifdef BENCH_PRINT_RESULTS + fprintf(stdout, "\n\n"); + for (int i=res.size()-1; i>=0; i--) { + auto &r = res[i].rec; + fprintf(stdout, "%ld %lf %lf %lf %lf %lf %lf\n", res.size(), r.data[0], + r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); + } + fflush(stdout); + #endif } } diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 4276074..dfddc43 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -2,6 +2,7 @@ #include #include +#include #include "ts/builder.h" template @@ -52,7 +53,7 @@ public: return std::move(rs); } - std::vector query_merge(std::vector &rsa, std::vector &rsb) { + std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { rsa[0].first += rsb[0].first; return std::move(rsa); } diff --git a/benchmarks/include/vptree_bsm.h b/benchmarks/include/vptree_bsm.h new file mode 100644 index 0000000..2f12fcb --- /dev/null +++ b/benchmarks/include/vptree_bsm.h @@ -0,0 +1,317 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "psu-ds/PriorityQueue.h" +#include "framework/interface/Record.h" + +template +class BSMVPTree { +public: + struct KNNQueryParms { + R point; + size_t k; + }; + +public: + static BSMVPTree *build(std::vector &records) { + return new BSMVPTree(records); + } + + static BSMVPTree *build_presorted(std::vector &records) { + return new BSMVPTree(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMVPTree::KNNQueryParms*) q; + auto pq = psudb::PriorityQueue>(parms->k, &parms->point); + + if (parms->k >= m_data.size()) { + for (size_t i=0; i::max(); + internal_search(m_root, parms->point, parms->k, pq, &farthest); + } + + size_t i=0; + while (pq.size() > 0) { + rs.push_back(*pq.peek().data); + pq.pop(); + } + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb, void* parms) { + KNNQueryParms *p = (KNNQueryParms *) parms; + R rec = p->point; + size_t k = p->k; + + std::vector output; + + psudb::PriorityQueue> pq(k, &rec); + + for (size_t i=0; icalc_distance(rec); + double cur_dist = rsa[i].calc_distance(rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&rsa[i]); + } + } + } + + for (size_t i=0; icalc_distance(rec); + double cur_dist = rsb[i].calc_distance(rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&rsb[i]); + } + } + } + + while (pq.size() > 0) { + output.emplace_back(*pq.peek().data); + pq.pop(); + } + + return std::move(output); + } + + size_t record_count() { + return m_data.size(); + } + + ~BSMVPTree() { + delete m_root; + } + +private: + + struct vp_ptr { + R *ptr; + double dist; + }; + + struct vpnode { + size_t start; + size_t stop; + bool leaf; + + double radius; + vpnode *inside; + vpnode *outside; + + vpnode() : start(0), stop(0), leaf(false), radius(0.0), inside(nullptr), outside(nullptr) {} + + ~vpnode() { + delete inside; + delete outside; + } + }; + + std::vector m_data; + std::vector m_ptrs; + vpnode *m_root; + + size_t m_node_cnt; + + BSMVPTree(std::vector &records) { + m_data = std::move(records); + m_node_cnt = 0; + + for (size_t i=0; i stop) { + return nullptr; + } + + /* base-case: create a leaf node */ + if (stop - start <= LEAFSZ) { + vpnode *node = new vpnode(); + node->start = start; + node->stop = stop; + node->leaf = true; + + m_node_cnt++; + return node; + } + + /* + * select a random element to be the root of the + * subtree + */ + auto i = start + gsl_rng_uniform_int(rng, stop - start + 1); + swap(start, i); + + /* for efficiency, we'll pre-calculate the distances between each point and the root */ + for (size_t i=start+1; i<=stop; i++) { + m_ptrs[i].dist = m_ptrs[start].ptr->calc_distance(*m_ptrs[i].ptr); + } + + /* + * partition elements based on their distance from the start, + * with those elements with distance falling below the median + * distance going into the left sub-array and those above + * the median in the right. This is easily done using QuickSelect. + */ + auto mid = (start + 1 + stop) / 2; + quickselect(start + 1, stop, mid, m_ptrs[start].ptr, rng); + + /* Create a new node based on this partitioning */ + vpnode *node = new vpnode(); + node->start = start; + + /* store the radius of the circle used for partitioning the node. */ + node->radius = m_ptrs[start].ptr->calc_distance(*m_ptrs[mid].ptr); + m_ptrs[start].dist = node->radius; + + /* recursively construct the left and right subtrees */ + node->inside = build_subtree(start + 1, mid-1, rng); + node->outside = build_subtree(mid, stop, rng); + + m_node_cnt++; + + return node; + } + + void quickselect(size_t start, size_t stop, size_t k, R *p, gsl_rng *rng) { + if (start == stop) return; + + auto pivot = partition(start, stop, p, rng); + + if (k < pivot) { + quickselect(start, pivot - 1, k, p, rng); + } else if (k > pivot) { + quickselect(pivot + 1, stop, k, p, rng); + } + } + + size_t partition(size_t start, size_t stop, R *p, gsl_rng *rng) { + auto pivot = start + gsl_rng_uniform_int(rng, stop - start); + + swap(pivot, stop); + + size_t j = start; + for (size_t i=start; i> &pq, double *farthest) { + + if (node == nullptr) return; + + if (node->leaf) { + for (size_t i=node->start; i<=node->stop; i++) { + double d = point.calc_distance(*m_ptrs[i].ptr); + if (d < *farthest) { + if (pq.size() == k) { + pq.pop(); + } + + pq.push(m_ptrs[i].ptr); + if (pq.size() == k) { + *farthest = point.calc_distance(*pq.peek().data); + } + } + } + + return; + } + + double d = point.calc_distance(*m_ptrs[node->start].ptr); + + if (d < *farthest) { + if (pq.size() == k) { + auto t = pq.peek().data; + pq.pop(); + } + pq.push(m_ptrs[node->start].ptr); + if (pq.size() == k) { + *farthest = point.calc_distance(*pq.peek().data); + } + } + + if (d < node->radius) { + if (d - (*farthest) <= node->radius) { + internal_search(node->inside, point, k, pq, farthest); + } + + if (d + (*farthest) >= node->radius) { + internal_search(node->outside, point, k, pq, farthest); + } + } else { + if (d + (*farthest) >= node->radius) { + internal_search(node->outside, point, k, pq, farthest); + } + + if (d - (*farthest) <= node->radius) { + internal_search(node->inside, point, k, pq, farthest); + } + } + } +}; -- cgit v1.2.3 From 160692f4b9f80c6eba7d18d3221fc1c3e3c3139e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 13:31:24 -0400 Subject: Added error checks to file opening, and generalized key types --- benchmarks/include/file_util.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 8dc1b5f..b5b3417 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -15,6 +15,12 @@ static std::vector read_lookup_queries(std::string fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -34,6 +40,12 @@ static std::vector read_range_queries(std::string &fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -58,6 +70,11 @@ static std::vector read_knn_queries(std::string fname, size_t k) { char *line = NULL; size_t len = 0; + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + while (getline(&line, &len, qf) > 0) { char *token; QP query; @@ -83,10 +100,15 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records(n); for (size_t i=0; i> read_sosd_file_pair(std::string &fname, size std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector> records(n); for (size_t i=0; i read_vector_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records; records.reserve(n); -- cgit v1.2.3 From 5636838a6e64760c291b00107657a90428a0f9e1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 15:59:48 -0400 Subject: File Util: fixed the reading in of undesired queries --- benchmarks/include/file_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index b5b3417..ebcf17e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -49,7 +49,7 @@ static std::vector read_range_queries(std::string &fname, double selectivity size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { + if (start < stop && std::abs(sel - selectivity) < 0.00001) { QP q; q.lower_bound = start; q.upper_bound = stop; -- cgit v1.2.3 From 349cfd5090f586b7ec189b72c00786522199fe34 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:03:19 -0400 Subject: TS BSM Adjustments --- benchmarks/include/standard_benchmarks.h | 39 +++++++++++++++++++++++++++++++- benchmarks/include/triespline_bsm.h | 31 ++++++++++++++++++++----- 2 files changed, 63 insertions(+), 7 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 76b5f7c..1261a4c 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -22,6 +22,19 @@ static size_t g_deleted_records = 0; static double delete_proportion = 0.05; +static volatile size_t total = 0; + +template +static void run_queries(DE *extension, DE *ghost, std::vector &queries) { + for (size_t i=0; i res = extension->query(&queries[i]); + std::vector negres = ghost->query(&queries[i]); + auto result = res[0].first - negres[0].first; + total = result; + } +} + + template static void run_queries(DE *extension, std::vector &queries) { for (size_t i=0; i &queries) { fflush(stdout); #endif } else { + total = res.size(); #ifdef BENCH_PRINT_RESULTS fprintf(stdout, "\n\n"); for (int i=res.size()-1; i>=0; i--) { @@ -123,7 +137,6 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, size_t start, size_t stop, std::vector &records) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; iinsert(records[i]); } @@ -132,6 +145,30 @@ static void insert_records(psudb::bsm::BentleySaxe *extension, } +template +static void insert_records(psudb::bsm::BentleySaxe *extension, + psudb::bsm::BentleySaxe *ghost, + size_t start, size_t stop, std::vector &records, + std::vector &to_delete, size_t &delete_idx, + gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i]); + + if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { + ghost->insert(records[to_delete[delete_idx]]); + delete_idx++; + g_deleted_records++; + } + + } + +} + + template static void insert_records(DE *structure, size_t start, size_t stop, std::vector &records, std::vector &to_delete, diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index dfddc43..eaf1079 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -18,11 +18,19 @@ public: public: static BSMTrieSpline *build(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + std::sort(records.begin(), records.end()); return new BSMTrieSpline(records); } static BSMTrieSpline *build_presorted(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + return new BSMTrieSpline(records); } @@ -44,8 +52,9 @@ public: size_t idx = lower_bound(parms->lower_bound); size_t cnt = 0; - while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + while (idx < m_data.size() && m_data[idx].first <= parms->upper_bound) { cnt++; + idx++; } rs.push_back({cnt, 0}); @@ -54,6 +63,10 @@ public: } std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { + /* initialize rsa on the first merge */ + if (rsa.size() == 0) { + rsa.push_back({0, 0}); + } rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -75,6 +88,10 @@ private: m_min_key = m_data[0].first; m_max_key = m_data[m_data.size() - 1].first; + if (m_data.size() < 50) { + return; + } + auto bldr = ts::Builder(m_min_key, m_max_key, E); for (size_t i=0; i= key) { + return i; + } } + + return m_data.size(); } auto bound = m_ts.GetSearchBound(key); -- cgit v1.2.3 From 675cf7f7558ebaef15f398d90cc3d1d91457b219 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 3 May 2024 11:01:47 -0400 Subject: FST benchmarks --- benchmarks/include/file_util.h | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -7,8 +9,10 @@ #include #include #include +#include + +#include "psu-util/progress.h" -#pragma once template static std::vector read_lookup_queries(std::string fname, double selectivity) { @@ -35,6 +39,20 @@ static std::vector read_lookup_queries(std::string fname, double selectivity return queries; } +template +static std::vector generate_string_lookup_queries(std::vector> &strings, size_t cnt, gsl_rng *rng) { + std::vector queries; + + for (size_t i=0; i static std::vector read_range_queries(std::string &fname, double selectivity) { std::vector queries; @@ -173,3 +191,28 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } + + +static std::vector>read_string_file(std::string fname, size_t n=10000000) { + + std::fstream file; + file.open(fname, std::ios::in); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector> strings; + strings.reserve(n); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } + + return strings; +} -- cgit v1.2.3 From 265610435e1164a9acc39ca02ea1139acd37c46c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 9 May 2024 14:10:29 -0400 Subject: Added benchmarks for BigANN --- benchmarks/include/benchmark_types.h | 13 ++++++ benchmarks/include/file_util.h | 76 ++++++++++++++++++++++++++++++++ benchmarks/include/standard_benchmarks.h | 19 +++++--- 3 files changed, 103 insertions(+), 5 deletions(-) (limited to 'benchmarks/include') diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index 13964e8..51fc52d 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -35,6 +35,9 @@ typedef psudb::BTree, btree_key_extract< const size_t W2V_SIZE = 300; typedef de::EuclidPoint Word2VecRec; +const size_t ANNSize = 128; +typedef de::EuclidPoint ANNRec; + struct euclidean_distance { double operator()(const Word2VecRec &first, const Word2VecRec &second) const { double dist = 0; @@ -44,11 +47,21 @@ struct euclidean_distance { return std::sqrt(dist); } + + double operator()(const ANNRec &first, const ANNRec &second) const { + double dist = 0; + for (size_t i=0; i MTree; +typedef mt::mtree MTree_alt; #endif typedef pgm::DynamicPGMIndex> PGM; diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 586b44f..41eb18c 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -80,6 +80,46 @@ static std::vector read_range_queries(std::string &fname, double selectivity return queries; } + +template +static std::vector read_binary_knn_queries(std::string fname, size_t k, size_t n) { + std::vector queries; + queries.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + for (size_t i=0; i static std::vector read_knn_queries(std::string fname, size_t k) { std::vector queries; @@ -192,6 +232,42 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } +template +static std::vector read_binary_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector records; + records.reserve(n); + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + R rec; + for (size_t i=0; i>read_string_file(std::string fname, size_t n=10000000) { diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 1261a4c..b805c08 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -55,7 +55,16 @@ static void run_queries(DE *extension, std::vector &queries) { r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); } #endif - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { + std::vector result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + }else if constexpr (std::is_same_v) { size_t tot = 0; auto ptr = extension->find(queries[i].lower_bound); while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { @@ -180,7 +189,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v) { structure->insert(records[i]); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { structure->add(records[i]); } else if constexpr (std::is_same_v) { structure->insert_or_assign(records[i].key, records[i].value); @@ -196,7 +205,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v) { structure->erase_one(records[to_delete[delete_idx]].key); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { structure->remove(records[to_delete[delete_idx]]); } else if constexpr (std::is_same_v) { structure->erase(records[to_delete[delete_idx]].key); @@ -255,7 +264,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if constexpr (std::is_same_v) { de_index.erase_one(delete_vec[delete_idx++].key); #ifdef _GNU_SOURCE - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v || std::is_same_v) { de_index.remove(delete_vec[delete_idx++]); #endif } else { @@ -266,7 +275,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn // insert the record; #ifdef _GNU_SOURCE - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v || std::is_same_v) { de_index.add(insert_vec[i]); } else { de_index.insert(insert_vec[i]); -- cgit v1.2.3