From 69f36c9b4f5df19f09156689b333afe29a017eed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 23 Feb 2024 15:40:49 -0500 Subject: Benchmark updates --- benchmarks/include/file_util.h | 130 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 benchmarks/include/file_util.h (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h new file mode 100644 index 0000000..2a3300a --- /dev/null +++ b/benchmarks/include/file_util.h @@ -0,0 +1,130 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/interface/Record.h" +#include "query/irs.h" + +#pragma once + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +template +static std::vector read_sosd_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector records(n); + for (size_t i=0; i +static std::vector read_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + std::vector records; + records.reserve(n); + + for (size_t i=0; i Date: Mon, 22 Apr 2024 12:27:43 -0400 Subject: Added plain BSM and MDSP BSM benchmark --- benchmarks/include/file_util.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 2a3300a..ea8b42e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -97,6 +97,22 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { return records; } +template +static std::vector> read_sosd_file_pair(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector> records(n); + for (size_t i=0; i Date: Tue, 23 Apr 2024 13:24:27 -0400 Subject: benchmarks/file_util: removed dependency on framework in prep for ALEX --- benchmarks/include/file_util.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ea8b42e..8dc1b5f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -8,9 +8,6 @@ #include #include -#include "framework/interface/Record.h" -#include "query/irs.h" - #pragma once template @@ -81,7 +78,7 @@ static std::vector read_knn_queries(std::string fname, size_t k) { return queries; } -template +template static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); -- cgit v1.2.3 From 160692f4b9f80c6eba7d18d3221fc1c3e3c3139e Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 13:31:24 -0400 Subject: Added error checks to file opening, and generalized key types --- benchmarks/include/file_util.h | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 8dc1b5f..b5b3417 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -15,6 +15,12 @@ static std::vector read_lookup_queries(std::string fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -34,6 +40,12 @@ static std::vector read_range_queries(std::string &fname, double selectivity std::vector queries; FILE *qf = fopen(fname.c_str(), "r"); + + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { @@ -58,6 +70,11 @@ static std::vector read_knn_queries(std::string fname, size_t k) { char *line = NULL; size_t len = 0; + if (!qf) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + while (getline(&line, &len, qf) > 0) { char *token; QP query; @@ -83,10 +100,15 @@ static std::vector read_sosd_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records(n); for (size_t i=0; i> read_sosd_file_pair(std::string &fname, size std::fstream file; file.open(fname, std::ios::in | std::ios::binary); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector> records(n); for (size_t i=0; i read_vector_file(std::string &fname, size_t n) { std::fstream file; file.open(fname, std::ios::in); + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + std::vector records; records.reserve(n); -- cgit v1.2.3 From 5636838a6e64760c291b00107657a90428a0f9e1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 15:59:48 -0400 Subject: File Util: fixed the reading in of undesired queries --- benchmarks/include/file_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index b5b3417..ebcf17e 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -49,7 +49,7 @@ static std::vector read_range_queries(std::string &fname, double selectivity size_t start, stop; double sel; while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { + if (start < stop && std::abs(sel - selectivity) < 0.00001) { QP q; q.lower_bound = start; q.upper_bound = stop; -- cgit v1.2.3 From 675cf7f7558ebaef15f398d90cc3d1d91457b219 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 3 May 2024 11:01:47 -0400 Subject: FST benchmarks --- benchmarks/include/file_util.h | 45 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -7,8 +9,10 @@ #include #include #include +#include + +#include "psu-util/progress.h" -#pragma once template static std::vector read_lookup_queries(std::string fname, double selectivity) { @@ -35,6 +39,20 @@ static std::vector read_lookup_queries(std::string fname, double selectivity return queries; } +template +static std::vector generate_string_lookup_queries(std::vector> &strings, size_t cnt, gsl_rng *rng) { + std::vector queries; + + for (size_t i=0; i static std::vector read_range_queries(std::string &fname, double selectivity) { std::vector queries; @@ -173,3 +191,28 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } + + +static std::vector>read_string_file(std::string fname, size_t n=10000000) { + + std::fstream file; + file.open(fname, std::ios::in); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector> strings; + strings.reserve(n); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } + + return strings; +} -- cgit v1.2.3 From 265610435e1164a9acc39ca02ea1139acd37c46c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 9 May 2024 14:10:29 -0400 Subject: Added benchmarks for BigANN --- benchmarks/include/file_util.h | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'benchmarks/include/file_util.h') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 586b44f..41eb18c 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -80,6 +80,46 @@ static std::vector read_range_queries(std::string &fname, double selectivity return queries; } + +template +static std::vector read_binary_knn_queries(std::string fname, size_t k, size_t n) { + std::vector queries; + queries.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + for (size_t i=0; i static std::vector read_knn_queries(std::string fname, size_t k) { std::vector queries; @@ -192,6 +232,42 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } +template +static std::vector read_binary_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector records; + records.reserve(n); + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + R rec; + for (size_t i=0; i>read_string_file(std::string fname, size_t n=10000000) { -- cgit v1.2.3