From 71118b6f69229fb2a649f52af970cd6203ea7a72 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 23 Jul 2023 17:10:47 -0400 Subject: Benchmarking: Added utility functions for VPTree/KNN --- benchmarks/include/bench_utility.h | 71 +++++++++++++++++++++++++++++++++++--- benchmarks/vptree_knn_bench.cpp | 60 ++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 benchmarks/vptree_knn_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index cc926a3..a1a2773 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -15,6 +15,7 @@ #include "shard/TrieSpline.h" #include "shard/WIRS.h" #include "ds/BTree.h" +#include "shard/VPTree.h" #include #include @@ -37,11 +38,14 @@ typedef uint64_t weight_type; typedef de::WeightedRecord WRec; typedef de::Record Rec; +typedef de::Point Word2VecRec; + typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; +typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; struct btree_record { key_type key; @@ -140,6 +144,55 @@ static std::vector read_range_queries(std::string fname, double selectivity) return queries; } +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +static bool next_vector_record(std::fstream &file, Word2VecRec &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + + template static bool next_record(std::fstream &file, R &record, bool binary=false) { @@ -205,12 +258,22 @@ static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, vec.clear(); for (size_t i=0; i) { + if (!next_vector_record(file, rec)) { + if (i == 0) { + return false; + } + + break; } + } else { + if (!next_record(file, rec, binary)) { + if (i == 0) { + return false; + } - break; + break; + } } vec.emplace_back(rec); diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp new file mode 100644 index 0000000..1655209 --- /dev/null +++ b/benchmarks/vptree_knn_bench.cpp @@ -0,0 +1,60 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: vptree_knn_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + + double insert_batch = 0.1; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, 50); + + auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); +/* + query_latency_bench>(de_vp_knn, queries); + fprintf(stdout, "\n"); + + auto ts = de_vp_knn.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench, Word2VecRec, de::KNNQueryParms, de::KNNQuery>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + */ + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} -- cgit v1.2.3