From 71118b6f69229fb2a649f52af970cd6203ea7a72 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 23 Jul 2023 17:10:47 -0400 Subject: Benchmarking: Added utility functions for VPTree/KNN --- benchmarks/include/bench_utility.h | 71 +++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) (limited to 'benchmarks/include/bench_utility.h') diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index cc926a3..a1a2773 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -15,6 +15,7 @@ #include "shard/TrieSpline.h" #include "shard/WIRS.h" #include "ds/BTree.h" +#include "shard/VPTree.h" #include #include @@ -37,11 +38,14 @@ typedef uint64_t weight_type; typedef de::WeightedRecord WRec; typedef de::Record Rec; +typedef de::Point Word2VecRec; + typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; +typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; struct btree_record { key_type key; @@ -140,6 +144,55 @@ static std::vector read_range_queries(std::string fname, double selectivity) return queries; } +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +static bool next_vector_record(std::fstream &file, Word2VecRec &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + + template static bool next_record(std::fstream &file, R &record, bool binary=false) { @@ -205,12 +258,22 @@ static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, vec.clear(); for (size_t i=0; i) { + if (!next_vector_record(file, rec)) { + if (i == 0) { + return false; + } + + break; } + } else { + if (!next_record(file, rec, binary)) { + if (i == 0) { + return false; + } - break; + break; + } } vec.emplace_back(rec); -- cgit v1.2.3