diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-07-23 17:10:47 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-07-23 17:10:47 -0400 |
| commit | 71118b6f69229fb2a649f52af970cd6203ea7a72 (patch) | |
| tree | 55fbbd560e2d32a9bf3df4ced112a97c59bffcfc | |
| parent | d72277b86dec0ff56502befc573c0e34bf64e87e (diff) | |
| download | dynamic-extension-71118b6f69229fb2a649f52af970cd6203ea7a72.tar.gz | |
Benchmarking: Added utility functions for VPTree/KNN
| -rw-r--r-- | benchmarks/include/bench_utility.h | 71 | ||||
| -rw-r--r-- | benchmarks/vptree_knn_bench.cpp | 60 |
2 files changed, 127 insertions, 4 deletions
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index cc926a3..a1a2773 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -15,6 +15,7 @@ #include "shard/TrieSpline.h" #include "shard/WIRS.h" #include "ds/BTree.h" +#include "shard/VPTree.h" #include <cstdlib> #include <cstdio> @@ -37,11 +38,14 @@ typedef uint64_t weight_type; typedef de::WeightedRecord<key_type, value_type, weight_type> WRec; typedef de::Record<key_type, value_type> Rec; +typedef de::Point<double, 300> Word2VecRec; + typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS; typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ; typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGMRQ; typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedISAM_IRS; typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ; +typedef de::DynamicExtension<Word2VecRec, de::VPTree<Word2VecRec>, de::KNNQuery<Word2VecRec>> ExtendedVPTree_KNN; struct btree_record { key_type key; @@ -140,6 +144,55 @@ static std::vector<QP> read_range_queries(std::string fname, double selectivity) return queries; } +template <typename QP> +static std::vector<QP> read_knn_queries(std::string fname, size_t k) { + std::vector<QP> queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +static bool next_vector_record(std::fstream &file, Word2VecRec &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + + template <de::KVPInterface R> static bool next_record(std::fstream &file, R &record, bool binary=false) { @@ -205,12 +258,22 @@ static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n, vec.clear(); for (size_t i=0; i<n; i++) { R rec; - if (!next_record(file, rec, binary)) { - if (i == 0) { - return false; + if constexpr (std::is_same_v<R, Word2VecRec>) { + if (!next_vector_record(file, rec)) { + if (i == 0) { + return false; + } + + break; } + } else { + if (!next_record(file, rec, binary)) { + if (i == 0) { + return false; + } - break; + break; + } } vec.emplace_back(rec); diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp new file mode 100644 index 0000000..1655209 --- /dev/null +++ b/benchmarks/vptree_knn_bench.cpp @@ -0,0 +1,60 @@ +#include "include/bench.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: vptree_knn_bench <filename> <record_count> <delete_proportion> <query_file>\n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + + double insert_batch = 0.1; + + init_bench_env(record_count, true); + auto queries = read_knn_queries<de::KNNQueryParms<Word2VecRec>>(qfilename, 50); + + auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector<Word2VecRec> to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup<ExtendedVPTree_KNN, Word2VecRec>(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench<ExtendedVPTree_KNN, Word2VecRec>(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); + fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); +/* + query_latency_bench<ExtendedVPTree_KNN, Word2VecRec, de::KNNQueryParms<Word2VecRec>>(de_vp_knn, queries); + fprintf(stdout, "\n"); + + auto ts = de_vp_knn.create_static_structure(); + + fprintf(stdout, "%ld\t", ts->get_memory_usage()); + static_latency_bench<de::VPTree<Word2VecRec>, Word2VecRec, de::KNNQueryParms<Word2VecRec>, de::KNNQuery<Word2VecRec>>( + ts, queries, 1 + ); + fprintf(stdout, "\n"); + + delete ts; + */ + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} |