summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2023-07-23 17:10:47 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2023-07-23 17:10:47 -0400
commit71118b6f69229fb2a649f52af970cd6203ea7a72 (patch)
tree55fbbd560e2d32a9bf3df4ced112a97c59bffcfc
parentd72277b86dec0ff56502befc573c0e34bf64e87e (diff)
downloaddynamic-extension-71118b6f69229fb2a649f52af970cd6203ea7a72.tar.gz
Benchmarking: Added utility functions for VPTree/KNN
-rw-r--r--benchmarks/include/bench_utility.h71
-rw-r--r--benchmarks/vptree_knn_bench.cpp60
2 files changed, 127 insertions, 4 deletions
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
index cc926a3..a1a2773 100644
--- a/benchmarks/include/bench_utility.h
+++ b/benchmarks/include/bench_utility.h
@@ -15,6 +15,7 @@
#include "shard/TrieSpline.h"
#include "shard/WIRS.h"
#include "ds/BTree.h"
+#include "shard/VPTree.h"
#include <cstdlib>
#include <cstdio>
@@ -37,11 +38,14 @@ typedef uint64_t weight_type;
typedef de::WeightedRecord<key_type, value_type, weight_type> WRec;
typedef de::Record<key_type, value_type> Rec;
+typedef de::Point<double, 300> Word2VecRec;
+
typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS;
typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ;
typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGMRQ;
typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedISAM_IRS;
typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ;
+typedef de::DynamicExtension<Word2VecRec, de::VPTree<Word2VecRec>, de::KNNQuery<Word2VecRec>> ExtendedVPTree_KNN;
struct btree_record {
key_type key;
@@ -140,6 +144,55 @@ static std::vector<QP> read_range_queries(std::string fname, double selectivity)
return queries;
}
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+static bool next_vector_record(std::fstream &file, Word2VecRec &record, bool binary=false) {
+ std::string line;
+ if (std::getline(file, line, '\n')) {
+ std::stringstream line_stream(line);
+ for (size_t i=0; i<300; i++) {
+ std::string dimension;
+
+ std::getline(line_stream, dimension, ' ');
+ record.data[i] = atof(dimension.c_str());
+ }
+
+ g_reccnt++;
+
+ return true;
+ }
+
+ return false;
+
+}
+
+
template <de::KVPInterface R>
static bool next_record(std::fstream &file, R &record, bool binary=false)
{
@@ -205,12 +258,22 @@ static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
vec.clear();
for (size_t i=0; i<n; i++) {
R rec;
- if (!next_record(file, rec, binary)) {
- if (i == 0) {
- return false;
+ if constexpr (std::is_same_v<R, Word2VecRec>) {
+ if (!next_vector_record(file, rec)) {
+ if (i == 0) {
+ return false;
+ }
+
+ break;
}
+ } else {
+ if (!next_record(file, rec, binary)) {
+ if (i == 0) {
+ return false;
+ }
- break;
+ break;
+ }
}
vec.emplace_back(rec);
diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp
new file mode 100644
index 0000000..1655209
--- /dev/null
+++ b/benchmarks/vptree_knn_bench.cpp
@@ -0,0 +1,60 @@
+#include "include/bench.h"
+
+int main(int argc, char **argv)
+{
+ if (argc < 5) {
+ fprintf(stderr, "Usage: vptree_knn_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+ exit(EXIT_FAILURE);
+ }
+
+ std::string filename = std::string(argv[1]);
+ size_t record_count = atol(argv[2]);
+ double delete_prop = atof(argv[3]);
+ std::string qfilename = std::string(argv[4]);
+
+ size_t buffer_cap = 12000;
+ size_t scale_factor = 6;
+ double max_delete_prop = delete_prop;
+
+ double insert_batch = 0.1;
+
+ init_bench_env(record_count, true);
+ auto queries = read_knn_queries<de::KNNQueryParms<Word2VecRec>>(qfilename, 50);
+
+ auto de_vp_knn = ExtendedVPTree_KNN(buffer_cap, scale_factor, max_delete_prop);
+
+ std::fstream datafile;
+ datafile.open(filename, std::ios::in | std::ios::binary);
+
+ std::vector<Word2VecRec> to_delete;
+
+ // warm up the tree with initial_insertions number of initially inserted
+ // records
+ size_t warmup_cnt = insert_batch * record_count;
+ warmup<ExtendedVPTree_KNN, Word2VecRec>(datafile, de_vp_knn, warmup_cnt, delete_prop, to_delete, true, true);
+
+ size_t insert_cnt = record_count - warmup_cnt;
+
+ insert_tput_bench<ExtendedVPTree_KNN, Word2VecRec>(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true);
+ fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage());
+/*
+ query_latency_bench<ExtendedVPTree_KNN, Word2VecRec, de::KNNQueryParms<Word2VecRec>>(de_vp_knn, queries);
+ fprintf(stdout, "\n");
+
+ auto ts = de_vp_knn.create_static_structure();
+
+ fprintf(stdout, "%ld\t", ts->get_memory_usage());
+ static_latency_bench<de::VPTree<Word2VecRec>, Word2VecRec, de::KNNQueryParms<Word2VecRec>, de::KNNQuery<Word2VecRec>>(
+ ts, queries, 1
+ );
+ fprintf(stdout, "\n");
+
+ delete ts;
+ */
+
+ delete_bench_env();
+ fflush(stdout);
+ fflush(stderr);
+
+ exit(EXIT_SUCCESS);
+}