From 37434f5baf632e839dc14b3c7d8745287cb9368a Mon Sep 17 00:00:00 2001
From: Douglas Rumbaugh
Date: Tue, 25 Jul 2023 11:17:36 -0400
Subject: Benchmarks: mtree and vptree benchmark updates

Note: cosine similarity doesn't seem to work for VPTree--I don't think
that it is actually a metric, upon further research. At the very least
I can't find anyone claiming it is, and I've found several people
claiming it isn't.

On testing with the Word2Vec data, Euclidean distance works insofar as
the M-Tree and VPTree return the same KNN results for test queries,
whereas cosine similarity does not work.
---
 benchmarks/mtree_knn_bench.cpp | 46 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

(limited to 'benchmarks/mtree_knn_bench.cpp')

diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/mtree_knn_bench.cpp
index 3c1792a..7ae4e83 100644
--- a/benchmarks/mtree_knn_bench.cpp
+++ b/benchmarks/mtree_knn_bench.cpp
@@ -1,6 +1,44 @@
 #include "include/bench.h"
 #include "mtree.h"
 
+static void mtree_knn_bench(MTree &tree, std::vector> queries, size_t trial_cnt=1)
+{
+    char progbuf[25];
+    sprintf(progbuf, "sampling:");
+
+    size_t batch_size = 100;
+    size_t batches = trial_cnt / batch_size;
+    size_t total_time = 0;
+
+    std::vector result_set;
+
+    for (int i=0; i results;
+
+        auto start = std::chrono::high_resolution_clock::now();
+        for (size_t j=0; jdata);
+                itr++;
+            }
+        }
+        auto stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast(stop - start).count();
+    }
+
+    progress_update(1.0, progbuf);
+
+    size_t latency = total_time / (trial_cnt * queries.size());
+
+    fprintf(stdout, "%ld\t", latency);
+}
+
 int main(int argc, char **argv)
 {
     if (argc < 5) {
@@ -20,7 +58,7 @@ int main(int argc, char **argv)
     double insert_batch = 0.1;
     init_bench_env(record_count, true);
 
-    auto queries = read_knn_queries>(qfilename, 50);
+    auto queries = read_knn_queries>(qfilename, 10);
 
     auto mtree = MTree();
 
@@ -37,10 +75,10 @@ int main(int argc, char **argv)
 
     size_t insert_cnt = record_count - warmup_cnt;
     insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true);
-    //fprintf(stdout, "%ld\t", mtree.get_memory_usage());
+    // fprintf(stdout, "%ld\t", mtree.get_memory_usage());
 
-//    query_latency_bench>(mtree, queries);
-    // fprintf(stdout, "\n");
+    mtree_knn_bench(mtree, queries);
+    fprintf(stdout, "\n");
 
     delete_bench_env();
     fflush(stdout);
-- 
cgit v1.2.3