From ad95b1312b86426e149362166a560dea0ba920fe Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 24 Jul 2023 18:51:12 -0400 Subject: M-Tree benchmarks --- benchmarks/include/bench.h | 8 +++++- benchmarks/include/bench_utility.h | 32 +++++++++++++++++++++--- benchmarks/mtree_knn_bench.cpp | 50 ++++++++++++++++++++++++++++++++++++++ benchmarks/vptree_knn_bench.cpp | 3 +-- 4 files changed, 87 insertions(+), 6 deletions(-) create mode 100644 benchmarks/mtree_knn_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h index d90bc3f..12d0a7e 100644 --- a/benchmarks/include/bench.h +++ b/benchmarks/include/bench.h @@ -51,6 +51,8 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { if constexpr (std::is_same_v) { de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); } else { de_index.erase(delete_vec[delete_idx++]); } @@ -58,7 +60,11 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn } // insert the record; - de_index.insert(insert_vec[i]); + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } applied_inserts++; } auto insert_stop = std::chrono::high_resolution_clock::now(); diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h index b728cbd..6610ab4 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/include/bench_utility.h @@ -16,6 +16,7 @@ #include "shard/WIRS.h" #include "ds/BTree.h" #include "shard/VPTree.h" +#include "mtree.h" #include #include @@ -38,7 +39,8 @@ typedef uint64_t weight_type; typedef de::WeightedRecord WRec; typedef de::Record Rec; -typedef de::CosinePoint Word2VecRec; +const size_t W2V_SIZE = 300; +typedef de::CosinePoint Word2VecRec; typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; @@ -66,8 +68,25 @@ struct btree_key_extract { } }; -typedef tlx::BTree TreeMap; +struct cosine_similarity { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double prod = 0; + double asquared = 0; + double bsquared = 0; + + for (size_t i=0; i TreeMap; +typedef mt::mtree MTree; static gsl_rng *g_rng; static std::set *g_to_delete; @@ -347,13 +366,20 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count, if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) { if constexpr (std::is_same_v) { extended_index.erase_one(delete_vec[delete_idx++].key); + } + else if constexpr (std::is_same_v) { + extended_index.remove(delete_vec[delete_idx++]); } else { extended_index.erase(delete_vec[delete_idx++]); } } // insert the record; - extended_index.insert(insert_vec[i]); + if constexpr (std::is_same_v) { + extended_index.add(insert_vec[i]); + } else { + extended_index.insert(insert_vec[i]); + } inserted++; if (progress) { diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/mtree_knn_bench.cpp new file mode 100644 index 0000000..3c1792a --- /dev/null +++ b/benchmarks/mtree_knn_bench.cpp @@ -0,0 +1,50 @@ +#include "include/bench.h" +#include "mtree.h" + +int main(int argc, char **argv) +{ + if (argc < 5) { + fprintf(stderr, "Usage: mtree_knn_bench \n"); + exit(EXIT_FAILURE); + } + + std::string filename = std::string(argv[1]); + size_t record_count = atol(argv[2]); + double delete_prop = atof(argv[3]); + std::string qfilename = std::string(argv[4]); + + size_t buffer_cap = 12000; + size_t scale_factor = 6; + double max_delete_prop = delete_prop; + + double insert_batch = 0.1; + + init_bench_env(record_count, true); + auto queries = read_knn_queries>(qfilename, 50); + + auto mtree = MTree(); + + std::fstream datafile; + datafile.open(filename, std::ios::in | std::ios::binary); + + std::vector to_delete; + + // warm up the tree with initial_insertions number of initially inserted + // records + size_t warmup_cnt = insert_batch * record_count; + warmup(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true); + + size_t insert_cnt = record_count - warmup_cnt; + + insert_tput_bench(mtree, datafile, insert_cnt, delete_prop, to_delete, true); + //fprintf(stdout, "%ld\t", mtree.get_memory_usage()); + +// query_latency_bench>(mtree, queries); + // fprintf(stdout, "\n"); + + delete_bench_env(); + fflush(stdout); + fflush(stderr); + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp index 1655209..a5c45f4 100644 --- a/benchmarks/vptree_knn_bench.cpp +++ b/benchmarks/vptree_knn_bench.cpp @@ -37,7 +37,7 @@ int main(int argc, char **argv) insert_tput_bench(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true); fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage()); -/* + query_latency_bench>(de_vp_knn, queries); fprintf(stdout, "\n"); @@ -50,7 +50,6 @@ int main(int argc, char **argv) fprintf(stdout, "\n"); delete ts; - */ delete_bench_env(); fflush(stdout); -- cgit v1.2.3