summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2023-07-24 18:51:12 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2023-07-24 18:51:12 -0400
commitad95b1312b86426e149362166a560dea0ba920fe (patch)
treef1ceed253329803b77aa6d095b4540d7ad402428
parent5ab408321dd45865a88fed71d11efe01dd7715d9 (diff)
downloaddynamic-extension-ad95b1312b86426e149362166a560dea0ba920fe.tar.gz
M-Tree benchmarks
-rw-r--r--benchmarks/include/bench.h8
-rw-r--r--benchmarks/include/bench_utility.h32
-rw-r--r--benchmarks/mtree_knn_bench.cpp50
-rw-r--r--benchmarks/vptree_knn_bench.cpp3
4 files changed, 87 insertions, 6 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h
index d90bc3f..12d0a7e 100644
--- a/benchmarks/include/bench.h
+++ b/benchmarks/include/bench.h
@@ -51,6 +51,8 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
if constexpr (std::is_same_v<TreeMap, DE>) {
de_index.erase_one(delete_vec[delete_idx++].key);
+ } else if constexpr (std::is_same_v<MTree, DE>) {
+ de_index.remove(delete_vec[delete_idx++]);
} else {
de_index.erase(delete_vec[delete_idx++]);
}
@@ -58,7 +60,11 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
}
// insert the record;
- de_index.insert(insert_vec[i]);
+ if constexpr (std::is_same_v<MTree, DE>) {
+ de_index.add(insert_vec[i]);
+ } else {
+ de_index.insert(insert_vec[i]);
+ }
applied_inserts++;
}
auto insert_stop = std::chrono::high_resolution_clock::now();
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
index b728cbd..6610ab4 100644
--- a/benchmarks/include/bench_utility.h
+++ b/benchmarks/include/bench_utility.h
@@ -16,6 +16,7 @@
#include "shard/WIRS.h"
#include "ds/BTree.h"
#include "shard/VPTree.h"
+#include "mtree.h"
#include <cstdlib>
#include <cstdio>
@@ -38,7 +39,8 @@ typedef uint64_t weight_type;
typedef de::WeightedRecord<key_type, value_type, weight_type> WRec;
typedef de::Record<key_type, value_type> Rec;
-typedef de::CosinePoint<double, 300> Word2VecRec;
+const size_t W2V_SIZE = 300;
+typedef de::CosinePoint<double, W2V_SIZE> Word2VecRec;
typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS;
typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ;
@@ -66,8 +68,25 @@ struct btree_key_extract {
}
};
-typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
+struct cosine_similarity {
+ double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
+ double prod = 0;
+ double asquared = 0;
+ double bsquared = 0;
+
+ for (size_t i=0; i<W2V_SIZE; i++) {
+ prod += first.data[i] * second.data[i];
+ asquared += first.data[i]*first.data[i];
+ bsquared += second.data[i]*second.data[i];
+ }
+
+ return prod / std::sqrt(asquared * bsquared);
+ }
+};
+
+typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
+typedef mt::mtree<Word2VecRec, cosine_similarity> MTree;
static gsl_rng *g_rng;
static std::set<WRec> *g_to_delete;
@@ -347,13 +366,20 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count,
if (delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
if constexpr (std::is_same_v<TreeMap, DE>) {
extended_index.erase_one(delete_vec[delete_idx++].key);
+ }
+ else if constexpr (std::is_same_v<MTree, DE>) {
+ extended_index.remove(delete_vec[delete_idx++]);
} else {
extended_index.erase(delete_vec[delete_idx++]);
}
}
// insert the record;
- extended_index.insert(insert_vec[i]);
+ if constexpr (std::is_same_v<MTree, DE>) {
+ extended_index.add(insert_vec[i]);
+ } else {
+ extended_index.insert(insert_vec[i]);
+ }
inserted++;
if (progress) {
diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/mtree_knn_bench.cpp
new file mode 100644
index 0000000..3c1792a
--- /dev/null
+++ b/benchmarks/mtree_knn_bench.cpp
@@ -0,0 +1,50 @@
+#include "include/bench.h"
+#include "mtree.h"
+
+int main(int argc, char **argv)
+{
+ if (argc < 5) {
+ fprintf(stderr, "Usage: mtree_knn_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+ exit(EXIT_FAILURE);
+ }
+
+ std::string filename = std::string(argv[1]);
+ size_t record_count = atol(argv[2]);
+ double delete_prop = atof(argv[3]);
+ std::string qfilename = std::string(argv[4]);
+
+ size_t buffer_cap = 12000;
+ size_t scale_factor = 6;
+ double max_delete_prop = delete_prop;
+
+ double insert_batch = 0.1;
+
+ init_bench_env(record_count, true);
+ auto queries = read_knn_queries<de::KNNQueryParms<Word2VecRec>>(qfilename, 50);
+
+ auto mtree = MTree();
+
+ std::fstream datafile;
+ datafile.open(filename, std::ios::in | std::ios::binary);
+
+ std::vector<Word2VecRec> to_delete;
+
+ // warm up the tree with initial_insertions number of initially inserted
+ // records
+ size_t warmup_cnt = insert_batch * record_count;
+ warmup<MTree, Word2VecRec>(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true);
+
+ size_t insert_cnt = record_count - warmup_cnt;
+
+ insert_tput_bench<MTree, Word2VecRec>(mtree, datafile, insert_cnt, delete_prop, to_delete, true);
+ //fprintf(stdout, "%ld\t", mtree.get_memory_usage());
+
+// query_latency_bench<MTree, Word2VecRec, de::KNNQueryParms<Word2VecRec>>(mtree, queries);
+ // fprintf(stdout, "\n");
+
+ delete_bench_env();
+ fflush(stdout);
+ fflush(stderr);
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/vptree_knn_bench.cpp
index 1655209..a5c45f4 100644
--- a/benchmarks/vptree_knn_bench.cpp
+++ b/benchmarks/vptree_knn_bench.cpp
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
insert_tput_bench<ExtendedVPTree_KNN, Word2VecRec>(de_vp_knn, datafile, insert_cnt, delete_prop, to_delete, true);
fprintf(stdout, "%ld\t", de_vp_knn.get_memory_usage());
-/*
+
query_latency_bench<ExtendedVPTree_KNN, Word2VecRec, de::KNNQueryParms<Word2VecRec>>(de_vp_knn, queries);
fprintf(stdout, "\n");
@@ -50,7 +50,6 @@ int main(int argc, char **argv)
fprintf(stdout, "\n");
delete ts;
- */
delete_bench_env();
fflush(stdout);