diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-09 14:10:29 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-09 14:10:29 -0400 |
| commit | 265610435e1164a9acc39ca02ea1139acd37c46c (patch) | |
| tree | 1308cb37c141a0961837c18793214d004e592158 /benchmarks/include | |
| parent | a23bc3341923509be9b2f587ece8cd5a650f6386 (diff) | |
| download | dynamic-extension-265610435e1164a9acc39ca02ea1139acd37c46c.tar.gz | |
Added benchmarks for BigANN
Diffstat (limited to 'benchmarks/include')
| -rw-r--r-- | benchmarks/include/benchmark_types.h | 13 | ||||
| -rw-r--r-- | benchmarks/include/file_util.h | 76 | ||||
| -rw-r--r-- | benchmarks/include/standard_benchmarks.h | 19 |
3 files changed, 103 insertions, 5 deletions
```diff
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
index 13964e8..51fc52d 100644
--- a/benchmarks/include/benchmark_types.h
+++ b/benchmarks/include/benchmark_types.h
@@ -35,6 +35,9 @@ typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract<
 const size_t W2V_SIZE = 300;
 typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
 
+const size_t ANNSize = 128;
+typedef de::EuclidPoint<uint64_t, ANNSize> ANNRec;
+
 struct euclidean_distance {
     double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
         double dist = 0;
@@ -44,11 +47,21 @@ struct euclidean_distance {
 
         return std::sqrt(dist);
     }
+
+    double operator()(const ANNRec &first, const ANNRec &second) const {
+        double dist = 0;
+        for (size_t i=0; i<ANNSize; i++) {
+            dist += ((double) first.data[i] - (double) second.data[i]) * ((double) first.data[i] - (double) second.data[i]);
+        }
+
+        return std::sqrt(dist);
+    }
 };
 
 #ifdef _GNU_SOURCE
 #include "mtree.h"
 typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
+typedef mt::mtree<ANNRec, euclidean_distance> MTree_alt;
 #endif
 
 typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM;
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
index 586b44f..41eb18c 100644
--- a/benchmarks/include/file_util.h
+++ b/benchmarks/include/file_util.h
@@ -80,6 +80,46 @@ static std::vector<QP> read_range_queries(std::string &fname, double selectivity
 
     return queries;
 }
+
+template <typename QP>
+static std::vector<QP> read_binary_knn_queries(std::string fname, size_t k, size_t n) {
+    std::vector<QP> queries;
+    queries.reserve(n);
+
+    std::fstream file;
+    file.open(fname, std::ios::in | std::ios::binary);
+
+    if (!file.is_open()) {
+        fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+
+    int32_t dim;
+    int32_t cnt;
+
+    file.read((char*) &(cnt), sizeof(cnt));
+    file.read((char*) &(dim), sizeof(dim));
+
+    if (n > cnt) {
+        n = cnt;
+    }
+
+    for (size_t i=0; i<n; i++) {
+        QP query;
+        for (size_t j=0; j<dim; j++) {
+            uint64_t val;
+            file.read((char*) &(val), sizeof(uint64_t));
+            query.point.data[j] = val;
+        }
+        query.k = k;
+        queries.push_back(query);
+    }
+
+    return queries;
+}
+
+
 template <typename QP>
 static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
     std::vector<QP> queries;
@@ -192,6 +232,42 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) {
 
     return records;
 }
+template <typename R>
+static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) {
+    std::fstream file;
+    file.open(fname, std::ios::in | std::ios::binary);
+
+    if (!file.is_open()) {
+        fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    std::vector<R> records;
+    records.reserve(n);
+
+    int32_t dim;
+    int32_t cnt;
+
+    file.read((char*) &(cnt), sizeof(cnt));
+    file.read((char*) &(dim), sizeof(dim));
+
+    if (n > cnt) {
+        n = cnt;
+    }
+
+    R rec;
+    for (size_t i=0; i<n; i++) {
+        for (size_t j=0; j<dim; j++) {
+            uint64_t val;
+            file.read((char*) &(val), sizeof(uint64_t));
+            rec.data[j] = val;
+        }
+
+        records.emplace_back(rec);
+    }
+
+    return records;
+}
 
 
 static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
index 1261a4c..b805c08 100644
--- a/benchmarks/include/standard_benchmarks.h
+++ b/benchmarks/include/standard_benchmarks.h
@@ -55,7 +55,16 @@ static void run_queries(DE *extension, std::vector<QP> &queries) {
                         r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
             }
             #endif
-        } else if constexpr (std::is_same_v<PGM, DE>) {
+        } else if constexpr (std::is_same_v<MTree_alt, DE>) {
+            std::vector<ANNRec> result;
+            auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+            auto itr = res.begin();
+            while (itr != res.end()) {
+                result.emplace_back(itr->data);
+                itr++;
+            }
+        }else if constexpr (std::is_same_v<PGM, DE>) {
             size_t tot = 0;
             auto ptr = extension->find(queries[i].lower_bound);
             while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) {
@@ -180,7 +189,7 @@ static void insert_records(DE *structure, size_t start, size_t stop,
 
         if constexpr (std::is_same_v<BenchBTree, DE>) {
             structure->insert(records[i]);
-        } else if constexpr (std::is_same_v<MTree, DE>) {
+        } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
             structure->add(records[i]);
         } else if constexpr (std::is_same_v<PGM, DE>) {
             structure->insert_or_assign(records[i].key, records[i].value);
@@ -196,7 +205,7 @@ static void insert_records(DE *structure, size_t start, size_t stop,
 
             if constexpr (std::is_same_v<BenchBTree, DE>) {
                 structure->erase_one(records[to_delete[delete_idx]].key);
-            } else if constexpr (std::is_same_v<MTree, DE>) {
+            } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
                 structure->remove(records[to_delete[delete_idx]]);
             } else if constexpr (std::is_same_v<PGM, DE>) {
                 structure->erase(records[to_delete[delete_idx]].key);
@@ -255,7 +264,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
             if constexpr (std::is_same_v<BenchBTree, DE>) {
                 de_index.erase_one(delete_vec[delete_idx++].key);
             #ifdef _GNU_SOURCE
-            } else if constexpr (std::is_same_v<MTree, DE>) {
+            } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
                 de_index.remove(delete_vec[delete_idx++]);
             #endif
             } else {
@@ -266,7 +275,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
 
         // insert the record;
         #ifdef _GNU_SOURCE
-        if constexpr (std::is_same_v<MTree, DE>) {
+        if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
             de_index.add(insert_vec[i]);
         } else {
             de_index.insert(insert_vec[i]);
```