diff options
| -rw-r--r-- | CMakeLists.txt | 24 | ||||
| -rw-r--r-- | benchmarks/include/benchmark_types.h | 13 | ||||
| -rw-r--r-- | benchmarks/include/file_util.h | 76 | ||||
| -rw-r--r-- | benchmarks/include/standard_benchmarks.h | 19 | ||||
| -rw-r--r-- | benchmarks/vldb/mtree_bench_alt.cpp | 80 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bench_alt.cpp | 102 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bsm_bench_alt.cpp | 92 |
7 files changed, 396 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index b4e801c..7f2e782 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,6 +168,14 @@ if (vldb_bench) target_link_options(vptree_bench PUBLIC -mcx16) target_compile_options(vptree_bench PUBLIC -fopenmp) + + add_executable(vptree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench_alt.cpp) + target_link_libraries(vptree_bench_alt PUBLIC gsl pthread atomic) + target_include_directories(vptree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(vptree_bench_alt PUBLIC -mcx16) + target_compile_options(vptree_bench_alt PUBLIC -fopenmp) + + add_executable(vptree_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_parmsweep.cpp) target_link_libraries(vptree_parmsweep PUBLIC gsl pthread atomic) target_include_directories(vptree_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) @@ -180,6 +188,12 @@ if (vldb_bench) target_link_options(vptree_bsm_bench PUBLIC -mcx16) target_compile_options(vptree_bsm_bench PUBLIC -fopenmp) + add_executable(vptree_bsm_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench_alt.cpp) + target_link_libraries(vptree_bsm_bench_alt PUBLIC gsl pthread atomic) + target_include_directories(vptree_bsm_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(vptree_bsm_bench_alt PUBLIC -mcx16) + target_compile_options(vptree_bsm_bench_alt PUBLIC -fopenmp) + add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp) target_link_libraries(fst_bench PUBLIC gsl pthread atomic) target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) @@ -240,11 +254,11 @@ if (vldb_bench) target_compile_options(alex_bench PUBLIC -fopenmp) set_property(TARGET alex_bench PROPERTY CXX_STANDARD 14) - add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp) - target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp) - target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) - target_link_options(mtree_bench PUBLIC -mcx16) - target_compile_options(mtree_bench PUBLIC -fopenmp) + add_executable(mtree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench_alt.cpp) + target_link_libraries(mtree_bench_alt PUBLIC gsl pthread atomic gomp) + target_include_directories(mtree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(mtree_bench_alt PUBLIC -mcx16) + target_compile_options(mtree_bench_alt PUBLIC -fopenmp) endif() diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h index 13964e8..51fc52d 100644 --- a/benchmarks/include/benchmark_types.h +++ b/benchmarks/include/benchmark_types.h @@ -35,6 +35,9 @@ typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract< const size_t W2V_SIZE = 300; typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec; +const size_t ANNSize = 128; +typedef de::EuclidPoint<uint64_t, ANNSize> ANNRec; + struct euclidean_distance { double operator()(const Word2VecRec &first, const Word2VecRec &second) const { double dist = 0; @@ -44,11 +47,21 @@ struct euclidean_distance { return std::sqrt(dist); } + + double operator()(const ANNRec &first, const ANNRec &second) const { + double dist = 0; + for (size_t i=0; i<ANNSize; i++) { + dist += ((double) first.data[i] - (double) second.data[i]) * ((double) first.data[i] - (double) second.data[i]); + } + + return std::sqrt(dist); + } }; #ifdef _GNU_SOURCE #include "mtree.h" typedef mt::mtree<Word2VecRec, euclidean_distance> MTree; +typedef mt::mtree<ANNRec, euclidean_distance> MTree_alt; #endif typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM; diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 586b44f..41eb18c 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -80,6 +80,46 @@ static std::vector<QP> read_range_queries(std::string &fname, double selectivity return queries; } + +template <typename QP> +static std::vector<QP> read_binary_knn_queries(std::string fname, size_t k, size_t n) { + std::vector<QP> queries; + queries.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + for (size_t i=0; i<n; i++) { + QP query; + for (size_t j=0; j<dim; j++) { + uint64_t val; + file.read((char*) &(val), sizeof(uint64_t)); + query.point.data[j] = val; + } + query.k = k; + queries.push_back(query); + } + + return queries; +} + + template <typename QP> static std::vector<QP> read_knn_queries(std::string fname, size_t k) { std::vector<QP> queries; @@ -192,6 +232,42 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) { return records; } +template <typename R> +static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector<R> records; + records.reserve(n); + + int32_t dim; + int32_t cnt; + + file.read((char*) &(cnt), sizeof(cnt)); + file.read((char*) &(dim), sizeof(dim)); + + if (n > cnt) { + n = cnt; + } + + R rec; + for (size_t i=0; i<n; i++) { + for (size_t j=0; j<dim; j++) { + uint64_t val; + file.read((char*) &(val), sizeof(uint64_t)); + rec.data[j] = val; + } + + records.emplace_back(rec); + } + + return records; +} static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index 1261a4c..b805c08 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -55,7 +55,16 @@ static void run_queries(DE *extension, std::vector<QP> &queries) { r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]); } #endif - } else if constexpr (std::is_same_v<PGM, DE>) { + } else if constexpr (std::is_same_v<MTree_alt, DE>) { + std::vector<ANNRec> result; + auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k); + + auto itr = res.begin(); + while (itr != res.end()) { + result.emplace_back(itr->data); + itr++; + } + }else if constexpr (std::is_same_v<PGM, DE>) { size_t tot = 0; auto ptr = extension->find(queries[i].lower_bound); while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) { @@ -180,7 +189,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v<BenchBTree, DE>) { structure->insert(records[i]); - } else if constexpr (std::is_same_v<MTree, DE>) { + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { structure->add(records[i]); } else if constexpr (std::is_same_v<PGM, DE>) { structure->insert_or_assign(records[i].key, records[i].value); @@ -196,7 +205,7 @@ static void insert_records(DE *structure, size_t start, size_t stop, if constexpr (std::is_same_v<BenchBTree, DE>) { structure->erase_one(records[to_delete[delete_idx]].key); - } else if constexpr (std::is_same_v<MTree, DE>) { + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { structure->remove(records[to_delete[delete_idx]]); } else if constexpr (std::is_same_v<PGM, DE>) { structure->erase(records[to_delete[delete_idx]].key); @@ -255,7 +264,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn if constexpr (std::is_same_v<BenchBTree, DE>) { de_index.erase_one(delete_vec[delete_idx++].key); #ifdef _GNU_SOURCE - } else if constexpr (std::is_same_v<MTree, DE>) { + } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { de_index.remove(delete_vec[delete_idx++]); #endif } else { @@ -266,7 +275,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn // insert the record; #ifdef _GNU_SOURCE - if constexpr (std::is_same_v<MTree, DE>) { + if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) { de_index.add(insert_vec[i]); } else { de_index.insert(insert_vec[i]); diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp new file mode 100644 index 0000000..6b08df7 --- /dev/null +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -0,0 +1,80 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "query/knn.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto mtree = new MTree_alt(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<MTree_alt, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<MTree_alt, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<MTree_alt, QP>(mtree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete mtree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp new file mode 100644 index 0000000..b09ee7d --- /dev/null +++ b/benchmarks/vldb/vptree_bench_alt.cpp @@ -0,0 +1,102 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1400, 1400, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp new file mode 100644 index 0000000..63baf8b --- /dev/null +++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp @@ -0,0 +1,92 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1, 1400, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t\t%ld\n", insert_throughput, query_latency, ext_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + |