summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt24
-rw-r--r--benchmarks/include/benchmark_types.h13
-rw-r--r--benchmarks/include/file_util.h76
-rw-r--r--benchmarks/include/standard_benchmarks.h19
-rw-r--r--benchmarks/vldb/btree_bench.cpp2
-rw-r--r--benchmarks/vldb/irs_bench.cpp2
-rw-r--r--benchmarks/vldb/mtree_bench_alt.cpp80
-rw-r--r--benchmarks/vldb/vptree_bench_alt.cpp102
-rw-r--r--benchmarks/vldb/vptree_bsm_bench_alt.cpp92
9 files changed, 398 insertions, 12 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b4e801c..7f2e782 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -168,6 +168,14 @@ if (vldb_bench)
target_link_options(vptree_bench PUBLIC -mcx16)
target_compile_options(vptree_bench PUBLIC -fopenmp)
+
+ add_executable(vptree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench_alt.cpp)
+ target_link_libraries(vptree_bench_alt PUBLIC gsl pthread atomic)
+ target_include_directories(vptree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(vptree_bench_alt PUBLIC -mcx16)
+ target_compile_options(vptree_bench_alt PUBLIC -fopenmp)
+
+
add_executable(vptree_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_parmsweep.cpp)
target_link_libraries(vptree_parmsweep PUBLIC gsl pthread atomic)
target_include_directories(vptree_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
@@ -180,6 +188,12 @@ if (vldb_bench)
target_link_options(vptree_bsm_bench PUBLIC -mcx16)
target_compile_options(vptree_bsm_bench PUBLIC -fopenmp)
+ add_executable(vptree_bsm_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench_alt.cpp)
+ target_link_libraries(vptree_bsm_bench_alt PUBLIC gsl pthread atomic)
+ target_include_directories(vptree_bsm_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(vptree_bsm_bench_alt PUBLIC -mcx16)
+ target_compile_options(vptree_bsm_bench_alt PUBLIC -fopenmp)
+
add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp)
target_link_libraries(fst_bench PUBLIC gsl pthread atomic)
target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
@@ -240,11 +254,11 @@ if (vldb_bench)
target_compile_options(alex_bench PUBLIC -fopenmp)
set_property(TARGET alex_bench PROPERTY CXX_STANDARD 14)
- add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp)
- target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp)
- target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
- target_link_options(mtree_bench PUBLIC -mcx16)
- target_compile_options(mtree_bench PUBLIC -fopenmp)
+ add_executable(mtree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench_alt.cpp)
+ target_link_libraries(mtree_bench_alt PUBLIC gsl pthread atomic gomp)
+ target_include_directories(mtree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(mtree_bench_alt PUBLIC -mcx16)
+ target_compile_options(mtree_bench_alt PUBLIC -fopenmp)
endif()
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
index 13964e8..51fc52d 100644
--- a/benchmarks/include/benchmark_types.h
+++ b/benchmarks/include/benchmark_types.h
@@ -35,6 +35,9 @@ typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract<
const size_t W2V_SIZE = 300;
typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
+const size_t ANNSize = 128;
+typedef de::EuclidPoint<uint64_t, ANNSize> ANNRec;
+
struct euclidean_distance {
double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
double dist = 0;
@@ -44,11 +47,21 @@ struct euclidean_distance {
return std::sqrt(dist);
}
+
+ double operator()(const ANNRec &first, const ANNRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<ANNSize; i++) {
+ dist += ((double) first.data[i] - (double) second.data[i]) * ((double) first.data[i] - (double) second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
};
#ifdef _GNU_SOURCE
#include "mtree.h"
typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
+typedef mt::mtree<ANNRec, euclidean_distance> MTree_alt;
#endif
typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM;
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
index 586b44f..41eb18c 100644
--- a/benchmarks/include/file_util.h
+++ b/benchmarks/include/file_util.h
@@ -80,6 +80,46 @@ static std::vector<QP> read_range_queries(std::string &fname, double selectivity
return queries;
}
+
+template <typename QP>
+static std::vector<QP> read_binary_knn_queries(std::string fname, size_t k, size_t n) {
+ std::vector<QP> queries;
+ queries.reserve(n);
+
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ for (size_t i=0; i<n; i++) {
+ QP query;
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ query.point.data[j] = val;
+ }
+ query.k = k;
+ queries.push_back(query);
+ }
+
+ return queries;
+}
+
+
template <typename QP>
static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
std::vector<QP> queries;
@@ -192,6 +232,42 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) {
return records;
}
+template <typename R>
+static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ if (!file.is_open()) {
+ fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+ exit(EXIT_FAILURE);
+ }
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ int32_t dim;
+ int32_t cnt;
+
+ file.read((char*) &(cnt), sizeof(cnt));
+ file.read((char*) &(dim), sizeof(dim));
+
+ if (n > cnt) {
+ n = cnt;
+ }
+
+ R rec;
+ for (size_t i=0; i<n; i++) {
+ for (size_t j=0; j<dim; j++) {
+ uint64_t val;
+ file.read((char*) &(val), sizeof(uint64_t));
+ rec.data[j] = val;
+ }
+
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
index 1261a4c..b805c08 100644
--- a/benchmarks/include/standard_benchmarks.h
+++ b/benchmarks/include/standard_benchmarks.h
@@ -55,7 +55,16 @@ static void run_queries(DE *extension, std::vector<QP> &queries) {
r.data[1], r.data[2], r.data[3], r.data[4], r.data[5]);
}
#endif
- } else if constexpr (std::is_same_v<PGM, DE>) {
+ } else if constexpr (std::is_same_v<MTree_alt, DE>) {
+ std::vector<ANNRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+ }else if constexpr (std::is_same_v<PGM, DE>) {
size_t tot = 0;
auto ptr = extension->find(queries[i].lower_bound);
while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) {
@@ -180,7 +189,7 @@ static void insert_records(DE *structure, size_t start, size_t stop,
if constexpr (std::is_same_v<BenchBTree, DE>) {
structure->insert(records[i]);
- } else if constexpr (std::is_same_v<MTree, DE>) {
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
structure->add(records[i]);
} else if constexpr (std::is_same_v<PGM, DE>) {
structure->insert_or_assign(records[i].key, records[i].value);
@@ -196,7 +205,7 @@ static void insert_records(DE *structure, size_t start, size_t stop,
if constexpr (std::is_same_v<BenchBTree, DE>) {
structure->erase_one(records[to_delete[delete_idx]].key);
- } else if constexpr (std::is_same_v<MTree, DE>) {
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
structure->remove(records[to_delete[delete_idx]]);
} else if constexpr (std::is_same_v<PGM, DE>) {
structure->erase(records[to_delete[delete_idx]].key);
@@ -255,7 +264,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
if constexpr (std::is_same_v<BenchBTree, DE>) {
de_index.erase_one(delete_vec[delete_idx++].key);
#ifdef _GNU_SOURCE
- } else if constexpr (std::is_same_v<MTree, DE>) {
+ } else if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
de_index.remove(delete_vec[delete_idx++]);
#endif
} else {
@@ -266,7 +275,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
// insert the record;
#ifdef _GNU_SOURCE
- if constexpr (std::is_same_v<MTree, DE>) {
+ if constexpr (std::is_same_v<MTree, DE> || std::is_same_v<MTree_alt, DE>) {
de_index.add(insert_vec[i]);
} else {
de_index.insert(insert_vec[i]);
diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp
index 673da33..fa72831 100644
--- a/benchmarks/vldb/btree_bench.cpp
+++ b/benchmarks/vldb/btree_bench.cpp
@@ -57,7 +57,7 @@ int main(int argc, char **argv) {
}
/* warmup structure w/ 10% of records */
- size_t warmup = .3 * n;
+ size_t warmup = .1 * n;
size_t delete_idx = 0;
insert_records<BenchBTree, Rec>(&btree, 0, warmup, data, to_delete, delete_idx, false, rng);
diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp
index f68de63..e062e80 100644
--- a/benchmarks/vldb/irs_bench.cpp
+++ b/benchmarks/vldb/irs_bench.cpp
@@ -56,7 +56,7 @@ int main(int argc, char **argv) {
}
/* warmup structure w/ 10% of records */
- size_t warmup = .3 * n;
+ size_t warmup = .1 * n;
size_t delete_idx = 0;
insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp
new file mode 100644
index 0000000..6b08df7
--- /dev/null
+++ b/benchmarks/vldb/mtree_bench_alt.cpp
@@ -0,0 +1,80 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "query/knn.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto mtree = new MTree_alt();
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<MTree_alt, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<MTree_alt, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<MTree_alt, QP>(mtree, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+ gsl_rng_free(rng);
+ delete mtree;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp
new file mode 100644
index 0000000..b09ee7d
--- /dev/null
+++ b/benchmarks/vldb/vptree_bench_alt.cpp
@@ -0,0 +1,102 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1400, 1400, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto shard = extension->create_static_structure();
+
+ fprintf(stderr, "Running Static query tests\n\n");
+ TIMER_START();
+ run_static_queries<Shard, QP, Q>(shard, queries);
+ TIMER_STOP();
+
+ auto static_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+ auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+
diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp
new file mode 100644
index 0000000..63baf8b
--- /dev/null
+++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp
@@ -0,0 +1,92 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef ANNRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(1, 1400, 2, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+
+ fprintf(stdout, "%ld\t%ld\t\t%ld\n", insert_throughput, query_latency, ext_size);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+ fflush(stdout);
+}
+