summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 6
-rw-r--r-- benchmarks/bigann_sample.cpp 55
2 files changed, 61 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b65b8b..c6dad75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,6 +260,12 @@ if (vldb_bench)
target_link_options(mtree_bench_alt PUBLIC -mcx16)
target_compile_options(mtree_bench_alt PUBLIC -fopenmp)
+ # bigann_sample: standalone tool built from benchmarks/bigann_sample.cpp.
+ # It uses the same link libraries, include paths, -mcx16 link option and
+ # -fopenmp compile option as the neighbouring benchmark targets.
+ add_executable(bigann_sample ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/bigann_sample.cpp)
+ target_link_libraries(bigann_sample PUBLIC gsl pthread atomic gomp)
+ target_include_directories(bigann_sample PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(bigann_sample PUBLIC -mcx16)
+ target_compile_options(bigann_sample PUBLIC -fopenmp)
+
add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp)
target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp)
target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp
new file mode 100644
index 0000000..aa12f91
--- /dev/null
+++ b/benchmarks/bigann_sample.cpp
@@ -0,0 +1,55 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+/* Record type read from the data file and sampled by this program. */
+using Rec = ANNRec;
+
+/*
+ * Write the one-line command-line synopsis to stderr.
+ *
+ * progname: name to show as the executable (main passes argv[0]).
+ */
+void usage(char *progname) {
+    static const char synopsis_fmt[] = "%s reccnt datafile sampcnt\n";
+    fprintf(stderr, synopsis_fmt, progname);
+}
+
+/*
+ * Print a random sample of distinct records from a binary vector file.
+ *
+ * Arguments (argv):
+ *   reccnt   - record count handed to read_binary_vector_file
+ *              (presumably the number of records to read; confirm against
+ *              file_util.h)
+ *   datafile - path of the binary input file
+ *   sampcnt  - number of records to sample
+ *
+ * Each sampled record is written to stdout on its own line as ANNSize
+ * space-separated components. Fewer than sampcnt lines are produced when the
+ * input does not hold that many distinct records. The generator is never
+ * explicitly seeded, so it runs with whatever seed gsl_rng_alloc gives it.
+ *
+ * Exits with EXIT_FAILURE (after printing the usage line) when arguments are
+ * missing or a count is negative.
+ */
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * atol returns a signed value; a negative count would silently wrap to
+     * an enormous size_t, so reject it up front.
+     */
+    const long reccnt_arg = atol(argv[1]);
+    const std::string d_fname = std::string(argv[2]);
+    const long sampcnt_arg = atol(argv[3]);
+
+    if (reccnt_arg < 0 || sampcnt_arg < 0) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    const size_t n = static_cast<size_t>(reccnt_arg);
+    const size_t m = static_cast<size_t>(sampcnt_arg);
+
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+    auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+    /* Indices into data of the records chosen for the sample. */
+    std::vector<size_t> sampled;
+    sampled.reserve(m < data.size() ? m : data.size());
+
+    /* Records already chosen, so that no record value is emitted twice. */
+    std::unordered_map<Rec, size_t, de::RecordHash<Rec>> filter;
+
+    /*
+     * Sequential selection sampling: record i is accepted with probability
+     * (samples still needed) / (records still unseen). This spreads the
+     * sample over the whole file instead of taking the first m records, and
+     * acceptance becomes certain once needed == remaining because
+     * gsl_rng_uniform returns a value in [0, 1).
+     */
+    for (size_t i=0; i<data.size() && sampled.size()<m; i++) {
+        const double remaining = static_cast<double>(data.size() - i);
+        const double needed = static_cast<double>(m - sampled.size());
+
+        if (gsl_rng_uniform(rng) * remaining < needed
+                && filter.find(data[i]) == filter.end()) {
+            sampled.push_back(i);
+            filter.insert({data[i], i});
+        }
+    }
+
+    /*
+     * Only the indices actually selected are printed; there are no unfilled
+     * slots that would alias record 0 or index into an empty data set.
+     *
+     * NOTE(review): "%ld" assumes the components of Rec::data are long-sized
+     * signed integers - confirm against benchmark_types.h.
+     */
+    for (size_t i=0; i<sampled.size(); i++) {
+        for (size_t j=0; j<ANNSize; j++ ) {
+            fprintf(stdout, "%ld ", data[sampled[i]].data[j]);
+        }
+        fprintf(stdout, "\n");
+    }
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+    fflush(stdout);
+}
+