From c611e8e56ebe72e09127fff4fb14a08dc3fcb698 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sat, 11 May 2024 12:45:25 -0400 Subject: Added program to sample the binary knn files --- benchmarks/bigann_sample.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 benchmarks/bigann_sample.cpp (limited to 'benchmarks') diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp new file mode 100644 index 0000000..aa12f91 --- /dev/null +++ b/benchmarks/bigann_sample.cpp @@ -0,0 +1,55 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "file_util.h" +#include "benchmark_types.h" + +#include + +typedef ANNRec Rec; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile sampcnt\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + size_t m = atol(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + auto data = read_binary_vector_file(d_fname, n); + + std::vector to_delete(m); + + std::unordered_map> filter; + double ratio = (double) data.size() / (double) m; + size_t j=0; + for (size_t i=0; i