summary refs log tree commit diff stats
path: root/benchmarks
diff options
context:
space:
mode:
author Douglas Rumbaugh <dbr4@psu.edu> 2024-05-11 12:45:25 -0400
committer Douglas Rumbaugh <dbr4@psu.edu> 2024-05-11 12:45:25 -0400
commit c611e8e56ebe72e09127fff4fb14a08dc3fcb698 (patch)
tree 7fb65b3a82eb21f5788153fb3693553f05286228 /benchmarks
parent ab0ab297959fcca370e80670e17f90a780607a80 (diff)
download dynamic-extension-c611e8e56ebe72e09127fff4fb14a08dc3fcb698.tar.gz
Added program to sample the binary knn files
Diffstat (limited to 'benchmarks')
-rw-r--r-- benchmarks/bigann_sample.cpp 55
1 files changed, 55 insertions, 0 deletions
diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp
new file mode 100644
index 0000000..aa12f91
--- /dev/null
+++ b/benchmarks/bigann_sample.cpp
@@ -0,0 +1,55 @@
+/*
+ * bigann_sample: print a sample of the records of a binary vector (knn) data file to stdout.
+ */
+
+#define ENABLE_TIMER
+
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+typedef ANNRec Rec; /* record type read from the data file; ANNRec is not declared in this file */
+
+/* Print the command-line synopsis to stderr: the program name followed by
+ * the names of its three positional arguments. */
+void usage(char *progname) { fprintf(stderr, "%s reccnt datafile sampcnt\n", progname); }
+
+int main(int argc, char **argv) {
+    /* args: reccnt datafile sampcnt -- prints a random sample of distinct records, one per line */
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    size_t m = atol(argv[3]);
+
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+    auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+    /* sampled[0..cnt) holds the indices into data of the records chosen so far */
+    std::vector<size_t> sampled(m);
+    std::unordered_map<Rec, size_t, de::RecordHash<Rec>> filter;
+    size_t cnt = 0;
+    for (size_t i=0; i<data.size() && cnt<sampled.size(); i++) {
+        /* selection sampling: accept record i with probability (still needed) / (still unseen) */
+        double ratio = (double) (m - cnt) / (double) (data.size() - i);
+        if (gsl_rng_uniform(rng) < ratio && filter.find(data[i]) == filter.end()) {
+            sampled[cnt++] = i;
+            filter.insert({data[i], i});
+        }
+    }
+
+    /* emit only the slots actually filled: skipped duplicates or a short file can leave cnt < m */
+    for (size_t i=0; i<cnt; i++) {
+        for (size_t k=0; k<ANNSize; k++ ) {
+            fprintf(stdout, "%ld ", data[sampled[i]].data[k]);
+        }
+        fprintf(stdout, "\n");
+    }
+    gsl_rng_free(rng);
+    fflush(stderr);
+    fflush(stdout);
+}
+