diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-11 12:45:25 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-11 12:45:25 -0400 |
| commit | c611e8e56ebe72e09127fff4fb14a08dc3fcb698 (patch) | |
| tree | 7fb65b3a82eb21f5788153fb3693553f05286228 | |
| parent | ab0ab297959fcca370e80670e17f90a780607a80 (diff) | |
| download | dynamic-extension-c611e8e56ebe72e09127fff4fb14a08dc3fcb698.tar.gz | |
Added program to sample the binary knn files
| -rw-r--r-- | CMakeLists.txt | 6 | ||||
| -rw-r--r-- | benchmarks/bigann_sample.cpp | 94 |
2 files changed, 100 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b65b8b..c6dad75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -260,6 +260,12 @@ if (vldb_bench)
     target_link_options(mtree_bench_alt PUBLIC -mcx16)
     target_compile_options(mtree_bench_alt PUBLIC -fopenmp)
 
+    add_executable(bigann_sample ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/bigann_sample.cpp)
+    target_link_libraries(bigann_sample PUBLIC gsl pthread atomic gomp)
+    target_include_directories(bigann_sample PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+    target_link_options(bigann_sample PUBLIC -mcx16)
+    target_compile_options(bigann_sample PUBLIC -fopenmp)
+
     add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp)
     target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp)
     target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
diff --git a/benchmarks/bigann_sample.cpp b/benchmarks/bigann_sample.cpp
new file mode 100644
index 0000000..aa12f91
--- /dev/null
+++ b/benchmarks/bigann_sample.cpp
@@ -0,0 +1,94 @@
+/*
+ * benchmarks/bigann_sample.cpp
+ *
+ * Samples records from a binary vector file and prints them as text.
+ *
+ * Usage: bigann_sample reccnt datafile sampcnt
+ *
+ * Loads datafile with read_binary_vector_file (reccnt is handed to it as
+ * its second argument), picks up to sampcnt distinct records at random in
+ * a single pass, and writes every picked record to stdout as one line of
+ * ANNSize space-separated values.
+ */
+
+#define ENABLE_TIMER
+
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+typedef ANNRec Rec;
+
+/* Print the expected command line to stderr. */
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile sampcnt\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    size_t m = atol(argv[3]);
+
+    /*
+     * The generator is never seeded here, so it starts from GSL's default
+     * seed and repeated runs over the same input pick the same records.
+     */
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+    auto data = read_binary_vector_file<Rec>(d_fname, n);
+
+    /* Positions in data of the records picked for the sample. */
+    std::vector<size_t> sampled;
+    sampled.reserve(m < data.size() ? m : data.size());
+
+    /* Records picked so far; used to skip vectors that repeat in the input. */
+    std::unordered_map<Rec, size_t, de::RecordHash<Rec>> filter;
+
+    /*
+     * Selection sampling: record i is accepted with probability
+     * (picks still needed) / (records not yet visited). The acceptance
+     * threshold has to be a fraction of the input for the draw to matter;
+     * comparing against data.size() / m (>= 1 whenever sampcnt <= reccnt)
+     * would accept every record and return the first m of them.
+     */
+    for (size_t i=0; i<data.size() && sampled.size()<m; i++) {
+        if (filter.find(data[i]) != filter.end()) {
+            continue;
+        }
+
+        double unvisited = (double) (data.size() - i);
+        double needed = (double) (m - sampled.size());
+        if (gsl_rng_uniform(rng) * unvisited < needed) {
+            sampled.push_back(i);
+            filter.insert({data[i], i});
+        }
+    }
+
+    /*
+     * Print only what was actually picked: repeated vectors in the input can
+     * leave the sample short of m.
+     */
+    for (size_t i=0; i<sampled.size(); i++) {
+        for (size_t j=0; j<ANNSize; j++ ) {
+            fprintf(stdout, "%ld ", data[sampled[i]].data[j]);
+        }
+        fprintf(stdout, "\n");
+    }
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+    fflush(stdout);
+}
+