From 0cf96983011bc05a2ed275d3588e41aa4fe3c7a1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 10:22:21 -0400 Subject: Added a dynamic trie benchmark --- .gitmodules | 3 ++ CMakeLists.txt | 4 ++ benchmarks/poplar_trie.cpp | 98 ++++++++++++++++++++++++++++++++++++++++++++++ external/poplar-trie | 1 + 4 files changed, 106 insertions(+) create mode 100644 benchmarks/poplar_trie.cpp create mode 160000 external/poplar-trie diff --git a/.gitmodules b/.gitmodules index aac5eb8..e5edcb6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -31,3 +31,6 @@ [submodule "external/fast_succinct_trie"] path = external/fast_succinct_trie url = git@github.com:dbrumbaugh/fast_succinct_trie.git +[submodule "external/poplar-trie"] + path = external/poplar-trie + url = git@github.com:kampersanda/poplar-trie.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8395fc7..314fc8c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,6 +177,10 @@ if (bench) target_include_directories(insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) target_link_options(insert_query_tput PUBLIC -mcx16) + add_executable(poplar_trie ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/poplar_trie.cpp) + target_link_libraries(poplar_trie PUBLIC gsl pthread atomic) + target_include_directories(poplar_trie PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include external/poplar-trie/include) + target_link_options(poplar_trie PUBLIC -mcx16) #add_executable(btree_insert_query_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/btree_insert_query_tput.cpp) #target_link_libraries(btree_insert_query_tput PUBLIC gsl cblas pthread atomic) diff --git a/benchmarks/poplar_trie.cpp b/benchmarks/poplar_trie.cpp new file mode 100644 index 0000000..6a04cb9 --- /dev/null +++ b/benchmarks/poplar_trie.cpp @@ -0,0 +1,98 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "poplar.hpp" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + +std::vector strings; + +typedef poplar::plain_bonsai_map Trie; + +void insert_thread(int64_t start, int64_t end, Trie * trie) { + for (uint64_t i=start; iupdate(strings[i]); + *res = i+1; + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); + + read_data(fname, n); + + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } + + auto trie = new Trie(); + + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), trie); + TIMER_STOP(); + + auto total_time = TIMER_RESULT(); + + size_t m = 100; + TIMER_START(); + for (size_t i=0; ifind(strings[j]); + if (*res != j-1) { + fprintf(stderr, "%ld %d %s\n", j, *res, strings[j].c_str()); + } + assert(*(res)+1 == j); + } + TIMER_STOP(); + + auto query_time = TIMER_RESULT(); + + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", trie->size(), + i_tput, q_lat); + + delete trie; + + fflush(stderr); +} + diff --git a/external/poplar-trie b/external/poplar-trie new file mode 160000 index 0000000..4d8aaa9 --- /dev/null +++ b/external/poplar-trie @@ -0,0 +1 @@ +Subproject commit 4d8aaa9d1e84ba10dc338c1a5acc590bb6d86c50 -- cgit v1.2.3