diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-04-19 14:40:19 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-04-19 14:40:19 -0400 |
| commit | 34fd8ad935e6359d20a5d6c949e67495d0842f8f (patch) | |
| tree | 234994ef2753bb78c5e619070415415b53d37348 /benchmarks | |
| parent | 7c2f43ff039795576bc0014c367b893fbbaceca4 (diff) | |
| download | dynamic-extension-34fd8ad935e6359d20a5d6c949e67495d0842f8f.tar.gz | |
More trie baseline tests
Diffstat (limited to 'benchmarks')
| -rw-r--r-- | benchmarks/cedar_trie.cpp | 97 | ||||
| -rw-r--r-- | benchmarks/hat_trie.cpp | 98 | ||||
| -rw-r--r-- | benchmarks/louds_insertion_tput.cpp | 112 |
3 files changed, 307 insertions, 0 deletions
diff --git a/benchmarks/cedar_trie.cpp b/benchmarks/cedar_trie.cpp
new file mode 100644
index 0000000..7499ce7
--- /dev/null
+++ b/benchmarks/cedar_trie.cpp
@@ -0,0 +1,118 @@
+/*
+ * benchmarks/cedar_trie.cpp
+ *
+ * Baseline benchmark for the cedar trie (cedar::da<int>): loads strings from
+ * a file, inserts them all, then times a small batch of exact-match lookups.
+ */
+
+#define ENABLE_TIMER
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "cedar.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+/* Keys loaded by read_data(); used by both the insert and query phases. */
+std::vector<std::string> strings;
+
+typedef cedar::da<int> Trie;
+
+/* Insert strings[start, end) into trie; the key at index i is passed i+1. */
+void insert_thread(int64_t start, int64_t end, Trie * trie) {
+    for (int64_t i=start; i<end; i++) {
+        const std::string &key = strings[static_cast<size_t>(i)];
+        trie->update(key.c_str(), key.size(), static_cast<int>(i+1));
+    }
+}
+
+/*
+ * Append up to n newline-delimited strings from fname to the global strings
+ * vector, updating the progress display after each one. Nothing is read if
+ * the file cannot be opened.
+ */
+void read_data(std::string fname, size_t n=10000000) {
+    strings.reserve(n);
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        strings.emplace_back(line);
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+}
+
+/* Print the command-line synopsis to stderr. */
+void usage(char *name) {
+    fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    std::string fname = std::string(argv[1]);
+
+    /* Reject non-positive counts; a negative value would wrap when stored in size_t. */
+    long requested = atol(argv[2]);
+    if (requested <= 0) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    size_t n = static_cast<size_t>(requested);
+
+    read_data(fname, n);
+
+    if (strings.size() == 0) {
+        fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+        /* Really abort: the query loop below computes rand() % strings.size(). */
+        exit(EXIT_FAILURE);
+    }
+    fprintf(stderr, "Finished reading from file.\n");
+
+    Trie trie;
+
+    TIMER_INIT();
+    TIMER_START();
+    insert_thread(0, strings.size(), &trie);
+    TIMER_STOP();
+
+    auto total_time = TIMER_RESULT();
+
+    size_t m = 100;
+    TIMER_START();
+    for (size_t i=0; i<m; i++) {
+        size_t j = rand() % strings.size();
+
+        /* NOTE(review): the result is discarded, as before; confirm the lookup is not optimized away. */
+        trie.exactMatchSearch<int>(strings[j].c_str());
+    }
+    TIMER_STOP();
+
+    auto query_time = TIMER_RESULT();
+
+    /* Rate uses the count actually inserted (fewer than n for a short file); 1e9 presumably converts from ns -- confirm. */
+    double i_tput = (double) strings.size() / (double) total_time * 1e9;
+    size_t q_lat = query_time / m;
+
+    fprintf(stdout, "%zu\t\t%lf\t%zu\n", static_cast<size_t>(trie.size()),
+            i_tput, q_lat);
+
+    fprintf(stdout, "%zu\n", static_cast<size_t>(trie.total_size()));
+
+    fflush(stdout);
+}
diff --git a/benchmarks/hat_trie.cpp b/benchmarks/hat_trie.cpp
new file mode 100644
index 0000000..3b4c7d3
--- /dev/null
+++ b/benchmarks/hat_trie.cpp
@@ -0,0 +1,120 @@
+/*
+ * benchmarks/hat_trie.cpp
+ *
+ * Baseline benchmark for tsl::htrie_map: loads strings from a file, inserts
+ * them all, then times a small batch of lookups and checks their values.
+ */
+
+#define ENABLE_TIMER
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "htrie_map.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+/* Keys loaded by read_data(); used by both the insert and query phases. */
+std::vector<std::string> strings;
+
+typedef tsl::htrie_map<char, size_t> Trie;
+
+/* Insert strings[start, end) into trie; the key at index i is passed i+1. */
+void insert_thread(int64_t start, int64_t end, Trie * trie) {
+    for (int64_t i=start; i<end; i++) {
+        trie->insert(strings[static_cast<size_t>(i)].c_str(), static_cast<size_t>(i+1));
+    }
+}
+
+/*
+ * Append up to n newline-delimited strings from fname to the global strings
+ * vector, updating the progress display after each one. Nothing is read if
+ * the file cannot be opened.
+ */
+void read_data(std::string fname, size_t n=10000000) {
+    strings.reserve(n);
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        strings.emplace_back(line);
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+}
+
+/* Print the command-line synopsis to stderr. */
+void usage(char *name) {
+    fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    std::string fname = std::string(argv[1]);
+
+    /* Reject non-positive counts; a negative value would wrap when stored in size_t. */
+    long requested = atol(argv[2]);
+    if (requested <= 0) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    size_t n = static_cast<size_t>(requested);
+
+    read_data(fname, n);
+
+    if (strings.size() == 0) {
+        fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+        /* Really abort: the query loop below computes rand() % strings.size(). */
+        exit(EXIT_FAILURE);
+    }
+    fprintf(stderr, "Finished reading from file.\n");
+
+    Trie trie;
+
+    TIMER_INIT();
+    TIMER_START();
+    insert_thread(0, strings.size(), &trie);
+    TIMER_STOP();
+
+    auto total_time = TIMER_RESULT();
+
+    size_t m = 100;
+    TIMER_START();
+    for (size_t i=0; i<m; i++) {
+        size_t j = rand() % strings.size();
+
+        auto res = trie.find(strings[j]);
+        /* Check for a miss before dereferencing the iterator. */
+        if (res == trie.end()) {
+            fprintf(stderr, "%zu <missing> %s\n", j, strings[j].c_str());
+        } else if (*res != (j+1)) {
+            fprintf(stderr, "%zu %zu %s\n", j, *res, strings[j].c_str());
+        }
+    }
+    TIMER_STOP();
+
+    auto query_time = TIMER_RESULT();
+
+    /* Rate uses the count actually inserted (fewer than n for a short file); 1e9 presumably converts from ns -- confirm. */
+    double i_tput = (double) strings.size() / (double) total_time * 1e9;
+    size_t q_lat = query_time / m;
+
+    fprintf(stdout, "%zu\t\t%lf\t%zu\n", static_cast<size_t>(trie.size()),
+            i_tput, q_lat);
+
+    fflush(stdout);
+}
diff --git a/benchmarks/louds_insertion_tput.cpp b/benchmarks/louds_insertion_tput.cpp
new file mode 100644
index 0000000..d772f3b
--- /dev/null
+++ b/benchmarks/louds_insertion_tput.cpp
@@ -0,0 +1,150 @@
+/*
+ * benchmarks/louds_insertion_tput.cpp
+ *
+ * Insertion-throughput benchmark for a DynamicExtension over LoudsPatricia
+ * shards, swept across several scale factors and buffer sizes.
+ */
+
+#define ENABLE_TIMER
+
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "framework/DynamicExtension.h"
+#include "shard/LoudsPatricia.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+#include "psu-util/progress.h"
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::LoudsPatricia<Rec> Trie;
+typedef de::pl::Query<Rec, Trie> Q;
+typedef de::DynamicExtension<Rec, Trie, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+
+/* NUL-terminated keys loaded by read_data(); records point into these buffers. */
+std::vector<std::unique_ptr<char[]>> strings;
+
+/*
+ * Insert strings[start, end) into extension, retrying each record until the
+ * insert is accepted.
+ */
+void insert_thread(int64_t start, int64_t end, Ext *extension) {
+    for (int64_t i=start; i<end; i++) {
+        const char *key = strings[static_cast<size_t>(i)].get();
+        Rec r = {key, static_cast<uint64_t>(i), std::strlen(key)};
+        while (!extension->insert(r)) {
+            _mm_pause();
+        }
+    }
+}
+
+/*
+ * Append up to n newline-delimited strings from fname to the global strings
+ * vector, updating the progress display after each one. Nothing is read if
+ * the file cannot be opened.
+ */
+void read_data(std::string fname, size_t n=10000000) {
+    strings.reserve(n);
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        /*
+         * Copy into new[]-allocated storage: unique_ptr<char[]> releases with
+         * delete[], so it must not own a malloc'd strdup() buffer.
+         */
+        std::unique_ptr<char[]> copy(new char[line.size() + 1]);
+        std::memcpy(copy.get(), line.c_str(), line.size() + 1);
+        strings.emplace_back(std::move(copy));
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+}
+
+/* Print the command-line synopsis to stderr. */
+void usage(char *name) {
+    fprintf(stderr, "Usage:\n%s datafile record_count\n", name);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    std::string fname = std::string(argv[1]);
+
+    /* Reject non-positive counts; a negative value would wrap when stored in size_t. */
+    long requested = atol(argv[2]);
+    if (requested <= 0) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+    size_t n = static_cast<size_t>(requested);
+
+    read_data(fname, n);
+
+    if (strings.size() == 0) {
+        fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n");
+        /* Really abort: the query loop below computes rand() % strings.size(). */
+        exit(EXIT_FAILURE);
+    }
+    fprintf(stderr, "Finished reading from file.\n");
+
+    std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12};
+    std::vector<size_t> buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000};
+
+    for (auto &sf : scale_factors) {
+        for (auto &bf_sz : buffer_sizes) {
+
+            auto extension = std::make_unique<Ext>(bf_sz, bf_sz, sf);
+
+            TIMER_INIT();
+            TIMER_START();
+            insert_thread(0, strings.size(), extension.get());
+            TIMER_STOP();
+
+            auto total_time = TIMER_RESULT();
+
+            size_t m = 100;
+            TIMER_START();
+            for (size_t i=0; i<m; i++) {
+                size_t j = rand() % strings.size();
+                de::pl::Parms<Rec> parms = {strings[j].get()};
+
+                auto res = extension->query(&parms);
+                /* NOTE(review): presumably get() waits for the query to finish; the answer is not inspected. */
+                auto ans = res.get();
+            }
+            TIMER_STOP();
+
+            auto query_time = TIMER_RESULT();
+
+            /* Rate uses the count actually inserted (fewer than n for a short file); 1e9 presumably converts from ns -- confirm. */
+            double i_tput = (double) strings.size() / (double) total_time * 1e9;
+            size_t q_lat = query_time / m;
+
+            fprintf(stdout, "%zu\t%zu\t%zu\t%lf\t%zu\t%zu\n",
+                    static_cast<size_t>(extension->get_record_count()),
+                    bf_sz, sf, i_tput, q_lat,
+                    static_cast<size_t>(extension->get_memory_usage()));
+
+            /* Emit each configuration's row as soon as it is measured. */
+            fflush(stdout);
+        }
+    }
+}