From 147f0df58e1ff4973bffb7e4628e6b2fdc20eb57 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 22 Mar 2024 14:04:40 -0400 Subject: FSTrie testing and debugging --- benchmarks/string_insertion_tput.cpp | 92 ++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 benchmarks/string_insertion_tput.cpp (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp new file mode 100644 index 0000000..d205175 --- /dev/null +++ b/benchmarks/string_insertion_tput.cpp @@ -0,0 +1,92 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/rangequery.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" +#include "psu-util/progress.h" + + +typedef de::Record Rec; +typedef de::FSTrie Trie; +typedef de::rq::Query Q; +typedef de::DynamicExtension Ext; //, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; + +std::vector strings; + +void insert_thread(int64_t start, int64_t end, Ext *extension) { + for (uint64_t i=start; iinsert(r)) { + _mm_pause(); + } + } +} + +void read_data(std::string fname, size_t n=10000000) { + strings.reserve(n); + + std::fstream file; + file.open(fname, std::ios::in); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(line); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } +} + +int main(int argc, char **argv) { + size_t n = 100000000; + + std::vector counts = {1 , 2, 4, 8}; //, 16, 32, 64}; + // + read_data("benchmarks/data/ursa-genome.txt", n); + + fprintf(stderr, "Finished reading from file.\n"); + + for (auto thread_count : counts) { + + auto extension = new Ext(1000, 12000, 8); + + size_t per_thread = n / thread_count; + + std::thread threads[thread_count]; + + TIMER_INIT(); + TIMER_START(); + for (size_t i=0; iget_record_count(), + thread_count, tput); + + delete extension; + } + + fflush(stderr); +} + -- cgit v1.2.3 From 7e7fd9f7339eee2f1ae974c662a447532dfb1b1a Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Tue, 26 Mar 2024 16:35:12 -0400 Subject: Updated FSTrie benchmark and some minor fixes --- benchmarks/string_insertion_tput.cpp | 73 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 28 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index d205175..e41e996 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -9,7 +9,7 @@ #include "framework/DynamicExtension.h" #include "shard/FSTrie.h" -#include "query/rangequery.h" +#include "query/pointlookup.h" #include "framework/interface/Record.h" #include "psu-util/timer.h" @@ -18,8 +18,8 @@ typedef de::Record Rec; typedef de::FSTrie Trie; -typedef de::rq::Query Q; -typedef de::DynamicExtension Ext; //, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; std::vector strings; @@ -47,45 +47,62 @@ void read_data(std::string fname, size_t n=10000000) { } } +void usage(char *name) { + fprintf(stderr, "Usage:\n%s datafile record_count\n", name); +} + int main(int argc, char **argv) { - size_t n = 100000000; - std::vector counts = {1 , 2, 4, 8}; //, 16, 32, 64}; - // - read_data("benchmarks/data/ursa-genome.txt", n); + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } - fprintf(stderr, "Finished reading from file.\n"); + std::string fname = std::string(argv[1]); + size_t n = atol(argv[2]); - for (auto thread_count : counts) { + read_data(fname, n); - auto extension = new Ext(1000, 12000, 8); + if (strings.size() == 0) { + fprintf(stderr, "[E]: No string data read from file. Aborting execution.\n"); + } else { + fprintf(stderr, "Finished reading from file.\n"); + } - size_t per_thread = n / thread_count; + auto extension = new Ext(1000, 12000, 8); - std::thread threads[thread_count]; + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), extension); + TIMER_STOP(); - TIMER_INIT(); - TIMER_START(); - for (size_t i=0; i parms; + parms.search_key = strings[j]; - TIMER_STOP(); + auto res = extension->query(&parms); + auto ans = res.get(); - auto total_time = TIMER_RESULT(); + assert(ans[0].value == j); + } + TIMER_STOP(); - double tput = (double) n / (double) total_time * 1e9; + auto query_time = TIMER_RESULT(); - fprintf(stdout, "%ld\t%d\t%lf\n", extension->get_record_count(), - thread_count, tput); - delete extension; - } + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = total_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\n", extension->get_record_count(), + i_tput, q_lat); + + + delete extension; fflush(stderr); } -- cgit v1.2.3 From 1209553e9b44c355f38736fa53d4130ffff937f0 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 11 Apr 2024 12:23:29 -0400 Subject: trie_bench: Added static query latency --- benchmarks/string_insertion_tput.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index e41e996..5a46a80 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -93,13 +93,31 @@ int main(int argc, char **argv) { TIMER_STOP(); auto query_time = TIMER_RESULT(); + + auto shard = extension->create_static_structure(); + TIMER_START(); + for (size_t i=0; i parms; + parms.search_key = strings[j]; + + auto res = Q::query(shard, nullptr, &parms); + } + TIMER_STOP(); + + auto shard_query_time = TIMER_RESULT(); double i_tput = (double) n / (double) total_time * 1e9; - size_t q_lat = total_time / m; + size_t q_lat = query_time / m; + size_t s_q_lat = shard_query_time / m; + + fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\n", extension->get_record_count(), + i_tput, q_lat, s_q_lat); + + + - fprintf(stdout, "%ld\t\t%lf\t%ld\n", extension->get_record_count(), - i_tput, q_lat); delete extension; -- cgit v1.2.3 From 428658bc76b5b9eec46d3b7e415b5d114ddd3f79 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 12:50:26 -0400 Subject: Print size statistics --- benchmarks/string_insertion_tput.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 5a46a80..4923b09 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -88,6 +88,10 @@ int main(int argc, char **argv) { auto res = extension->query(&parms); auto ans = res.get(); + if (ans[0].value != j) { + fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].c_str()); + } + assert(ans[0].value == j); } TIMER_STOP(); @@ -103,6 +107,10 @@ int main(int argc, char **argv) { parms.search_key = strings[j]; auto res = Q::query(shard, nullptr, &parms); + + if (res[0].rec.value != j) { + fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].c_str()); + } } TIMER_STOP(); @@ -112,15 +120,11 @@ int main(int argc, char **argv) { size_t q_lat = query_time / m; size_t s_q_lat = shard_query_time / m; - fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\n", extension->get_record_count(), - i_tput, q_lat, s_q_lat); - - - - - + fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), + i_tput, q_lat, s_q_lat, extension->get_memory_usage(), shard->get_memory_usage()); delete extension; + delete shard; fflush(stderr); } -- cgit v1.2.3 From b25beb13773072c3b143842b45a7c32a1108f347 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 15 Apr 2024 14:00:27 -0400 Subject: Updated FSTrie to use const char * instead of std::string Note: this requires the caller to manage the memory of the strings --- benchmarks/string_insertion_tput.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 4923b09..f4a519a 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -16,16 +16,16 @@ #include "psu-util/progress.h" -typedef de::Record Rec; +typedef de::Record Rec; typedef de::FSTrie Trie; typedef de::pl::Query Q; typedef de::DynamicExtension Ext; -std::vector strings; +std::vector> strings; void insert_thread(int64_t start, int64_t end, Ext *extension) { for (uint64_t i=start; iinsert(r)) { _mm_pause(); } @@ -41,7 +41,7 @@ void read_data(std::string fname, size_t n=10000000) { size_t i=0; std::string line; while (i < n && std::getline(file, line, '\n')) { - strings.emplace_back(line); + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); i++; psudb::progress_update((double) i / (double) n, "Reading file:"); } @@ -82,14 +82,13 @@ int main(int argc, char **argv) { TIMER_START(); for (size_t i=0; i parms; - parms.search_key = strings[j]; + de::pl::Parms parms = {strings[j].get()}; auto res = extension->query(&parms); auto ans = res.get(); if (ans[0].value != j) { - fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].c_str()); + fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].get()); } assert(ans[0].value == j); @@ -103,13 +102,12 @@ int main(int argc, char **argv) { TIMER_START(); for (size_t i=0; i parms; - parms.search_key = strings[j]; + de::pl::Parms parms = {strings[j].get()}; auto res = Q::query(shard, nullptr, &parms); if (res[0].rec.value != j) { - fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].c_str()); + fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].get()); } } TIMER_STOP(); -- cgit v1.2.3 From 4a1dde3148e0e84b47c884bc0bb69c60678b4558 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 15:09:07 -0400 Subject: Benchmark update+reorganization The Alex benchmark isn't updated yet. --- benchmarks/string_insertion_tput.cpp | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index f4a519a..8fa7f44 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -69,7 +69,13 @@ int main(int argc, char **argv) { fprintf(stderr, "Finished reading from file.\n"); } - auto extension = new Ext(1000, 12000, 8); + std::vector scale_factors = {2, 4, 6, 8, 10, 12}; + std::vector buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000}; + + for (auto &sf : scale_factors) { + for (auto &bf_sz : buffer_sizes) { + + auto extension = new Ext(bf_sz, bf_sz, sf); TIMER_INIT(); TIMER_START(); @@ -97,33 +103,15 @@ int main(int argc, char **argv) { auto query_time = TIMER_RESULT(); - - auto shard = extension->create_static_structure(); - TIMER_START(); - for (size_t i=0; i parms = {strings[j].get()}; - - auto res = Q::query(shard, nullptr, &parms); - - if (res[0].rec.value != j) { - fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].get()); - } - } - TIMER_STOP(); - - auto shard_query_time = TIMER_RESULT(); - double i_tput = (double) n / (double) total_time * 1e9; size_t q_lat = query_time / m; - size_t s_q_lat = shard_query_time / m; - fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), - i_tput, q_lat, s_q_lat, extension->get_memory_usage(), shard->get_memory_usage()); + fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), + bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); delete extension; - delete shard; + }} fflush(stderr); } -- cgit v1.2.3 From 764a9c41cad220513523afb6b610b2bdf74e5476 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 14 May 2024 16:00:17 -0400 Subject: Poplar Trie: updated benchmark to standard format --- benchmarks/string_insertion_tput.cpp | 57 +++++++++++++++++------------------- 1 file changed, 27 insertions(+), 30 deletions(-) (limited to 'benchmarks/string_insertion_tput.cpp') diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp index 8fa7f44..c439cb3 100644 --- a/benchmarks/string_insertion_tput.cpp +++ b/benchmarks/string_insertion_tput.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "framework/DynamicExtension.h" #include "shard/FSTrie.h" @@ -21,7 +22,6 @@ typedef de::FSTrie Trie; typedef de::pl::Query Q; typedef de::DynamicExtension Ext; -std::vector> strings; void insert_thread(int64_t start, int64_t end, Ext *extension) { for (uint64_t i=start; i>read_strings(std::string fname, size_t n=10000000) { + std::vector> strings; strings.reserve(n); std::fstream file; @@ -45,6 +46,8 @@ void read_data(std::string fname, size_t n=10000000) { i++; psudb::progress_update((double) i / (double) n, "Reading file:"); } + + return strings; } void usage(char *name) { @@ -74,44 +77,38 @@ int main(int argc, char **argv) { for (auto &sf : scale_factors) { for (auto &bf_sz : buffer_sizes) { + auto extension = new Ext(bf_sz, bf_sz, sf); - auto extension = new Ext(bf_sz, bf_sz, sf); - - TIMER_INIT(); - TIMER_START(); - insert_thread(0, strings.size(), extension); - TIMER_STOP(); - - auto total_time = TIMER_RESULT(); + TIMER_INIT(); + TIMER_START(); + insert_thread(0, strings.size(), extension); + TIMER_STOP(); - size_t m = 100; - TIMER_START(); - for (size_t i=0; i parms = {strings[j].get()}; + auto total_time = TIMER_RESULT(); - auto res = extension->query(&parms); - auto ans = res.get(); - - if (ans[0].value != j) { - fprintf(stderr, "ext:\t%ld %ld %s\n", ans[0].value, j, strings[j].get()); - } + size_t m = 100; + TIMER_START(); + for (size_t i=0; i parms = {strings[j].get()}; - assert(ans[0].value == j); - } - TIMER_STOP(); + auto res = extension->query(&parms); + auto ans = res.get(); + } + TIMER_STOP(); - auto query_time = TIMER_RESULT(); - - double i_tput = (double) n / (double) total_time * 1e9; - size_t q_lat = query_time / m; + auto query_time = TIMER_RESULT(); + + double i_tput = (double) n / (double) total_time * 1e9; + size_t q_lat = query_time / m; fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(), bf_sz, sf, i_tput, q_lat, extension->get_memory_usage()); - delete extension; + delete extension; - }} + } + } fflush(stderr); } -- cgit v1.2.3