From 675cf7f7558ebaef15f398d90cc3d1d91457b219 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 3 May 2024 11:01:47 -0400 Subject: FST benchmarks --- benchmarks/include/file_util.h | 45 ++++++++++++++++- benchmarks/vldb/fst_bench.cpp | 100 ++++++++++++++++++++++++++++++++++++++ benchmarks/vldb/fst_bsm_bench.cpp | 100 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 benchmarks/vldb/fst_bench.cpp create mode 100644 benchmarks/vldb/fst_bsm_bench.cpp (limited to 'benchmarks') diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include @@ -7,8 +9,10 @@ #include #include #include +#include + +#include "psu-util/progress.h" -#pragma once template static std::vector read_lookup_queries(std::string fname, double selectivity) { @@ -35,6 +39,20 @@ static std::vector read_lookup_queries(std::string fname, double selectivity return queries; } +template +static std::vector generate_string_lookup_queries(std::vector> &strings, size_t cnt, gsl_rng *rng) { + std::vector queries; + + for (size_t i=0; i static std::vector read_range_queries(std::string &fname, double selectivity) { std::vector queries; @@ -173,3 +191,28 @@ static std::vector read_vector_file(std::string &fname, size_t n) { return records; } + + +static std::vector>read_string_file(std::string fname, size_t n=10000000) { + + std::fstream file; + file.open(fname, std::ios::in); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector> strings; + strings.reserve(n); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } + + return strings; +} diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp new file mode 100644 index 0000000..276a922 --- /dev/null +++ b/benchmarks/vldb/fst_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::FSTrie Shard; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; +typedef de::pl::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries(strings, 1000, rng); + + std::vector data; + for (size_t i=0; i to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp new file mode 100644 index 0000000..15a441a --- /dev/null +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::FSTrie Shard; +typedef de::pl::Query Q; +typedef de::DynamicExtension Ext; +typedef de::pl::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries(strings, 1000, rng); + + std::vector data; + for (size_t i=0; i to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + -- cgit v1.2.3