diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-03 11:01:47 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-05-03 11:01:47 -0400 |
| commit | 675cf7f7558ebaef15f398d90cc3d1d91457b219 (patch) | |
| tree | b761abd4d9f258a475036b3a3eeefa6d9b09d7a4 /benchmarks | |
| parent | e198d64ca87f6fc05e8d62efdf720f7b2e8a8004 (diff) | |
| download | dynamic-extension-675cf7f7558ebaef15f398d90cc3d1d91457b219.tar.gz | |
FST benchmarks
Diffstat (limited to 'benchmarks')
| -rw-r--r-- | benchmarks/include/file_util.h | 45 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bench.cpp | 100 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bsm_bench.cpp | 100 |
3 files changed, 244 insertions, 1 deletion
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ +#pragma once + #include <cstdlib> #include <cstdio> #include <iostream> @@ -7,8 +9,10 @@ #include <gsl/gsl_rng.h> #include <cstring> #include <vector> +#include <memory> + +#include "psu-util/progress.h" -#pragma once template <typename QP> static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) { @@ -36,6 +40,20 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity } template <typename QP> +static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) { + std::vector<QP> queries; + + for (size_t i=0; i<cnt; i++) { + auto idx = gsl_rng_uniform_int(rng, strings.size()); + QP q; + q.search_key = strings[idx].get(); + queries.push_back(q); + } + + return queries; +} + +template <typename QP> static std::vector<QP> read_range_queries(std::string &fname, double selectivity) { std::vector<QP> queries; @@ -173,3 +191,28 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) { return records; } + + +static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { + + std::fstream file; + file.open(fname, std::ios::in); + + if (!file.is_open()) { + fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str()); + exit(EXIT_FAILURE); + } + + std::vector<std::unique_ptr<char[]>> strings; + strings.reserve(n); + + size_t i=0; + std::string line; + while (i < n && std::getline(file, line, '\n')) { + strings.emplace_back(std::unique_ptr<char[]>(strdup(line.c_str()))); + i++; + psudb::progress_update((double) i / (double) n, "Reading file:"); + } + + return strings; +} diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp new file mode 100644 index 0000000..276a922 --- /dev/null +++ 
b/benchmarks/vldb/fst_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<const char *, uint64_t> Rec; +typedef de::FSTrie<Rec> Shard; +typedef de::pl::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::pl::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng); + + std::vector<Rec> data; + for (size_t i=0; i<strings.size(); i++) { + data.push_back({strings[i].get(), i, strlen(strings[i].get())}); + } + + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / 
(double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp new file mode 100644 index 0000000..15a441a --- /dev/null +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<const char *, uint64_t> Rec; +typedef de::FSTrie<Rec> Shard; +typedef de::pl::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::pl::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = 
generate_string_lookup_queries<QP>(strings, 1000, rng); + + std::vector<Rec> data; + for (size_t i=0; i<strings.size(); i++) { + data.push_back({strings[i].get(), i, strlen(strings[i].get())}); + } + + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + |