summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2024-05-03 11:01:47 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2024-05-03 11:01:47 -0400
commit675cf7f7558ebaef15f398d90cc3d1d91457b219 (patch)
treeb761abd4d9f258a475036b3a3eeefa6d9b09d7a4
parente198d64ca87f6fc05e8d62efdf720f7b2e8a8004 (diff)
downloaddynamic-extension-675cf7f7558ebaef15f398d90cc3d1d91457b219.tar.gz
FST benchmarks
-rw-r--r--CMakeLists.txt30
-rw-r--r--benchmarks/include/file_util.h45
-rw-r--r--benchmarks/vldb/fst_bench.cpp100
-rw-r--r--benchmarks/vldb/fst_bsm_bench.cpp100
4 files changed, 265 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cdc114f..b4e801c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -147,10 +147,10 @@ if (tests)
target_link_options(fst_tests PUBLIC -mcx16)
target_include_directories(fst_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia)
- add_executable(louds_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/louds_tests.cpp)
- target_link_libraries(louds_tests PUBLIC gsl check subunit pthread atomic)
- target_link_options(louds_tests PUBLIC -mcx16)
- target_include_directories(louds_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia)
+ #add_executable(louds_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/louds_tests.cpp)
+ #target_link_libraries(louds_tests PUBLIC gsl check subunit pthread atomic)
+ #target_link_options(louds_tests PUBLIC -mcx16)
+ #target_include_directories(louds_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia)
endif()
if (vldb_bench)
@@ -180,6 +180,18 @@ if (vldb_bench)
target_link_options(vptree_bsm_bench PUBLIC -mcx16)
target_compile_options(vptree_bsm_bench PUBLIC -fopenmp)
+ add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp)
+ target_link_libraries(fst_bench PUBLIC gsl pthread atomic)
+ target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(fst_bench PUBLIC -mcx16)
+ target_compile_options(fst_bench PUBLIC -fopenmp)
+
+ add_executable(fst_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bsm_bench.cpp)
+ target_link_libraries(fst_bsm_bench PUBLIC gsl pthread atomic)
+ target_include_directories(fst_bsm_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ target_link_options(fst_bsm_bench PUBLIC -mcx16)
+ target_compile_options(fst_bsm_bench PUBLIC -fopenmp)
+
add_executable(ts_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bench.cpp)
target_link_libraries(ts_bench PUBLIC gsl pthread atomic)
target_include_directories(ts_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
@@ -198,11 +210,11 @@ if (vldb_bench)
target_link_options(ts_bsm_bench PUBLIC -mcx16)
target_compile_options(ts_bsm_bench PUBLIC -fopenmp)
- add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp)
- target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic)
- target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
- target_link_options(ts_mdsp_bench PUBLIC -mcx16)
- target_compile_options(ts_mdsp_bench PUBLIC -fopenmp)
+ #add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp)
+ #target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic)
+ #target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+ #target_link_options(ts_mdsp_bench PUBLIC -mcx16)
+ #target_compile_options(ts_mdsp_bench PUBLIC -fopenmp)
add_executable(pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/pgm_bench.cpp)
target_link_libraries(pgm_bench PUBLIC gsl pthread atomic gomp)
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
index ebcf17e..586b44f 100644
--- a/benchmarks/include/file_util.h
+++ b/benchmarks/include/file_util.h
@@ -1,3 +1,5 @@
+#pragma once
+
#include <cstdlib>
#include <cstdio>
#include <iostream>
@@ -7,8 +9,10 @@
#include <gsl/gsl_rng.h>
#include <cstring>
#include <vector>
+#include <memory>
+
+#include "psu-util/progress.h"
-#pragma once
template <typename QP>
static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
@@ -36,6 +40,20 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity
}
template <typename QP>
+/*
+ * Build cnt lookup queries, each keyed on a string chosen uniformly at random
+ * (with replacement) from strings using rng.
+ *
+ * Each query's search_key is the raw pointer held by the corresponding
+ * unique_ptr, so strings must stay alive for as long as the queries are used.
+ *
+ * Prints an error and exits if queries are requested from an empty string
+ * set, because gsl_rng_uniform_int() does not accept an upper bound of 0.
+ */
+static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) {
+    std::vector<QP> queries;
+
+    if (cnt > 0 && strings.empty()) {
+        fprintf(stderr, "ERROR: Cannot generate lookup queries from an empty string set\n");
+        exit(EXIT_FAILURE);
+    }
+
+    queries.reserve(cnt);
+
+    for (size_t i=0; i<cnt; i++) {
+        auto idx = gsl_rng_uniform_int(rng, strings.size());
+        QP q;
+        q.search_key = strings[idx].get();
+        queries.push_back(q);
+    }
+
+    return queries;
+}
+
+template <typename QP>
static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
std::vector<QP> queries;
@@ -173,3 +191,28 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) {
return records;
}
+
+
+/*
+ * Read up to n newline-delimited strings from the file fname and return them
+ * as owned, NUL-terminated character arrays (one per line, without the
+ * trailing newline).
+ *
+ * Prints an error and exits if the file cannot be opened. If the file holds
+ * fewer than n lines the returned vector is correspondingly shorter, so
+ * callers should rely on its size() rather than on n.
+ */
+static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    if (!file.is_open()) {
+        fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    std::vector<std::unique_ptr<char[]>> strings;
+    strings.reserve(n);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        /*
+         * unique_ptr<char[]> releases its buffer with delete[], so the copy
+         * has to live in new[]-allocated storage; a strdup() result comes
+         * from malloc() and may only be released with free().
+         */
+        const size_t len = strlen(line.c_str());
+
+        /* make_unique zero-fills the array, which supplies the terminator */
+        strings.emplace_back(std::make_unique<char[]>(len + 1));
+        memcpy(strings.back().get(), line.c_str(), len);
+
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+
+    return strings;
+}
diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp
new file mode 100644
index 0000000..276a922
--- /dev/null
+++ b/benchmarks/vldb/fst_bench.cpp
@@ -0,0 +1,100 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+/* Aliases binding the framework templates to FST-backed C-string records. */
+using Rec = de::Record<const char *, uint64_t>;
+using Shard = de::FSTrie<Rec>;
+using Q = de::pl::Query<Rec, Shard>;
+using Ext = de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler>;
+using QP = de::pl::Parms<Rec>;
+
+/*
+ * Write the command-line synopsis for this benchmark to stderr; progname is
+ * echoed as the first token of the message.
+ */
+void usage(char *progname) {
+    fprintf(stderr,
+            "%s reccnt datafile\n",
+            progname);
+}
+
+/*
+ * Benchmark driver: loads up to reccnt strings from datafile, inserts them
+ * into the dynamic extension (the first 10% untimed as warmup, the remainder
+ * timed), then times the same 1000 lookup queries against the extension and
+ * against a static structure built from it.
+ *
+ * Writes one tab-separated line to stdout:
+ *   insert throughput, per-query latency on the extension, extension memory
+ *   usage, per-query latency on the static structure, static structure
+ *   memory usage.
+ * Timer results are presumably in nanoseconds (the throughput computation
+ * scales by 1e9) -- confirm against psu-util/timer.h.
+ */
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto strings = read_string_file(d_fname, n);
+
+    /* nothing to insert or query; bail out before sampling from an empty set */
+    if (strings.empty()) {
+        fprintf(stderr, "ERROR: No records could be read from %s\n", d_fname.c_str());
+        gsl_rng_free(rng);
+        delete extension;
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * The file may hold fewer records than were requested. Everything below
+     * (warmup size, delete sampling, throughput) must be based on the number
+     * of records actually loaded, otherwise the warmup range can run past the
+     * end of data and the throughput counts inserts that never happened.
+     */
+    n = strings.size();
+
+    auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+    std::vector<Rec> data;
+    data.reserve(strings.size());
+    for (size_t i=0; i<strings.size(); i++) {
+        data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+    }
+
+    /* sample record indices, in ascending order, as deletion candidates */
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    /* timed phase: the remaining records */
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    /*
+     * NOTE(review): shard is never released in this function -- confirm
+     * whether create_static_structure() hands ownership to the caller.
+     */
+    auto shard = extension->create_static_structure();
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp
new file mode 100644
index 0000000..15a441a
--- /dev/null
+++ b/benchmarks/vldb/fst_bsm_bench.cpp
@@ -0,0 +1,100 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+/* Aliases binding the framework templates to FST-backed C-string records. */
+using Rec = de::Record<const char *, uint64_t>;
+using Shard = de::FSTrie<Rec>;
+using Q = de::pl::Query<Rec, Shard>;
+using Ext = de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler>;
+using QP = de::pl::Parms<Rec>;
+
+/*
+ * Write the command-line synopsis for this benchmark to stderr; progname is
+ * echoed as the first token of the message.
+ */
+void usage(char *progname) {
+    fprintf(stderr,
+            "%s reccnt datafile\n",
+            progname);
+}
+
+/*
+ * Benchmark driver (BSM layout): loads up to reccnt strings from datafile,
+ * inserts them into the dynamic extension (the first 10% untimed as warmup,
+ * the remainder timed), then times the same 1000 lookup queries against the
+ * extension and against a static structure built from it.
+ *
+ * Writes one tab-separated line to stdout:
+ *   insert throughput, per-query latency on the extension, extension memory
+ *   usage, per-query latency on the static structure, static structure
+ *   memory usage.
+ * Timer results are presumably in nanoseconds (the throughput computation
+ * scales by 1e9) -- confirm against psu-util/timer.h.
+ */
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+
+    auto extension = new Ext(1, 12001, 2, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto strings = read_string_file(d_fname, n);
+
+    /* nothing to insert or query; bail out before sampling from an empty set */
+    if (strings.empty()) {
+        fprintf(stderr, "ERROR: No records could be read from %s\n", d_fname.c_str());
+        gsl_rng_free(rng);
+        delete extension;
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * The file may hold fewer records than were requested. Everything below
+     * (warmup size, delete sampling, throughput) must be based on the number
+     * of records actually loaded, otherwise the warmup range can run past the
+     * end of data and the throughput counts inserts that never happened.
+     */
+    n = strings.size();
+
+    auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+    std::vector<Rec> data;
+    data.reserve(strings.size());
+    for (size_t i=0; i<strings.size(); i++) {
+        data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+    }
+
+    /* sample record indices, in ascending order, as deletion candidates */
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    /* timed phase: the remaining records */
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    /*
+     * NOTE(review): shard is never released in this function -- confirm
+     * whether create_static_structure() hands ownership to the caller.
+     */
+    auto shard = extension->create_static_structure();
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+