diff options
| -rw-r--r-- | CMakeLists.txt | 30 | ||||
| -rw-r--r-- | benchmarks/include/file_util.h | 45 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bench.cpp | 100 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bsm_bench.cpp | 100 |
4 files changed, 265 insertions, 10 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index cdc114f..b4e801c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,10 +147,10 @@ if (tests) target_link_options(fst_tests PUBLIC -mcx16) target_include_directories(fst_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia) - add_executable(louds_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/louds_tests.cpp) - target_link_libraries(louds_tests PUBLIC gsl check subunit pthread atomic) - target_link_options(louds_tests PUBLIC -mcx16) - target_include_directories(louds_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia) + #add_executable(louds_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/louds_tests.cpp) + #target_link_libraries(louds_tests PUBLIC gsl check subunit pthread atomic) + #target_link_options(louds_tests PUBLIC -mcx16) + #target_include_directories(louds_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia) endif() if (vldb_bench) @@ -180,6 +180,18 @@ if (vldb_bench) target_link_options(vptree_bsm_bench PUBLIC -mcx16) target_compile_options(vptree_bsm_bench PUBLIC -fopenmp) + add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp) + target_link_libraries(fst_bench PUBLIC gsl pthread atomic) + target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(fst_bench PUBLIC -mcx16) + target_compile_options(fst_bench PUBLIC -fopenmp) + + add_executable(fst_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bsm_bench.cpp) + target_link_libraries(fst_bsm_bench PUBLIC gsl pthread atomic) + target_include_directories(fst_bsm_bench PRIVATE include external 
external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + target_link_options(fst_bsm_bench PUBLIC -mcx16) + target_compile_options(fst_bsm_bench PUBLIC -fopenmp) + add_executable(ts_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bench.cpp) target_link_libraries(ts_bench PUBLIC gsl pthread atomic) target_include_directories(ts_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) @@ -198,11 +210,11 @@ if (vldb_bench) target_link_options(ts_bsm_bench PUBLIC -mcx16) target_compile_options(ts_bsm_bench PUBLIC -fopenmp) - add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp) - target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic) - target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) - target_link_options(ts_mdsp_bench PUBLIC -mcx16) - target_compile_options(ts_mdsp_bench PUBLIC -fopenmp) + #add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp) + #target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic) + #target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) + #target_link_options(ts_mdsp_bench PUBLIC -mcx16) + #target_compile_options(ts_mdsp_bench PUBLIC -fopenmp) add_executable(pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/pgm_bench.cpp) target_link_libraries(pgm_bench PUBLIC gsl pthread atomic gomp) diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index ebcf17e..586b44f 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -1,3 +1,5 @@ 
+#pragma once
+
 #include <cstdlib>
 #include <cstdio>
 #include <iostream>
@@ -7,8 +9,10 @@
 #include <gsl/gsl_rng.h>
 #include <cstring>
 #include <vector>
+#include <memory>
+
+#include "psu-util/progress.h"
 
-#pragma once
 
 template <typename QP>
 static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
@@ -36,6 +40,35 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity
 }
 
+/*
+ * Generate cnt point-lookup queries, each targeting a key drawn uniformly
+ * at random (with replacement) from strings.
+ *
+ * The returned queries store the raw pointer obtained from the selected
+ * unique_ptr, so strings must outlive every use of the result. An empty
+ * vector is returned when strings is empty, as there is nothing to sample
+ * and gsl_rng_uniform_int() rejects an empty range.
+ */
 template <typename QP>
+static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) {
+    std::vector<QP> queries;
+
+    if (strings.empty()) {
+        return queries;
+    }
+
+    queries.reserve(cnt);
+
+    for (size_t i=0; i<cnt; i++) {
+        auto idx = gsl_rng_uniform_int(rng, strings.size());
+        QP q;
+        q.search_key = strings[idx].get();
+        queries.push_back(q);
+    }
+
+    return queries;
+}
+
+template <typename QP>
 static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
     std::vector<QP> queries;
 
@@ -173,3 +206,41 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) {
 
     return records;
 }
+
+
+/*
+ * Read up to n newline-delimited strings from fname, returning each one as
+ * an independently owned, NUL-terminated character array. Prints an error
+ * and exits the process if the file cannot be opened.
+ */
+static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    if (!file.is_open()) {
+        fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    std::vector<std::unique_ptr<char[]>> strings;
+    strings.reserve(n);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        /*
+         * unique_ptr<char[]> releases its buffer with delete[], so the
+         * buffer must come from new[] rather than strdup()'s malloc().
+         * As with strdup(), the copy stops at the first NUL byte.
+         */
+        size_t len = strlen(line.c_str());
+        std::unique_ptr<char[]> copy(new char[len + 1]);
+        memcpy(copy.get(), line.c_str(), len + 1);
+        strings.emplace_back(std::move(copy));
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+
+    return strings;
+}
diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp
new file mode 100644
index 0000000..276a922
--- /dev/null
+++ b/benchmarks/vldb/fst_bench.cpp
@@ -0,0 +1,121 @@
+/*
+ * Benchmark of a DynamicExtension over FSTrie shards (TEIRING layout
+ * policy, tombstone deletes): times record insertion and point lookups
+ * against both the dynamized structure and a static shard built from it.
+ *
+ * Prints one tab-separated line to stdout: insert throughput, per-query
+ * latency on the extension, extension memory usage, per-query latency on
+ * the static shard, and static shard memory usage.
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::FSTrie<Rec> Shard;
+typedef de::pl::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::pl::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto strings = read_string_file(d_fname, n);
+
+    /* nothing can be inserted or queried without records */
+    if (strings.empty()) {
+        fprintf(stderr, "ERROR: No records read from %s\n", d_fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * the file may hold fewer records than requested; base the warmup,
+     * delete list, and throughput figures on what was actually read
+     */
+    if (strings.size() < n) {
+        n = strings.size();
+    }
+
+    auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+    std::vector<Rec> data;
+    for (size_t i=0; i<strings.size(); i++) {
+        data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+    }
+
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp
new file mode 100644
index 0000000..15a441a
--- /dev/null
+++ b/benchmarks/vldb/fst_bsm_bench.cpp
@@ -0,0 +1,121 @@
+/*
+ * Benchmark of a DynamicExtension over FSTrie shards (BSM layout policy,
+ * tombstone deletes): times record insertion and point lookups against
+ * both the dynamized structure and a static shard built from it.
+ *
+ * Prints one tab-separated line to stdout: insert throughput, per-query
+ * latency on the extension, extension memory usage, per-query latency on
+ * the static shard, and static shard memory usage.
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<const char *, uint64_t> Rec;
+typedef de::FSTrie<Rec> Shard;
+typedef de::pl::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::pl::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 3) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+
+    auto extension = new Ext(1, 12001, 2, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto strings = read_string_file(d_fname, n);
+
+    /* nothing can be inserted or queried without records */
+    if (strings.empty()) {
+        fprintf(stderr, "ERROR: No records read from %s\n", d_fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * the file may hold fewer records than requested; base the warmup,
+     * delete list, and throughput figures on what was actually read
+     */
+    if (strings.size() < n) {
+        n = strings.size();
+    }
+
+    auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng);
+
+    std::vector<Rec> data;
+    for (size_t i=0; i<strings.size(); i++) {
+        data.push_back({strings[i].get(), i, strlen(strings[i].get())});
+    }
+
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
|