82 files changed, 4414 insertions, 4964 deletions
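The bulk of this patch migrates the query interface from record-and-shard template pairs (de::irs::Query<Rec, Shard> plus a separate de::irs::Parms<Rec>) to shard-only query types (de::irs::Query<Shard>) that expose nested Parameters and LocalQuery typedefs, and replaces the old get_query_state / process_query_states / query calls with local_preproc / distribute_query / local_query. The sketch below is illustrative only and is not part of the patch; it assembles the typedefs and calls visible in the changed benchmark sources (irs_bench.cpp, thread_scaling_bench.cpp, standard_benchmarks.h). The include paths and the example() wrapper are assumptions made for the sketch.

```cpp
// Illustrative sketch of the post-refactor query interface -- not part of the patch.
// Typedefs and call names are taken from the changed benchmark sources; include
// paths follow the benchmarks' -Iinclude convention and are assumptions.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

#include "framework/DynamicExtension.h"
#include "query/irs.h"
#include "shard/ISAMTree.h"

typedef de::Record<uint64_t, uint64_t> Rec;
typedef de::ISAMTree<Rec> Shard;
typedef de::irs::Query<Shard> Q;   /* was de::irs::Query<Rec, Shard> */
typedef Q::Parameters QP;          /* was de::irs::Parms<Rec> */
typedef de::DynamicExtension<Shard, Q, /* record type now comes from the shard */
                             de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING,
                             de::SerialScheduler> Ext;

void example(Ext *extension, Shard *shard, QP q) {
  /* static (single-shard) path: the former get_query_state /
   * process_query_states / query triple becomes
   * local_preproc / distribute_query / local_query */
  std::vector<Q::LocalQuery *> local = {Q::local_preproc(shard, &q)};
  Q::distribute_query(&q, local, nullptr);
  auto shard_res = Q::local_query(shard, local[0]);
  delete local[0]; /* replaces Q::delete_query_state() */

  /* dynamized path: parameters are now passed by rvalue, not as a void* */
  auto fut = extension->query(std::move(q));
  auto results = fut.get();

  fprintf(stdout, "%zu\t%zu\n", shard_res.size(), results.size());
}
```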
diff --git a/CMakeLists.txt b/CMakeLists.txt index a03c351..b185b0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,10 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.22) -#set(CMAKE_C_COMPILER clang) -#set(CMAKE_CXX_COMPILER clang++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_COMPILER g++) set(CMAKE_CXX_STANDARD 20) -#set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_CXX_STANDARD_REQUIRED True) set(namespace "de") project("Practical Dynamic Extension" VERSION 0.1.0) @@ -21,6 +21,10 @@ set(CMAKE_CXX_FLAGS=-latomic -mcx16) add_compile_options(-Iinclude -Iexternal/PLEX/include -Iexternal -mcx16 -march=native) # -fconcepts-diagnostics-depth=3) +find_package(OpenMP REQUIRED) +add_compile_options(${OpenMP_CXX_FLAGS}) +link_libraries(OpenMP::OpenMP_CXX) + if (BSD) add_link_options(-L/usr/local/lib) add_compile_options(-I/usr/local/include) @@ -28,7 +32,7 @@ if (BSD) endif() if (debug) - add_compile_options(-g -O0) + add_compile_options(-g -O0 -Wall) if (!BSD) add_compile_options(-fsanitize=address) add_link_options(-fsanitize=address) @@ -36,7 +40,7 @@ if (debug) add_link_options(-fsanitize=undefined) endif() else() - add_compile_options(-O3 -g) + add_compile_options(-O3 -g -Wall) endif() # Test build instructions @@ -44,11 +48,6 @@ if (tests) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/tests") file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/tests/data") - add_executable(augbtree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/augbtree_tests.cpp) - target_link_libraries(augbtree_tests PUBLIC gsl check subunit pthread atomic) - target_link_options(augbtree_tests PUBLIC -mcx16) - target_include_directories(augbtree_tests PRIVATE include external/psudb-common/cpp/include external/ctpl) - add_executable(internal_level_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/internal_level_tests.cpp) target_link_libraries(internal_level_tests PUBLIC gsl check subunit pthread atomic) target_link_options(internal_level_tests PUBLIC -mcx16) @@ -64,6 +63,11 @@ if (tests) target_link_options(rangequery_tests PUBLIC -mcx16) target_include_directories(rangequery_tests PRIVATE include external/psudb-common/cpp/include) + add_executable(irs_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/irs_tests.cpp) + target_link_libraries(irs_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(irs_tests PUBLIC -mcx16) + target_include_directories(irs_tests PRIVATE include external/psudb-common/cpp/include) + add_executable(rangecount_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/rangecount_tests.cpp) target_link_libraries(rangecount_tests PUBLIC gsl check subunit pthread atomic) @@ -101,6 +105,11 @@ if (tests) target_link_options(de_bsm_tomb PUBLIC -mcx16) target_include_directories(de_bsm_tomb PRIVATE include external/PLEX/include external/psudb-common/cpp/include external) + add_executable(de_bsm_tag ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_bsm_tag.cpp) + target_link_libraries(de_bsm_tag PUBLIC gsl check subunit pthread atomic) + target_link_options(de_bsm_tag PUBLIC -mcx16) + target_include_directories(de_bsm_tag PRIVATE include external/PLEX/include external/psudb-common/cpp/include external) + add_executable(de_level_concurrent ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_level_concurrent.cpp) target_link_libraries(de_level_concurrent PUBLIC gsl check subunit pthread atomic) target_link_options(de_level_concurrent PUBLIC -mcx16) @@ -116,31 +125,30 @@ if (tests) target_link_options(memisam_tests PUBLIC -mcx16) target_include_directories(memisam_tests PRIVATE include 
external/psudb-common/cpp/include) - add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp) - target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread atomic) - target_link_options(triespline_tests PUBLIC -mcx16) - target_include_directories(triespline_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include) - add_executable(alias_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/alias_tests.cpp) target_link_libraries(alias_tests PUBLIC gsl check subunit pthread atomic) target_link_options(alias_tests PUBLIC -mcx16) target_include_directories(alias_tests PRIVATE include external/psudb-common/cpp/include) - # OpenBSD doesn't have OpenMP support, so don't build the PGM code on that - # platform. + add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp) + target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(triespline_tests PUBLIC -mcx16) + target_include_directories(triespline_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include) + add_executable(pgm_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/pgm_tests.cpp) target_link_libraries(pgm_tests PUBLIC gsl check subunit pthread gomp atomic) target_include_directories(pgm_tests PRIVATE include external/PGM-index/include external/psudb-common/cpp/include) target_link_options(pgm_tests PUBLIC -mcx16) - target_compile_options(pgm_tests PUBLIC -fopenmp) + target_compile_options(pgm_tests PUBLIC) - # Triespline code doesn't build under OpenBSD either due to ambiguous function call; + # Triespline code doesn't build under OpenBSD due to ambiguous function call; # this is likely a difference between gcc and clang, rather than an OS thing - add_executable(triespline_debug ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_debug.cpp) - target_link_libraries(triespline_debug PUBLIC gsl check subunit pthread atomic) - target_link_options(triespline_debug PUBLIC -mcx16) - target_include_directories(triespline_debug PRIVATE include external/psudb-common/cpp/include external/PLEX/include) - + if (NOT BSD) + add_executable(triespline_debug ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_debug.cpp) + target_link_libraries(triespline_debug PUBLIC gsl check subunit pthread atomic) + target_link_options(triespline_debug PUBLIC -mcx16) + target_include_directories(triespline_debug PRIVATE include external/psudb-common/cpp/include external/PLEX/include) + endif() add_executable(fst_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/fst_tests.cpp) target_link_libraries(fst_tests PUBLIC gsl check subunit pthread atomic) @@ -155,123 +163,123 @@ if (vldb_bench) target_link_libraries(irs_bench PUBLIC gsl pthread atomic) target_include_directories(irs_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(irs_bench PUBLIC -mcx16) - target_compile_options(irs_bench PUBLIC -fopenmp) + target_compile_options(irs_bench PUBLIC) add_executable(vptree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench.cpp) target_link_libraries(vptree_bench PUBLIC gsl pthread atomic) target_include_directories(vptree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bench PUBLIC -mcx16) - target_compile_options(vptree_bench PUBLIC -fopenmp) + target_compile_options(vptree_bench PUBLIC) 
add_executable(vptree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench_alt.cpp) target_link_libraries(vptree_bench_alt PUBLIC gsl pthread atomic) target_include_directories(vptree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bench_alt PUBLIC -mcx16) - target_compile_options(vptree_bench_alt PUBLIC -fopenmp) + target_compile_options(vptree_bench_alt PUBLIC) add_executable(vptree_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_parmsweep.cpp) target_link_libraries(vptree_parmsweep PUBLIC gsl pthread atomic) target_include_directories(vptree_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_parmsweep PUBLIC -mcx16) - target_compile_options(vptree_parmsweep PUBLIC -fopenmp) + target_compile_options(vptree_parmsweep PUBLIC) add_executable(vptree_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench.cpp) target_link_libraries(vptree_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(vptree_bsm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bsm_bench PUBLIC -mcx16) - target_compile_options(vptree_bsm_bench PUBLIC -fopenmp) + target_compile_options(vptree_bsm_bench PUBLIC) add_executable(vptree_bsm_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench_alt.cpp) target_link_libraries(vptree_bsm_bench_alt PUBLIC gsl pthread atomic) target_include_directories(vptree_bsm_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bsm_bench_alt PUBLIC -mcx16) - target_compile_options(vptree_bsm_bench_alt PUBLIC -fopenmp) + target_compile_options(vptree_bsm_bench_alt PUBLIC) add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp) target_link_libraries(fst_bench PUBLIC gsl pthread atomic) target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(fst_bench PUBLIC -mcx16) - target_compile_options(fst_bench PUBLIC -fopenmp) + target_compile_options(fst_bench PUBLIC) add_executable(fst_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bsm_bench.cpp) target_link_libraries(fst_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(fst_bsm_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(fst_bsm_bench PUBLIC -mcx16) - target_compile_options(fst_bsm_bench PUBLIC -fopenmp) + target_compile_options(fst_bsm_bench PUBLIC) add_executable(ts_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bench.cpp) target_link_libraries(ts_bench PUBLIC gsl pthread atomic) target_include_directories(ts_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_bench PUBLIC -mcx16) - target_compile_options(ts_bench PUBLIC -fopenmp) + 
target_compile_options(ts_bench PUBLIC) add_executable(ts_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_parmsweep.cpp) target_link_libraries(ts_parmsweep PUBLIC gsl pthread atomic) target_include_directories(ts_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_parmsweep PUBLIC -mcx16) - target_compile_options(ts_parmsweep PUBLIC -fopenmp) + target_compile_options(ts_parmsweep PUBLIC) add_executable(ts_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bsm_bench.cpp) target_link_libraries(ts_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(ts_bsm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_bsm_bench PUBLIC -mcx16) - target_compile_options(ts_bsm_bench PUBLIC -fopenmp) + target_compile_options(ts_bsm_bench PUBLIC) #add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp) #target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic) #target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) #target_link_options(ts_mdsp_bench PUBLIC -mcx16) - #target_compile_options(ts_mdsp_bench PUBLIC -fopenmp) + #target_compile_options(ts_mdsp_bench PUBLIC) add_executable(pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/pgm_bench.cpp) target_link_libraries(pgm_bench PUBLIC gsl pthread atomic gomp) target_include_directories(pgm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(pgm_bench PUBLIC -mcx16) - target_compile_options(pgm_bench PUBLIC -fopenmp) + target_compile_options(pgm_bench PUBLIC) add_executable(dynamic_pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/dynamic_pgm_bench.cpp) target_link_libraries(dynamic_pgm_bench PUBLIC gsl pthread atomic gomp) target_include_directories(dynamic_pgm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(dynamic_pgm_bench PUBLIC -mcx16) - target_compile_options(dynamic_pgm_bench PUBLIC -fopenmp) + target_compile_options(dynamic_pgm_bench PUBLIC) add_executable(btree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/btree_bench.cpp) target_link_libraries(btree_bench PUBLIC gsl pthread atomic gomp) target_include_directories(btree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(btree_bench PUBLIC -mcx16) - target_compile_options(btree_bench PUBLIC -fopenmp) + target_compile_options(btree_bench PUBLIC) add_executable(alex_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/alex_bench.cpp) target_link_libraries(alex_bench PUBLIC gsl ) target_include_directories(alex_bench PRIVATE external/psudb-common/cpp/include external/alex/src/core/ benchmarks/include) - target_compile_options(alex_bench PUBLIC -fopenmp) + target_compile_options(alex_bench PUBLIC) set_property(TARGET alex_bench PROPERTY CXX_STANDARD 14) add_executable(mtree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench_alt.cpp) target_link_libraries(mtree_bench_alt 
PUBLIC gsl pthread atomic gomp) target_include_directories(mtree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(mtree_bench_alt PUBLIC -mcx16) - target_compile_options(mtree_bench_alt PUBLIC -fopenmp) + target_compile_options(mtree_bench_alt PUBLIC) add_executable(bigann_sample ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/bigann_sample.cpp) target_link_libraries(bigann_sample PUBLIC gsl pthread atomic gomp) target_include_directories(bigann_sample PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(bigann_sample PUBLIC -mcx16) - target_compile_options(bigann_sample PUBLIC -fopenmp) + target_compile_options(bigann_sample PUBLIC) add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp) target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp) target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(mtree_bench PUBLIC -mcx16) - target_compile_options(mtree_bench PUBLIC -fopenmp) + target_compile_options(mtree_bench PUBLIC) add_executable(thread_scaling_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/thread_scaling_bench.cpp) target_link_libraries(thread_scaling_bench PUBLIC gsl pthread atomic) target_include_directories(thread_scaling_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(thread_scaling_bench PUBLIC -mcx16) - target_compile_options(thread_scaling_bench PUBLIC -fopenmp) + target_compile_options(thread_scaling_bench PUBLIC) add_executable(btree_thread_scaling_bench @@ -279,7 +287,7 @@ if (vldb_bench) target_link_libraries(btree_thread_scaling_bench PUBLIC gsl pthread atomic) target_include_directories(btree_thread_scaling_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(btree_thread_scaling_bench PUBLIC -mcx16) - target_compile_options(btree_thread_scaling_bench PUBLIC -fopenmp) + target_compile_options(btree_thread_scaling_bench PUBLIC) endif() diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 41eb18c..1a40a78 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -269,7 +269,7 @@ static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) { return records; } -static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { +[[maybe_unused]] static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { std::fstream file; file.open(fname, std::ios::in); diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index b805c08..797b0c5 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -18,25 +18,15 @@ #include "psu-util/progress.h" #include "benchmark_types.h" #include "psu-util/bentley-saxe.h" +#include "shard/ISAMTree.h" static size_t g_deleted_records = 0; -static double delete_proportion = 0.05; +static double delete_proportion = 0.5; static volatile size_t total = 0; 
-template<typename DE, typename QP, typename R> -static void run_queries(DE *extension, DE *ghost, std::vector<QP> &queries) { - for (size_t i=0; i<queries.size(); i++) { - std::vector<R> res = extension->query(&queries[i]); - std::vector<R> negres = ghost->query(&queries[i]); - auto result = res[0].first - negres[0].first; - total = result; - } -} - - -template<typename DE, typename QP, bool BSM=false> -static void run_queries(DE *extension, std::vector<QP> &queries) { +template<typename DE, typename Q, bool BSM=false> +static void run_queries(DE *extension, std::vector<typename Q::Parameters> &queries) { for (size_t i=0; i<queries.size(); i++) { if constexpr (std::is_same_v<MTree, DE>) { std::vector<Word2VecRec> result; @@ -72,7 +62,8 @@ static void run_queries(DE *extension, std::vector<QP> &queries) { ++ptr; } } else { - auto res = extension->query(&queries[i]); + auto q = queries[i]; + auto res = extension->query(std::move(q)); if constexpr (!BSM) { auto result = res.get(); #ifdef BENCH_PRINT_RESULTS @@ -100,8 +91,8 @@ static void run_queries(DE *extension, std::vector<QP> &queries) { } } -template <typename R> -static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) { +template <typename R, typename Q> +static void run_btree_queries(BenchBTree *btree, std::vector<typename Q::Parameters> &queries) { std::vector<int64_t> sample_set; sample_set.reserve(queries[0].sample_size); @@ -111,18 +102,16 @@ static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> } -template<typename S, typename QP, typename Q> -static void run_static_queries(S *shard, std::vector<QP> &queries) { +template<typename S, typename Q> +static void run_static_queries(S *shard, std::vector<typename Q::Parameters> &queries) { for (size_t i=0; i<queries.size(); i++) { auto q = &queries[i]; - auto state = Q::get_query_state(shard, q); - - std::vector<void*> shards = {shard}; - std::vector<void*> states = {state}; + std::vector<S *> shards = {shard}; + std::vector<typename Q::LocalQuery*> local_queries = {Q::local_preproc(shard, q)}; - Q::process_query_states(q, states, nullptr); - auto res = Q::query(shard, state, q); + Q::distribute_query(q, local_queries, nullptr); + auto res = Q::local_query(shard, local_queries[0]); #ifdef BENCH_PRINT_RESULTS fprintf(stdout, "\n\n"); @@ -136,55 +125,12 @@ static void run_static_queries(S *shard, std::vector<QP> &queries) { } } - -/* - * Insert records into a standard Bentley-Saxe extension. Deletes are not - * supported. 
- */ -template<typename DS, typename R, bool MDSP=false> -static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, - size_t start, size_t stop, std::vector<R> &records) { - - psudb::progress_update(0, "Insert Progress"); - for (size_t i=start; i<stop; i++) { - extension->insert(records[i]); - } - - psudb::progress_update(1, "Insert Progress"); -} - - -template<typename DS, typename R, bool MDSP=false> -static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, - psudb::bsm::BentleySaxe<R, DS, MDSP> *ghost, - size_t start, size_t stop, std::vector<R> &records, - std::vector<size_t> &to_delete, size_t &delete_idx, - gsl_rng *rng) { - - psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; - for (size_t i=start; i<stop; i++) { - - extension->insert(records[i]); - - if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - ghost->insert(records[to_delete[delete_idx]]); - delete_idx++; - g_deleted_records++; - } - - } - -} - - template<typename DE, typename R> static void insert_records(DE *structure, size_t start, size_t stop, std::vector<R> &records, std::vector<size_t> &to_delete, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; i<stop; i++) { if constexpr (std::is_same_v<BenchBTree, DE>) { @@ -302,8 +248,8 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn return continue_benchmark; } -template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true> -static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) { +template <typename DE, typename Q, bool PROGRESS=true> +static bool query_latency_bench(DE &de_index, std::vector<typename Q::Parameters> queries, size_t trial_cnt=1) { char progbuf[25]; if constexpr (PROGRESS) { sprintf(progbuf, "querying:"); @@ -339,8 +285,8 @@ static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t tr } -template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true> -static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) { +template <typename Shard, typename Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector<typename Q::Parameters> queries, size_t trial_cnt=100) { char progbuf[25]; if constexpr (PROGRESS) { sprintf(progbuf, "querying:"); @@ -354,15 +300,15 @@ static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t t psudb::progress_update((double) (i) / (double) trial_cnt, progbuf); } - std::vector<void *> states(1); + std::vector<typename Q::LocalQuery*> local_queries(1); auto start = std::chrono::high_resolution_clock::now(); for (size_t j=0; j<queries.size(); j++) { - states[0] = Q::get_query_state(shard, &queries[j]); - Q::process_query_states(&queries[j], states, nullptr); - auto res = Q::query(shard, states[0], &queries[j]); + local_queries[0] = Q::local_preproc(shard, &queries[j]); + Q::distribute_query(&queries[j], local_queries, nullptr); + auto res = Q::local_query(shard, local_queries[0]); total_results += res.size(); - Q::delete_query_state(states[0]); + delete local_queries[0]; } auto stop = std::chrono::high_resolution_clock::now(); diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index ba687f3..636f576 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -33,7 +33,6 @@ static void 
insert_records(Alex *structure, size_t start, size_t stop, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; i<stop; i++) { structure->insert(records[i].key, records[i].value); diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp index fa72831..dc5142a 100644 --- a/benchmarks/vldb/btree_bench.cpp +++ b/benchmarks/vldb/btree_bench.cpp @@ -19,8 +19,8 @@ typedef btree_record<int64_t, int64_t> Rec; typedef de::ISAMTree<Rec> Shard; -typedef de::irs::Query<Rec, Shard> Q; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -71,7 +71,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_btree_queries<Rec>(&btree, queries); + run_btree_queries<Rec, Q>(&btree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/btree_thread_scaling_bench.cpp b/benchmarks/vldb/btree_thread_scaling_bench.cpp index 557e966..d33a1f8 100644 --- a/benchmarks/vldb/btree_thread_scaling_bench.cpp +++ b/benchmarks/vldb/btree_thread_scaling_bench.cpp @@ -7,6 +7,7 @@ #include <thread> #include "query/irs.h" +#include "shard/ISAMTree.h" #include "benchmark_types.h" #include "file_util.h" #include <mutex> @@ -17,7 +18,10 @@ typedef btree_record<int64_t, int64_t> Rec; -typedef de::irs::Parms<Rec> QP; + +typedef de::ISAMTree<Rec> Shard; +typedef de::irs::Query<Shard> Q; +typedef Q::Parameters QP; std::atomic<bool> inserts_done = false; @@ -47,7 +51,6 @@ void query_thread(BenchBTree *tree, std::vector<QP> *queries) { } void insert_thread(BenchBTree *tree, size_t start, std::vector<Rec> *records) { - size_t reccnt = 0; for (size_t i=start; i<records->size(); i++) { btree_record<int64_t, int64_t> r; r.key = (*records)[i].key; diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp index 15b130f..9206e40 100644 --- a/benchmarks/vldb/dynamic_pgm_bench.cpp +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -14,9 +14,11 @@ #include "psu-util/timer.h" - typedef de::Record<uint64_t, uint64_t> Rec; -typedef de::rc::Parms<Rec> QP; + +typedef de::ISAMTree<Rec> Shard; +typedef de::rc::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -62,7 +64,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<PGM, QP>(&pgm, queries); + run_queries<PGM, Q>(&pgm, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp index 276a922..e4b5bf6 100644 --- a/benchmarks/vldb/fst_bench.cpp +++ b/benchmarks/vldb/fst_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<const char *, uint64_t> Rec; typedef de::FSTrie<Rec> Shard; -typedef de::pl::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::pl::Parms<Rec> QP; +typedef de::pl::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt 
datafile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp index 15a441a..b0be115 100644 --- a/benchmarks/vldb/fst_bsm_bench.cpp +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<const char *, uint64_t> Rec; typedef de::FSTrie<Rec> Shard; -typedef de::pl::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::pl::Parms<Rec> QP; +typedef de::pl::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp index e062e80..a772326 100644 --- a/benchmarks/vldb/irs_bench.cpp +++ b/benchmarks/vldb/irs_bench.cpp @@ -18,9 +18,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::ISAMTree<Rec> Shard; -typedef de::irs::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -72,7 +72,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -80,7 +80,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp index cc2f41f..ed107b5 100644 --- a/benchmarks/vldb/mtree_bench.cpp +++ b/benchmarks/vldb/mtree_bench.cpp @@ -5,6 +5,7 @@ #define ENABLE_TIMER #include 
"query/knn.h" +#include "shard/VPTree.h" #include "file_util.h" #include "standard_benchmarks.h" @@ -14,7 +15,9 @@ typedef Word2VecRec Rec; -typedef de::knn::Parms<Rec> QP; +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -66,7 +69,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<MTree, QP>(mtree, queries); + run_queries<MTree, Q>(mtree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp index 50c6117..c5ab283 100644 --- a/benchmarks/vldb/mtree_bench_alt.cpp +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -5,6 +5,7 @@ #define ENABLE_TIMER #include "query/knn.h" +#include "shard/VPTree.h" #include "file_util.h" #include "standard_benchmarks.h" @@ -14,7 +15,9 @@ typedef ANNRec Rec; -typedef de::knn::Parms<Rec> QP; +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -66,7 +69,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<MTree_alt, QP>(mtree, queries); + run_queries<MTree_alt, Q>(mtree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp index cec95df..3b4340b 100644 --- a/benchmarks/vldb/pgm_bench.cpp +++ b/benchmarks/vldb/pgm_bench.cpp @@ -20,9 +20,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::PGM<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -69,7 +69,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -77,7 +77,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/thread_scaling_bench.cpp b/benchmarks/vldb/thread_scaling_bench.cpp index b679e92..3b9311b 100644 --- a/benchmarks/vldb/thread_scaling_bench.cpp +++ b/benchmarks/vldb/thread_scaling_bench.cpp @@ -20,9 +20,9 @@ typedef de::Record<int64_t, int64_t> Rec; typedef de::ISAMTree<Rec> ISAM; -typedef de::irs::Query<Rec, ISAM> Q; -typedef de::DynamicExtension<Rec, ISAM, Q> Ext; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<ISAM> Q; +typedef de::DynamicExtension<ISAM, Q> Ext; +typedef Q::Parameters QP; std::atomic<bool> inserts_done = false; @@ -39,7 +39,7 @@ void query_thread(Ext *extension, std::vector<QP> *queries) { q.rng = rng; q.sample_size = 1000; - 
auto res = extension->query(&q); + auto res = extension->query(std::move(q)); auto r = res.get(); total += r.size(); nanosleep(&delay, nullptr); diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp index 81a430a..1bc75b6 100644 --- a/benchmarks/vldb/ts_bench.cpp +++ b/benchmarks/vldb/ts_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -70,7 +70,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp index 4511350..5bcfb5d 100644 --- a/benchmarks/vldb/ts_bsm_bench.cpp +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -70,7 +70,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp deleted file mode 100644 index cc0cd99..0000000 --- a/benchmarks/vldb/ts_mdsp_bench.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include <thread> - -#include "triespline_bsm.h" -#include "psu-util/bentley-saxe.h" -#include "framework/interface/Record.h" -#include "file_util.h" -#include "query/rangecount.h" -#include "psu-util/timer.h" -#include "standard_benchmarks.h" - -typedef std::pair<uint64_t, uint64_t> Rec; -typedef de::Record<uint64_t, uint64_t> FRec; - -typedef BSMTrieSpline<uint64_t, uint64_t> Shard; -typedef de::rc::Parms<FRec> QP; -typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext; 
- -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile\n", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>(); - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n); - auto queries = read_range_queries<QP>(q_fname, .0001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - insert_records<Shard, Rec, true>(extension, 0, warmup, data); - - TIMER_INIT(); - - TIMER_START(); - insert_records<Shard, Rec, true>(extension, warmup, data.size(), data); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries<Ext, QP, true>(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp index 2c9412a..a9203ab 100644 --- a/benchmarks/vldb/ts_parmsweep.cpp +++ b/benchmarks/vldb/ts_parmsweep.cpp @@ -18,10 +18,10 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard, true> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard, true> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -74,7 +74,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -106,7 +106,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext2, QP>(extension, queries); + run_queries<Ext2, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp index 0b98a52..417e3af 100644 --- a/benchmarks/vldb/vptree_bench.cpp +++ b/benchmarks/vldb/vptree_bench.cpp @@ -19,9 +19,9 @@ typedef Word2VecRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; 
void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +84,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp index b09ee7d..5279f68 100644 --- a/benchmarks/vldb/vptree_bench_alt.cpp +++ b/benchmarks/vldb/vptree_bench_alt.cpp @@ -19,9 +19,9 @@ typedef ANNRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +84,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp index 4a7fcb6..d0d963c 100644 --- a/benchmarks/vldb/vptree_bsm_bench.cpp +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -18,10 +18,11 @@ typedef Word2VecRec Rec; + typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +76,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +85,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard,Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp index 63baf8b..b4956a2 100644 --- a/benchmarks/vldb/vptree_bsm_bench_alt.cpp +++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp @@ -19,9 +19,9 @@ typedef ANNRec Rec; typedef 
de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_parmsweep.cpp b/benchmarks/vldb/vptree_parmsweep.cpp index 2cbd521..5e496d4 100644 --- a/benchmarks/vldb/vptree_parmsweep.cpp +++ b/benchmarks/vldb/vptree_parmsweep.cpp @@ -19,10 +19,10 @@ typedef Word2VecRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -111,7 +111,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext2, QP>(extension, queries); + run_queries<Ext2, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/external/psudb-common b/external/psudb-common -Subproject de975098c12a83e996923a11f6b525ddb1985ae +Subproject 3be9caf90a12b6ac3afd4437ddd62167ba6d28b diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h index e2e2784..16cbb0e 100644 --- a/include/framework/DynamicExtension.h +++ b/include/framework/DynamicExtension.h @@ -1,8 +1,8 @@ /* * include/framework/DynamicExtension.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -14,766 +14,782 @@ #include <vector> #include "framework/interface/Scheduler.h" -#include "framework/scheduling/FIFOScheduler.h" #include "framework/scheduling/SerialScheduler.h" -#include "framework/structure/MutableBuffer.h" -#include "framework/interface/Record.h" #include "framework/structure/ExtensionStructure.h" +#include "framework/structure/MutableBuffer.h" -#include "framework/util/Configuration.h" #include "framework/scheduling/Epoch.h" +#include "framework/util/Configuration.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING, - DeletePolicy D=DeletePolicy::TAGGING, SchedulerInterface SCHED=FIFOScheduler> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L = LayoutPolicy::TEIRING, + DeletePolicy D = DeletePolicy::TAGGING, + SchedulerInterface SchedType = SerialScheduler> class DynamicExtension { - typedef S Shard; - typedef MutableBuffer<R> Buffer; - typedef ExtensionStructure<R, S, Q, L> Structure; - typedef Epoch<R, S, Q, L> _Epoch; - typedef BufferView<R> BufView; - - static constexpr size_t QUERY = 1; - static constexpr size_t RECONSTRUCTION = 2; - - struct epoch_ptr { - _Epoch *epoch; - size_t refcnt; - }; - + /* for unit testing purposes */ public: - DynamicExtension(size_t buffer_lwm, size_t buffer_hwm, size_t scale_factor, size_t memory_budget=0, - size_t thread_cnt=16) - : m_scale_factor(scale_factor) - , m_max_delete_prop(1) - , m_sched(memory_budget, thread_cnt) - , m_buffer(new Buffer(buffer_lwm, buffer_hwm)) - , m_core_cnt(thread_cnt) - , m_next_core(0) - , m_epoch_cnt(0) - { - if constexpr (L == LayoutPolicy::BSM) { - assert(scale_factor == 2); - } - - auto vers = new Structure(buffer_hwm, m_scale_factor, m_max_delete_prop); - m_current_epoch.store({new _Epoch(0, vers, m_buffer, 0), 0}); - m_previous_epoch.store({nullptr, 0}); - m_next_epoch.store({nullptr, 0}); - } - - ~DynamicExtension() { - - /* let any in-flight epoch transition finish */ - await_next_epoch(); - - /* shutdown the scheduler */ - m_sched.shutdown(); - - /* delete all held resources */ - delete m_next_epoch.load().epoch; - delete m_current_epoch.load().epoch; - delete m_previous_epoch.load().epoch; - - delete m_buffer; - } - - /* - * Insert the record `rec` into the index. If the buffer is full and - * the framework is blocking on an epoch transition, this call may fail - * and return 0. In this case, retry the call again later. If - * successful, 1 will be returned. The record will be immediately - * visible in the buffer upon the successful return of this function. - */ - int insert(const R &rec) { - return internal_append(rec, false); - } - - /* - * Erase the record `rec` from the index. It is assumed that `rec` - * currently exists--no special checks are made for correctness here. - * The behavior if this function will differ depending on if tombstone - * or tagged deletes are used. - * - * Tombstone deletes - inserts a tombstone record for `rec`. This *may* - * return 0 and fail if the buffer is full and the framework is - * blocking on an epoch transition. In this case, repeat the call - * later. 1 will be returned when the tombstone is successfully - * inserted. - * - * Tagging deletes - Does a point lookup for the record across the - * entire structure, and sets its delete bit when found. Returns 1 if - * the record is found and marked, and 0 if it was not (i.e., if it - * isn't present in the index). 
- */ - int erase(const R &rec) { - // FIXME: delete tagging will require a lot of extra work to get - // operating "correctly" in a concurrent environment. - - /* - * Get a view on the buffer *first*. This will ensure a stronger - * ordering than simply accessing the buffer directly, but is - * not *strictly* necessary. - */ - if constexpr (D == DeletePolicy::TAGGING) { - static_assert(std::same_as<SCHED, SerialScheduler>, "Tagging is only supported in single-threaded operation"); - - auto view = m_buffer->get_buffer_view(); - - auto epoch = get_active_epoch(); - if (epoch->get_structure()->tagged_delete(rec)) { - end_job(epoch); - return 1; - } - - end_job(epoch); - - /* - * the buffer will take the longest amount of time, and - * probably has the lowest probability of having the record, - * so we'll check it last. - */ - return view.delete_record(rec); - } + LayoutPolicy Layout = L; - /* - * If tagging isn't used, then delete using a tombstone - */ - return internal_append(rec, true); - } - - /* - * Execute the query with parameters `parms` and return a future. This - * future can be used to access a vector containing the results of the - * query. - * - * The behavior of this function is undefined if `parms` is not a - * pointer to a valid query parameter object for the query type used as - * a template parameter to construct the framework. - */ - std::future<std::vector<R>> query(void *parms) { - return schedule_query(parms); - } - - /* - * Returns the number of records (included tagged records and - * tombstones) currently within the framework. - */ - size_t get_record_count() { - auto epoch = get_active_epoch(); - auto t = epoch->get_buffer().get_record_count() + epoch->get_structure()->get_record_count(); - end_job(epoch); +private: + /* convenience typedefs for commonly used types within the class */ + typedef typename ShardType::RECORD RecordType; + typedef MutableBuffer<RecordType> Buffer; + typedef ExtensionStructure<ShardType, QueryType, L> Structure; + typedef Epoch<ShardType, QueryType, L> _Epoch; + typedef BufferView<RecordType> BufView; + + typedef typename QueryType::Parameters Parameters; + typedef typename QueryType::LocalQuery LocalQuery; + typedef typename QueryType::LocalQueryBuffer BufferQuery; + typedef typename QueryType::LocalResultType LocalResult; + typedef typename QueryType::ResultType QueryResult; + + + static constexpr size_t QUERY = 1; + static constexpr size_t RECONSTRUCTION = 2; + + struct epoch_ptr { + _Epoch *epoch; + size_t refcnt; + }; - return t; +public: + /** + * Create a new Dynamized version of a data structure, supporting + * inserts and, possibly, deletes. The following parameters are used + * for configuration of the structure, + * @param buffer_low_watermark The number of records that can be + * inserted before a buffer flush is initiated + * + * @param buffer_high_watermark The maximum buffer capacity, inserts + * will begin to fail once this number is reached, until the + * buffer flush has completed. Has no effect in single-threaded + * operation + * + * @param scale_factor The rate at which the capacity of levels + * grows; should be at least 2 for reasonable performance + * + * @param memory_budget Unused at this time + * + * @param thread_cnt The maximum number of threads available to the + * framework's scheduler for use in answering queries and + * performing compactions and flushes, etc. 
+ */ + DynamicExtension(size_t buffer_low_watermark, size_t buffer_high_watermark, + size_t scale_factor, size_t memory_budget = 0, + size_t thread_cnt = 16) + : m_scale_factor(scale_factor), m_max_delete_prop(1), + m_sched(memory_budget, thread_cnt), + m_buffer(new Buffer(buffer_low_watermark, buffer_high_watermark)), + m_core_cnt(thread_cnt), m_next_core(0), m_epoch_cnt(0) { + if constexpr (L == LayoutPolicy::BSM) { + assert(scale_factor == 2); } - /* - * Returns the number of tombstone records currently within the - * framework. This function can be called when tagged deletes are used, - * but will always return 0 in that case. - */ - size_t get_tombstone_count() { - auto epoch = get_active_epoch(); - auto t = epoch->get_buffer().get_tombstone_count() + epoch->get_structure()->get_tombstone_count(); - end_job(epoch); - - return t; - } + auto vers = + new Structure(buffer_high_watermark, m_scale_factor, m_max_delete_prop); + m_current_epoch.store({new _Epoch(0, vers, m_buffer, 0), 0}); + m_previous_epoch.store({nullptr, 0}); + m_next_epoch.store({nullptr, 0}); + } + + /** + * Destructor for DynamicExtension. Will block until the completion of + * any outstanding epoch transition, shut down the scheduler, and free + * all currently allocated shards, buffers, etc., by calling their + * destructors. + */ + ~DynamicExtension() { + + /* let any in-flight epoch transition finish */ + await_next_epoch(); + + /* shutdown the scheduler */ + m_sched.shutdown(); + + /* delete all held resources */ + delete m_next_epoch.load().epoch; + delete m_current_epoch.load().epoch; + delete m_previous_epoch.load().epoch; + + delete m_buffer; + } + + /** + * Inserts a record into the index. Returns 1 if the insert succeeds, + * and 0 if it fails. Inserts may fail if the DynamicExtension's buffer + * has reached the high water mark; in this case, the insert should be + * retried when the buffer has flushed. The record will be immediately + * visible inside the index upon the return of this function. + * + * @param rec The record to be inserted + * + * @return 1 on success, 0 on failure (in which case the insert should + * be retried) + */ + int insert(const RecordType &rec) { return internal_append(rec, false); } + + /** + * Erases a record from the index, according to the DeletePolicy + * template parameter. Returns 1 on success and 0 on failure. The + * equality comparison operator of RecordType is used to identify + * the record to be deleted. + * + * Deletes behave differently, depending on the DeletionPolicy. For + * Tombstone deletes, a tombstone record will be inserted into the + * index. The presence of the deleted record is not checked first, so + * deleting a record that does not exist will result in an unnecessary + * tombstone record being written. + * + * Deletes using Tagging will perform a point lookup for the record to + * be removed, and mark it as deleted in its header. + * + * @param rec The record to be deleted. The argument to this function + * should compare equal to the record to be deleted. + * + * @return 1 on success, and 0 on failure. For tombstone deletes, a + * failure will occur if the insert fails due to the buffer + * being full, and can be retried. For tagging deletes, a + * failure means that hte record to be deleted could not be + * found in the index, and should *not* be retried. + */ + int erase(const RecordType &rec) { + // FIXME: delete tagging will require a lot of extra work to get + // operating "correctly" in a concurrent environment. 
/* - * Get the number of levels within the framework. This count will - * include any empty levels, but will not include the buffer. Note that - * this is *not* the same as the number of shards when tiering is used, - * as each level can contain multiple shards in that case. + * Get a view on the buffer *first*. This will ensure a stronger + * ordering than simply accessing the buffer directly, but is + * not *strictly* necessary. */ - size_t get_height() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->get_height(); - end_job(epoch); + if constexpr (D == DeletePolicy::TAGGING) { + static_assert(std::same_as<SchedType, SerialScheduler>, + "Tagging is only supported in single-threaded operation"); - return t; - } + auto view = m_buffer->get_buffer_view(); - /* - * Get the number of bytes of memory allocated across the framework for - * storing records and associated index information (i.e., internal - * ISAM tree nodes). This includes memory that is allocated but - * currently unused in the buffer, or in shards themselves - * (overallocation due to delete cancellation, etc.). - */ - size_t get_memory_usage() { - auto epoch = get_active_epoch(); - auto t = m_buffer->get_memory_usage() + epoch->get_structure()->get_memory_usage(); + auto epoch = get_active_epoch(); + if (epoch->get_structure()->tagged_delete(rec)) { end_job(epoch); + return 1; + } - return t; - } - - /* - * Get the number of bytes of memory allocated across the framework for - * auxiliary structures. This can include bloom filters, aux - * hashtables, etc. - */ - size_t get_aux_memory_usage() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->get_aux_memory_usage(); - end_job(epoch); + end_job(epoch); - return t; + /* + * the buffer will take the longest amount of time, and + * probably has the lowest probability of having the record, + * so we'll check it last. + */ + return view.delete_record(rec); } /* - * Returns the maximum physical capacity of the buffer, measured in - * records. + * If tagging isn't used, then delete using a tombstone */ - size_t get_buffer_capacity() { - return m_buffer->get_capacity(); + return internal_append(rec, true); + } + + /** + * Schedule the execution of a query with specified parameters and + * returns a future that can be used to access the results. The query + * is executed asynchronously. + * @param parms An rvalue reference to the query parameters. + * + * @return A future, from which the query results can be retrieved upon + * query completion + */ + std::future<std::vector<QueryResult>> + query(Parameters &&parms) { + return schedule_query(std::move(parms)); + } + + /** + * Determine the number of records (including tagged records and + * tombstones) currently within the framework. This number is used for + * determining when and how reconstructions occur. + * + * @return The number of records within the index + */ + size_t get_record_count() { + auto epoch = get_active_epoch(); + auto t = epoch->get_buffer().get_record_count() + + epoch->get_structure()->get_record_count(); + end_job(epoch); + + return t; + } + + /** + * Returns the number of tombstone records currently within the + * index. This function can be called when tagged deletes are used, + * but will always return 0 in that case. 
+ *
+ * @return The number of tombstone records within the index
+ */
+ size_t get_tombstone_count() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_buffer().get_tombstone_count() +
+ epoch->get_structure()->get_tombstone_count();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of levels within the framework. This count will
+ * include any empty levels, but will not include the buffer. Note that
+ * this is *not* the same as the number of shards when tiering is used,
+ * as each level can contain multiple shards in that case.
+ *
+ * @return The number of levels within the index
+ */
+ size_t get_height() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_structure()->get_height();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of bytes of memory allocated across the framework for
+ * storing records and associated index information (i.e., internal
+ * ISAM tree nodes). This includes memory that is allocated but
+ * currently unused in the buffer, or in shards themselves
+ * (overallocation due to delete cancellation, etc.).
+ *
+ * @return The number of bytes of memory used for shards (as reported by
+ * ShardType::get_memory_usage) and the buffer by the index.
+ */
+ size_t get_memory_usage() {
+ auto epoch = get_active_epoch();
+ auto t = m_buffer->get_memory_usage() +
+ epoch->get_structure()->get_memory_usage();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of bytes of memory allocated across the framework for
+ * auxiliary structures. This can include bloom filters, aux
+ * hashtables, etc.
+ *
+ * @return The number of bytes of memory used for auxiliary structures
+ * (as reported by ShardType::get_aux_memory_usage) by the index.
+ */
+ size_t get_aux_memory_usage() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_structure()->get_aux_memory_usage();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Create a new single Shard object containing all of the records
+ * within the framework (buffer and shards).
+ *
+ * @param await_reconstruction_completion Specifies whether the currently
+ * active state of the index should be used to create the shard
+ * (false), or if shard construction should wait for any active
+ * reconstructions to finish first (true). Default value of false.
+ *
+ * @return A new shard object, containing a copy of all records within
+ * the index. Ownership of this object is transferred to the
+ * caller.
+ */
+ ShardType *
+ create_static_structure(bool await_reconstruction_completion = false) {
+ if (await_reconstruction_completion) {
+ await_next_epoch();
}
-
- /*
- * Create a new single Shard object containing all of the records
- * within the framework (buffer and shards). The optional parameter can
- * be used to specify whether the Shard should be constructed with the
- * currently active state of the framework (false), or if shard
- * construction should wait until any ongoing reconstructions have
- * finished and use that new version (true).
- */ - Shard *create_static_structure(bool await_reconstruction_completion=false) { - if (await_reconstruction_completion) { - await_next_epoch(); - } - auto epoch = get_active_epoch(); - auto vers = epoch->get_structure(); - std::vector<Shard *> shards; + auto epoch = get_active_epoch(); + auto vers = epoch->get_structure(); + std::vector<ShardType *> shards; - - if (vers->get_levels().size() > 0) { - for (int i=vers->get_levels().size() - 1; i>= 0; i--) { - if (vers->get_levels()[i] && vers->get_levels()[i]->get_record_count() > 0) { - shards.emplace_back(vers->get_levels()[i]->get_combined_shard()); - } - } - } - - /* - * construct a shard from the buffer view. We'll hold the view - * for as short a time as possible: once the records are exfiltrated - * from the buffer, there's no reason to retain a hold on the view's - * head pointer any longer - */ - { - auto bv = epoch->get_buffer(); - if (bv.get_record_count() > 0) { - shards.emplace_back(new S(std::move(bv))); - } - } - - Shard *flattened = new S(shards); - - for (auto shard : shards) { - delete shard; + if (vers->get_levels().size() > 0) { + for (int i = vers->get_levels().size() - 1; i >= 0; i--) { + if (vers->get_levels()[i] && + vers->get_levels()[i]->get_record_count() > 0) { + shards.emplace_back(vers->get_levels()[i]->get_combined_shard()); } - - end_job(epoch); - return flattened; + } } /* - * If the current epoch is *not* the newest one, then wait for - * the newest one to become available. Otherwise, returns immediately. + * construct a shard from the buffer view. We'll hold the view + * for as short a time as possible: once the records are exfiltrated + * from the buffer, there's no reason to retain a hold on the view's + * head pointer any longer */ - void await_next_epoch() { - while (m_next_epoch.load().epoch != nullptr) { - std::unique_lock<std::mutex> lk(m_epoch_cv_lk); - m_epoch_cv.wait(lk); - } + { + auto bv = epoch->get_buffer(); + if (bv.get_record_count() > 0) { + shards.emplace_back(new ShardType(std::move(bv))); + } } - /* - * Mostly exposed for unit-testing purposes. Verifies that the current - * active version of the ExtensionStructure doesn't violate the maximum - * tombstone proportion invariant. - */ - bool validate_tombstone_proportion() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->validate_tombstone_proportion(); - end_job(epoch); - return t; - } + ShardType *flattened = new ShardType(shards); + for (auto shard : shards) { + delete shard; + } - void print_scheduler_statistics() { - m_sched.print_statistics(); + end_job(epoch); + return flattened; + } + + /* + * If the current epoch is *not* the newest one, then wait for + * the newest one to become available. Otherwise, returns immediately. + */ + void await_next_epoch() { + while (m_next_epoch.load().epoch != nullptr) { + std::unique_lock<std::mutex> lk(m_epoch_cv_lk); + m_epoch_cv.wait(lk); } + } + + /** + * Verify that the currently active version of the index does not + * violate tombstone proportion invariants. Exposed for unit-testing + * purposes. + * + * @return Returns true if the tombstone proportion invariant is + * satisfied, and false if it is not. + */ + bool validate_tombstone_proportion() { + auto epoch = get_active_epoch(); + auto t = epoch->get_structure()->validate_tombstone_proportion(); + end_job(epoch); + return t; + } + + /** + * Calls SchedType::print_statistics, which should write a report of + * scheduler performance statistics to stdout. 
+ */ + void print_scheduler_statistics() const { m_sched.print_statistics(); } private: - SCHED m_sched; - - Buffer *m_buffer; + size_t m_scale_factor; + double m_max_delete_prop; - //std::mutex m_struct_lock; - //std::set<Structure *> m_versions; + SchedType m_sched; + Buffer *m_buffer; - alignas(64) std::atomic<bool> m_reconstruction_scheduled; + size_t m_core_cnt; + std::atomic<int> m_next_core; + std::atomic<size_t> m_epoch_cnt; + + alignas(64) std::atomic<bool> m_reconstruction_scheduled; - std::atomic<epoch_ptr> m_next_epoch; - std::atomic<epoch_ptr> m_current_epoch; - std::atomic<epoch_ptr> m_previous_epoch; + std::atomic<epoch_ptr> m_next_epoch; + std::atomic<epoch_ptr> m_current_epoch; + std::atomic<epoch_ptr> m_previous_epoch; - std::condition_variable m_epoch_cv; - std::mutex m_epoch_cv_lk; + std::condition_variable m_epoch_cv; + std::mutex m_epoch_cv_lk; - std::atomic<size_t> m_epoch_cnt; - size_t m_scale_factor; - double m_max_delete_prop; - std::atomic<int> m_next_core; - size_t m_core_cnt; - void enforce_delete_invariant(_Epoch *epoch) { - auto structure = epoch->get_structure(); - auto compactions = structure->get_compaction_tasks(); + void enforce_delete_invariant(_Epoch *epoch) { + auto structure = epoch->get_structure(); + auto compactions = structure->get_compaction_tasks(); - while (compactions.size() > 0) { + while (compactions.size() > 0) { - /* schedule a compaction */ - ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>(); - args->epoch = epoch; - args->merges = compactions; - args->extension = this; - args->compaction = true; - /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed here */ + /* schedule a compaction */ + ReconstructionArgs<ShardType, QueryType, L> *args = + new ReconstructionArgs<ShardType, QueryType, L>(); + args->epoch = epoch; + args->merges = compactions; + args->extension = this; + args->compaction = true; + /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed + * here */ - auto wait = args->result.get_future(); + auto wait = args->result.get_future(); - m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); - /* wait for compaction completion */ - wait.get(); - - /* get a new batch of compactions to perform, if needed */ - compactions = structure->get_compaction_tasks(); - } - } + /* wait for compaction completion */ + wait.get(); - _Epoch *get_active_epoch() { - epoch_ptr old, new_ptr; - - do { - /* - * during an epoch transition, a nullptr will installed in the - * current_epoch. At this moment, the "new" current epoch will - * soon be installed, but the "current" current epoch has been - * moved back to m_previous_epoch. - */ - if (m_current_epoch.load().epoch == nullptr) { - old = m_previous_epoch; - new_ptr = {old.epoch, old.refcnt+1}; - if (old.epoch != nullptr && m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } else { - old = m_current_epoch; - new_ptr = {old.epoch, old.refcnt+1}; - if (old.epoch != nullptr && m_current_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } - } while (true); - - assert(new_ptr.refcnt > 0); - - return new_ptr.epoch; + /* get a new batch of compactions to perform, if needed */ + compactions = structure->get_compaction_tasks(); } + } + + _Epoch *get_active_epoch() { + epoch_ptr old, new_ptr; + + do { + /* + * during an epoch transition, a nullptr will installed in the + * current_epoch. 
At this moment, the "new" current epoch will + * soon be installed, but the "current" current epoch has been + * moved back to m_previous_epoch. + */ + if (m_current_epoch.load().epoch == nullptr) { + old = m_previous_epoch; + new_ptr = {old.epoch, old.refcnt + 1}; + if (old.epoch != nullptr && + m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + } else { + old = m_current_epoch; + new_ptr = {old.epoch, old.refcnt + 1}; + if (old.epoch != nullptr && + m_current_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + } + } while (true); - void advance_epoch(size_t buffer_head) { + assert(new_ptr.refcnt > 0); - retire_epoch(m_previous_epoch.load().epoch); + return new_ptr.epoch; + } - epoch_ptr tmp = {nullptr, 0}; - epoch_ptr cur; - do { - cur = m_current_epoch; - } while(!m_current_epoch.compare_exchange_strong(cur, tmp)); + void advance_epoch(size_t buffer_head) { - m_previous_epoch.store(cur); + retire_epoch(m_previous_epoch.load().epoch); - // FIXME: this may currently block because there isn't any - // query preemption yet. At this point, we'd need to either - // 1) wait for all queries on the old_head to finish - // 2) kill all queries on the old_head - // 3) somehow migrate all queries on the old_head to the new - // version - while (!m_next_epoch.load().epoch->advance_buffer_head(buffer_head)) { - _mm_pause(); - } + epoch_ptr tmp = {nullptr, 0}; + epoch_ptr cur; + do { + cur = m_current_epoch; + } while (!m_current_epoch.compare_exchange_strong(cur, tmp)); + m_previous_epoch.store(cur); - m_current_epoch.store(m_next_epoch); - m_next_epoch.store({nullptr, 0}); + // FIXME: this may currently block because there isn't any + // query preemption yet. At this point, we'd need to either + // 1) wait for all queries on the old_head to finish + // 2) kill all queries on the old_head + // 3) somehow migrate all queries on the old_head to the new + // version + while (!m_next_epoch.load().epoch->advance_buffer_head(buffer_head)) { + _mm_pause(); + } + m_current_epoch.store(m_next_epoch); + m_next_epoch.store({nullptr, 0}); - /* notify any blocking threads that the new epoch is available */ - m_epoch_cv_lk.lock(); - m_epoch_cv.notify_all(); - m_epoch_cv_lk.unlock(); - } + /* notify any blocking threads that the new epoch is available */ + m_epoch_cv_lk.lock(); + m_epoch_cv.notify_all(); + m_epoch_cv_lk.unlock(); + } + /* + * Creates a new epoch by copying the currently active one. The new epoch's + * structure will be a shallow copy of the old one's. + */ + _Epoch *create_new_epoch() { /* - * Creates a new epoch by copying the currently active one. The new epoch's - * structure will be a shallow copy of the old one's. + * This epoch access is _not_ protected under the assumption that + * only one reconstruction will be able to trigger at a time. If that + * condition is violated, it is possible that this code will clone a retired + * epoch. */ - _Epoch *create_new_epoch() { - /* - * This epoch access is _not_ protected under the assumption that - * only one reconstruction will be able to trigger at a time. If that condition - * is violated, it is possible that this code will clone a retired - * epoch. 
- */ - assert(m_next_epoch.load().epoch == nullptr); - auto current_epoch = get_active_epoch(); + assert(m_next_epoch.load().epoch == nullptr); + auto current_epoch = get_active_epoch(); - m_epoch_cnt.fetch_add(1); - m_next_epoch.store({current_epoch->clone(m_epoch_cnt.load()), 0}); + m_epoch_cnt.fetch_add(1); + m_next_epoch.store({current_epoch->clone(m_epoch_cnt.load()), 0}); - end_job(current_epoch); + end_job(current_epoch); - return m_next_epoch.load().epoch; - } + return m_next_epoch.load().epoch; + } - void retire_epoch(_Epoch *epoch) { - /* - * Epochs with currently active jobs cannot - * be retired. By the time retire_epoch is called, - * it is assumed that a new epoch is active, meaning - * that the epoch to be retired should no longer - * accumulate new active jobs. Eventually, this - * number will hit zero and the function will - * proceed. - */ - - if (epoch == nullptr) { - return; - } + void retire_epoch(_Epoch *epoch) { + /* + * Epochs with currently active jobs cannot + * be retired. By the time retire_epoch is called, + * it is assumed that a new epoch is active, meaning + * that the epoch to be retired should no longer + * accumulate new active jobs. Eventually, this + * number will hit zero and the function will + * proceed. + */ - epoch_ptr old, new_ptr; - new_ptr = {nullptr, 0}; - do { - old = m_previous_epoch.load(); - - /* - * If running in single threaded mode, the failure to retire - * an Epoch will result in the thread of execution blocking - * indefinitely. - */ - if constexpr (std::same_as<SCHED, SerialScheduler>) { - if (old.epoch == epoch) assert(old.refcnt == 0); - } - - if (old.epoch == epoch && old.refcnt == 0 && - m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - usleep(1); - - } while(true); - - delete epoch; + if (epoch == nullptr) { + return; } - static void reconstruction(void *arguments) { - auto args = (ReconstructionArgs<R, S, Q, L> *) arguments; + epoch_ptr old, new_ptr; + new_ptr = {nullptr, 0}; + do { + old = m_previous_epoch.load(); + + /* + * If running in single threaded mode, the failure to retire + * an Epoch will result in the thread of execution blocking + * indefinitely. + */ + if constexpr (std::same_as<SchedType, SerialScheduler>) { + if (old.epoch == epoch) + assert(old.refcnt == 0); + } + + if (old.epoch == epoch && old.refcnt == 0 && + m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + usleep(1); + + } while (true); + + delete epoch; + } + + static void reconstruction(void *arguments) { + auto args = (ReconstructionArgs<ShardType, QueryType, L> *)arguments; + + ((DynamicExtension *)args->extension)->SetThreadAffinity(); + Structure *vers = args->epoch->get_structure(); + + if constexpr (L == LayoutPolicy::BSM) { + if (args->merges.size() > 0) { + vers->reconstruction(args->merges[0]); + } + } else { + for (ssize_t i = 0; i < args->merges.size(); i++) { + vers->reconstruction(args->merges[i].target, + args->merges[i].sources[0]); + } + } - ((DynamicExtension *) args->extension)->SetThreadAffinity(); - Structure *vers = args->epoch->get_structure(); + /* + * we'll grab the buffer AFTER doing the internal reconstruction, so we + * can flush as many records as possible in one go. The reconstruction + * was done so as to make room for the full buffer anyway, so there's + * no real benefit to doing this first. 
+ */ + auto buffer_view = args->epoch->get_buffer(); + size_t new_head = buffer_view.get_tail(); - if constexpr (L == LayoutPolicy::BSM) { - if (args->merges.size() > 0) { - vers->reconstruction(args->merges[0]); - } - } else { - for (ssize_t i=0; i<args->merges.size(); i++) { - vers->reconstruction(args->merges[i].target, args->merges[i].sources[0]); - } - } + /* + * if performing a compaction, don't flush the buffer, as + * there is no guarantee that any necessary reconstructions + * will free sufficient space in L0 to support a flush + */ + if (!args->compaction) { + vers->flush_buffer(std::move(buffer_view)); + } + args->result.set_value(true); - /* - * we'll grab the buffer AFTER doing the internal reconstruction, so we - * can flush as many records as possible in one go. The reconstruction - * was done so as to make room for the full buffer anyway, so there's - * no real benefit to doing this first. - */ - auto buffer_view = args->epoch->get_buffer(); - size_t new_head = buffer_view.get_tail(); + /* + * Compactions occur on an epoch _before_ it becomes active, + * and as a result the active epoch should _not_ be advanced as + * part of a compaction + */ + if (!args->compaction) { + ((DynamicExtension *)args->extension)->advance_epoch(new_head); + } - /* - * if performing a compaction, don't flush the buffer, as - * there is no guarantee that any necessary reconstructions - * will free sufficient space in L0 to support a flush - */ - if (!args->compaction) { - vers->flush_buffer(std::move(buffer_view)); + ((DynamicExtension *)args->extension) + ->m_reconstruction_scheduled.store(false); + + delete args; + } + + static void async_query(void *arguments) { + auto *args = + (QueryArgs<ShardType, QueryType, DynamicExtension> *) arguments; + + auto epoch = args->extension->get_active_epoch(); + + auto buffer = epoch->get_buffer(); + auto vers = epoch->get_structure(); + auto *parms = &(args->query_parms); + + /* create initial buffer query */ + auto buffer_query = QueryType::local_preproc_buffer(&buffer, parms); + + /* create initial local queries */ + std::vector<std::pair<ShardID, ShardType *>> shards; + std::vector<LocalQuery *> local_queries = + vers->get_local_queries(shards, parms); + + /* process local/buffer queries to create the final version */ + QueryType::distribute_query(parms, local_queries, buffer_query); + + /* execute the local/buffer queries and combine the results into output */ + std::vector<QueryResult> output; + do { + std::vector<std::vector<LocalResult>> + query_results(shards.size() + 1); + for (size_t i = 0; i < query_results.size(); i++) { + std::vector<LocalResult> local_results; + ShardID shid; + + if (i == 0) { /* execute buffer query */ + local_results = QueryType::local_query_buffer(buffer_query); + shid = INVALID_SHID; + } else { /*execute local queries */ + local_results = QueryType::local_query(shards[i - 1].second, + local_queries[i - 1]); + shid = shards[i - 1].first; } - args->result.set_value(true); + /* framework-level, automatic delete filtering */ + query_results[i] = std::move(local_results); - /* - * Compactions occur on an epoch _before_ it becomes active, - * and as a result the active epoch should _not_ be advanced as - * part of a compaction - */ - if (!args->compaction) { - ((DynamicExtension *) args->extension)->advance_epoch(new_head); + /* end query early if EARLY_ABORT is set and a result exists */ + if constexpr (QueryType::EARLY_ABORT) { + if (query_results[i].size() > 0) + break; } + } - ((DynamicExtension *) 
args->extension)->m_reconstruction_scheduled.store(false); - - delete args; - } - - static void async_query(void *arguments) { - QueryArgs<R, S, Q, L> *args = (QueryArgs<R, S, Q, L> *) arguments; - - auto epoch = ((DynamicExtension *) args->extension)->get_active_epoch(); - - auto ptr1 = ((DynamicExtension *) args->extension)->m_previous_epoch.load().epoch; - auto ptr2 = ((DynamicExtension *) args->extension)->m_current_epoch.load().epoch; - auto ptr3 = ((DynamicExtension *) args->extension)->m_next_epoch.load().epoch; - - - auto buffer = epoch->get_buffer(); - auto vers = epoch->get_structure(); - void *parms = args->query_parms; - - /* Get the buffer query states */ - void *buffer_state = Q::get_buffer_query_state(&buffer, parms); - - /* Get the shard query states */ - std::vector<std::pair<ShardID, Shard*>> shards; - std::vector<void *> states = vers->get_query_states(shards, parms); + /* + * combine the results of the local queries, also translating + * from LocalResultType to ResultType + */ + QueryType::combine(query_results, parms, output); - std::vector<R> results; - Q::process_query_states(parms, states, buffer_state); + /* optionally repeat the local queries if necessary */ + } while (QueryType::repeat(parms, output, local_queries, buffer_query)); - do { - std::vector<std::vector<Wrapped<R>>> query_results(shards.size() + 1); - for (size_t i=0; i<query_results.size(); i++) { - std::vector<Wrapped<R>> local_results; - ShardID shid; + /* return the output vector to caller via the future */ + args->result_set.set_value(std::move(output)); - if (i == 0) { /* process the buffer first */ - local_results = Q::buffer_query(buffer_state, parms); - shid = INVALID_SHID; - } else { - local_results = Q::query(shards[i - 1].second, states[i - 1], parms); - shid = shards[i - 1].first; - } + /* officially end the query job, releasing the pin on the epoch */ + args->extension->end_job(epoch); - query_results[i] = std::move(filter_deletes(local_results, shid, vers, &buffer)); - - if constexpr (Q::EARLY_ABORT) { - if (query_results[i].size() > 0) break; - } - } - Q::merge(query_results, parms, results); - - } while (Q::repeat(parms, results, states, buffer_state)); - - args->result_set.set_value(std::move(results)); - - ((DynamicExtension *) args->extension)->end_job(epoch); - - Q::delete_buffer_query_state(buffer_state); - for (size_t i=0; i<states.size(); i++) { - Q::delete_query_state(states[i]); - } - - delete args; + /* clean up memory allocated for temporary query objects */ + delete buffer_query; + for (size_t i = 0; i < local_queries.size(); i++) { + delete local_queries[i]; } - void schedule_reconstruction() { - auto epoch = create_new_epoch(); - /* - * the reconstruction process calls end_job(), - * so we must start one before calling it - */ - - ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>(); - args->epoch = epoch; - args->merges = epoch->get_structure()->get_reconstruction_tasks(m_buffer->get_high_watermark()); - args->extension = this; - args->compaction = false; - /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed here */ - - m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + delete args; + } + + void schedule_reconstruction() { + auto epoch = create_new_epoch(); + + ReconstructionArgs<ShardType, QueryType, L> *args = + new ReconstructionArgs<ShardType, QueryType, L>(); + args->epoch = epoch; + args->merges = epoch->get_structure()->get_reconstruction_tasks( + m_buffer->get_high_watermark()); + args->extension = this; + 
args->compaction = false; + /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed + * here */ + + m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + } + + std::future<std::vector<QueryResult>> + schedule_query(Parameters &&query_parms) { + auto args = + new QueryArgs<ShardType, QueryType, DynamicExtension>(); + args->extension = this; + args->query_parms = std::move(query_parms); + auto result = args->result_set.get_future(); + + m_sched.schedule_job(async_query, 0, (void *)args, QUERY); + + return result; + } + + int internal_append(const RecordType &rec, bool ts) { + if (m_buffer->is_at_low_watermark()) { + auto old = false; + + if (m_reconstruction_scheduled.compare_exchange_strong(old, true)) { + schedule_reconstruction(); + } } - std::future<std::vector<R>> schedule_query(void *query_parms) { - QueryArgs<R, S, Q, L> *args = new QueryArgs<R, S, Q, L>(); - args->extension = this; - args->query_parms = query_parms; - auto result = args->result_set.get_future(); + /* this will fail if the HWM is reached and return 0 */ + return m_buffer->append(rec, ts); + } - m_sched.schedule_job(async_query, 0, args, QUERY); - - return result; +#ifdef _GNU_SOURCE + void SetThreadAffinity() { + if constexpr (std::same_as<SchedType, SerialScheduler>) { + return; } - int internal_append(const R &rec, bool ts) { - if (m_buffer->is_at_low_watermark()) { - auto old = false; - - if (m_reconstruction_scheduled.compare_exchange_strong(old, true)) { - schedule_reconstruction(); - } - } - - /* this will fail if the HWM is reached and return 0 */ - return m_buffer->append(rec, ts); + int core = m_next_core.fetch_add(1) % m_core_cnt; + cpu_set_t mask; + CPU_ZERO(&mask); + + switch (core % 2) { + case 0: + // 0 |-> 0 + // 2 |-> 2 + // 4 |-> 4 + core = core + 0; + break; + case 1: + // 1 |-> 28 + // 3 |-> 30 + // 5 |-> 32 + core = (core - 1) + m_core_cnt; + break; } + CPU_SET(core, &mask); + ::sched_setaffinity(0, sizeof(mask), &mask); + } +#else + void SetThreadAffinity() {} +#endif - static std::vector<Wrapped<R>> filter_deletes(std::vector<Wrapped<R>> &records, ShardID shid, Structure *vers, BufView *bview) { - if constexpr (Q::SKIP_DELETE_FILTER) { - return std::move(records); - } - - std::vector<Wrapped<R>> processed_records; - processed_records.reserve(records.size()); + void end_job(_Epoch *epoch) { + epoch_ptr old, new_ptr; - /* - * For delete tagging, we just need to check the delete bit - * on each record. + do { + if (m_previous_epoch.load().epoch == epoch) { + old = m_previous_epoch; + /* + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry */ - if constexpr (D == DeletePolicy::TAGGING) { - for (auto &rec : records) { - if (rec.is_deleted()) { - continue; - } + if (old.epoch == nullptr) { + continue; + } - processed_records.emplace_back(rec); - } + assert(old.refcnt > 0); - return processed_records; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; } - + } else { + old = m_current_epoch; /* - * For tombstone deletes, we need to search for the corresponding - * tombstone for each record. + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry */ - for (auto &rec : records) { - if (rec.is_tombstone()) { - continue; - } - - // FIXME: need to figure out how best to re-enable the buffer tombstone - // check in the correct manner. 
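/*
 * Illustration (editor's sketch, not part of this patch): the asynchronous
 * query path above -- query() forwards to schedule_query(), which enqueues
 * async_query() with the scheduler -- is consumed through the returned
 * std::future. SomeQuery and ext are the same placeholders used in the
 * earlier sketch; SomeQuery::Parameters stands in for the query's parameter
 * type.
 *
 *   auto fut = ext.query(SomeQuery::Parameters{});
 *
 *   // get() blocks until async_query() has run local_query() /
 *   // local_query_buffer() against the shards and the buffer and has
 *   // combined the local results via QueryType::combine()
 *   std::vector<SomeQuery::ResultType> results = fut.get();
 *
 *   for (const auto &res : results) {
 *     // each element is a SomeQuery::ResultType produced by combine()
 *   }
 */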
- //if (buffview.check_tombstone(rec.rec)) { - //continue; - //} - - for (size_t i=0; i<bview->get_record_count(); i++) { - if (bview->get(i)->is_tombstone() && bview->get(i)->rec == rec.rec) { - continue; - } - } - - if (shid != INVALID_SHID) { - for (size_t lvl=0; lvl<=shid.level_idx; lvl++) { - if (vers->get_levels()[lvl]->check_tombstone(0, rec.rec)) { - continue; - } - } - - if (vers->get_levels()[shid.level_idx]->check_tombstone(shid.shard_idx + 1, rec.rec)) { - continue; - } - } - - processed_records.emplace_back(rec); - } - - return processed_records; - } - -#ifdef _GNU_SOURCE - void SetThreadAffinity() { - if constexpr (std::same_as<SCHED, SerialScheduler>) { - return; + if (old.epoch == nullptr) { + continue; } - int core = m_next_core.fetch_add(1) % m_core_cnt; - cpu_set_t mask; - CPU_ZERO(&mask); + assert(old.refcnt > 0); - switch (core % 2) { - case 0: - // 0 |-> 0 - // 2 |-> 2 - // 4 |-> 4 - core = core; - break; - case 1: - // 1 |-> 28 - // 3 |-> 30 - // 5 |-> 32 - core = (core - 1) + m_core_cnt; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_current_epoch.compare_exchange_strong(old, new_ptr)) { break; } - CPU_SET(core, &mask); - ::sched_setaffinity(0, sizeof(mask), &mask); - } -#else - void SetThreadAffinity() { - - } -#endif - - - void end_job(_Epoch *epoch) { - epoch_ptr old, new_ptr; - - do { - if (m_previous_epoch.load().epoch == epoch) { - old = m_previous_epoch; - /* - * This could happen if we get into the system during a - * transition. In this case, we can just back out and retry - */ - if (old.epoch == nullptr) { - continue; - } - - assert(old.refcnt > 0); - - new_ptr = {old.epoch, old.refcnt - 1}; - if (m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } else { - old = m_current_epoch; - /* - * This could happen if we get into the system during a - * transition. In this case, we can just back out and retry - */ - if (old.epoch == nullptr) { - continue; - } - - assert(old.refcnt > 0); - - new_ptr = {old.epoch, old.refcnt - 1}; - if (m_current_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } - } while (true); - } - + } + } while (true); + } }; -} - +} // namespace de diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h index 577d6cd..1b64646 100644 --- a/include/framework/interface/Query.h +++ b/include/framework/interface/Query.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Query.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,23 +10,127 @@ #include "framework/QueryRequirements.h" -namespace de{ +namespace de { -template <typename Q, typename R, typename S> -concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv, std::vector<R> &resv) { - {Q::get_query_state(sh, p)} -> std::convertible_to<void*>; - {Q::get_buffer_query_state(bv, p)} -> std::convertible_to<void *>; - {Q::process_query_states(p, s, p)}; - {Q::query(sh, p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::buffer_query(p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::merge(rv, p, resv)}; +/* + * FIXME: It would probably be best to absorb the std::vector into + * this type too; this would allow user-defined collections for + * intermediate results, which could allow for more merging + * optimizations. 
However, this would require an alternative + * approach to doing delete checks, so we'll leave it for now. + */ +template <typename R> +concept LocalResultInterface = requires(R res) { + { res.is_deleted() } -> std::convertible_to<bool>; + { res.is_tombstone() } -> std::convertible_to<bool>; +}; + +/* + * + * + */ +template <typename QUERY, typename SHARD, + typename RESULT = typename QUERY::ResultType, + typename LOCAL_RESULT = typename QUERY::LocalResultType, + typename PARAMETERS = typename QUERY::Parameters, + typename LOCAL = typename QUERY::LocalQuery, + typename LOCAL_BUFFER = typename QUERY::LocalQueryBuffer> +concept QueryInterface = LocalResultInterface<LOCAL_RESULT> && + requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query, + SHARD *shard, std::vector<LOCAL *> &local_queries, + std::vector<std::vector<LOCAL_RESULT>> &local_results, + std::vector<RESULT> &result, + BufferView<typename SHARD::RECORD> *bv) { + + /* + * Given a set of query parameters and a shard, return a local query + * object for that shard. + */ + { QUERY::local_preproc(shard, parameters) } -> std::convertible_to<LOCAL *>; + + /* + * Given a set of query parameters and a buffer view, return a local + * query object for the buffer. + * NOTE: for interface reasons, the pointer to the buffer view MUST be + * stored inside of the local query object. The future buffer + * query routine will access the buffer by way of this pointer. + */ + { + QUERY::local_preproc_buffer(bv, parameters) + } -> std::convertible_to<LOCAL_BUFFER *>; + + /* + * Given a full set of local queries, and the buffer query, make any + * necessary adjustments to the local queries in-place, to account for + * global information. If no additional processing is required, this + * function can be left empty. + */ + {QUERY::distribute_query(parameters, local_queries, buffer_query)}; + + /* + * Answer the local query, defined by `local` against `shard` and return + * a vector of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query(shard, local) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Answer the local query defined by `local` against the buffer (which + * should be accessed by a pointer inside of `local`) and return a vector + * of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query_buffer(buffer_query) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Process the local results from the buffer and all of the shards, + * stored in `local_results`, and insert the associated ResultType + * objects into the `result` vector, which represents the final result + * of the query. Updates to this vector are done in-place. + */ + {QUERY::combine(local_results, parameters, result)}; - {Q::delete_query_state(p)} -> std::same_as<void>; - {Q::delete_buffer_query_state(p)} -> std::same_as<void>; + /* + * Process the post-combine `result` vector of ResultType objects, + * in the context of the global and local query parameters, to determine + * if the query should be repeated. If so, make any necessary adjustments + * to the local query objects and return True. Otherwise, return False. + * + * If no repetition is needed for a given problem type, simply return + * False immediately and the query will end. 
+ */ + { + QUERY::repeat(parameters, result, local_queries, buffer_query) + } -> std::same_as<bool>; - {Q::repeat(p, resv, s, p)} -> std::same_as<bool>; + /* + * If this flag is True, then the query will immediately stop and return + * a result as soon as the first non-deleted LocalRecordType is found. + * Otherwise, every Shard and the buffer will be queried and the results + * merged, like normal. + * + * This is largely an optimization flag for use with point-lookup, or + * other single-record result queries + */ + { QUERY::EARLY_ABORT } -> std::convertible_to<bool>; - {Q::EARLY_ABORT} -> std::convertible_to<bool>; - {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>; + /* + * If false, the built-in delete filtering that the framework can + * apply to the local results, prior to calling combine, will be skipped. + * This general filtering can be inefficient, particularly for tombstone + * -based deletes, and so if a more efficient manual filtering can be + * performed, it is worth setting this to True and doing that filtering + * in the combine step. + * + * If deletes are not a consideration for your problem, it's also best + * to turn this off, as it'll avoid the framework making an extra pass + * over the local results prior to combining them. + * + * TODO: Temporarily disabling this, as we've dropped framework-level + * delete filtering for the time being. + */ + /* { QUERY::SKIP_DELETE_FILTER } -> std::convertible_to<bool>; */ }; -} +} // namespace de diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h index 19ccadd..d3e77d8 100644 --- a/include/framework/interface/Record.h +++ b/include/framework/interface/Record.h @@ -1,272 +1,247 @@ /* * include/framework/interface/Record.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* - * FIXME: the record implementations could probably be broken out into + * FIXME: the record implementations could probably be broken out into * different files, leaving only the interface here */ #pragma once -#include <cstring> -#include <concepts> #include <cmath> +#include <concepts> +#include <cstring> #include "psu-util/hash.h" namespace de { -template<typename R> +template <typename R> concept RecordInterface = requires(R r, R s) { - { r < s } ->std::convertible_to<bool>; - { r == s } ->std::convertible_to<bool>; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<typename R> +template <typename R> concept WeightedRecordInterface = requires(R r) { - {r.weight} -> std::convertible_to<double>; + { r.weight } -> std::convertible_to<double>; }; -template<typename R> +template <typename R> concept NDRecordInterface = RecordInterface<R> && requires(R r, R s) { - {r.calc_distance(s)} -> std::convertible_to<double>; + { r.calc_distance(s) } -> std::convertible_to<double>; }; template <typename R> concept KVPInterface = RecordInterface<R> && requires(R r) { - r.key; - r.value; + r.key; + r.value; }; -template<typename R> +template <typename R> concept AlexInterface = KVPInterface<R> && requires(R r) { - {r.key} -> std::convertible_to<size_t>; - {r.value} -> std::convertible_to<size_t>; + { r.key } -> std::convertible_to<size_t>; + { r.value } -> std::convertible_to<size_t>; }; -template<typename R> -concept WrappedInterface = RecordInterface<R> && requires(R r, R s, bool b, int i) { - {r.header} -> std::convertible_to<uint32_t>; - r.rec; - {r.set_delete()}; - {r.is_deleted()} -> std::convertible_to<bool>; - {r.set_tombstone(b)}; - {r.is_tombstone()} -> std::convertible_to<bool>; - {r.set_timestamp(i)}; - {r.get_timestamp()} -> std::convertible_to<uint32_t>; - {r.clear_timestamp()}; - {r.is_visible()} -> std::convertible_to<bool>; - {r.set_visible()}; - {r < s} -> std::convertible_to<bool>; - {r == s} ->std::convertible_to<bool>; +template <typename R> +concept WrappedInterface = RecordInterface<R> && + requires(R r, R s, bool b, int i) { + { r.header } -> std::convertible_to<uint32_t>; + r.rec; + {r.set_delete()}; + { r.is_deleted() } -> std::convertible_to<bool>; + {r.set_tombstone(b)}; + { r.is_tombstone() } -> std::convertible_to<bool>; + {r.set_timestamp(i)}; + { r.get_timestamp() } -> std::convertible_to<uint32_t>; + {r.clear_timestamp()}; + { r.is_visible() } -> std::convertible_to<bool>; + {r.set_visible()}; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<RecordInterface R> -struct Wrapped { - uint32_t header; - R rec; +template <RecordInterface R> struct Wrapped { + uint32_t header; + R rec; - inline void set_delete() { - header |= 2; - } + inline void set_delete() { header |= 2; } - inline bool is_deleted() const { - return header & 2; - } + inline bool is_deleted() const { return header & 2; } - inline void set_visible() { - header |= 4; - } + inline void set_visible() { header |= 4; } - inline bool is_visible() const { - return header & 4; - } + inline bool is_visible() const { return header & 4; } - inline void set_timestamp(int ts) { - header |= (ts << 3); - } - - inline int get_timestamp() const { - return header >> 3; - } + inline void set_timestamp(int ts) { header |= (ts << 3); } - inline void clear_timestamp() { - header &= 7; - } + inline int get_timestamp() const { return header >> 3; } - inline void set_tombstone(bool val=true) { - if (val) { - header |= 1; - } else { - 
header &= 0; - } - } + inline void clear_timestamp() { header &= 7; } - inline bool is_tombstone() const { - return header & 1; + inline void set_tombstone(bool val = true) { + if (val) { + header |= 1; + } else { + header &= 0; } + } - inline bool operator<(const Wrapped& other) const { - return rec < other.rec || (rec == other.rec && header < other.header); - } + inline bool is_tombstone() const { return header & 1; } - inline bool operator==(const Wrapped& other) const { - return rec == other.rec; - } + inline bool operator<(const Wrapped &other) const { + return rec < other.rec || (rec == other.rec && header < other.header); + } + inline bool operator==(const Wrapped &other) const { + return rec == other.rec; + } }; -template <typename K, typename V> -struct Record { - K key; - V value; +template <typename K, typename V> struct Record { + K key; + V value; - inline bool operator<(const Record& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const Record &other) const { + return key < other.key || (key == other.key && value < other.value); + } - inline bool operator==(const Record& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const Record &other) const { + return key == other.key && value == other.value; + } }; -template<typename V> -struct Record<const char*, V> { - const char* key; - V value; - size_t len; +template <typename V> struct Record<const char *, V> { + const char *key; + V value; + size_t len; - inline bool operator<(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) < 0; - } + inline bool operator<(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) < 0; + } - inline bool operator==(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) == 0; - } + inline bool operator==(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) == 0; + } }; -template <typename K, typename V, typename W> -struct WeightedRecord { - K key; - V value; - W weight = 1; +template <typename K, typename V, typename W> struct WeightedRecord { + K key; + V value; + W weight = 1; - inline bool operator==(const WeightedRecord& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const WeightedRecord &other) const { + return key == other.key && value == other.value; + } - inline bool operator<(const WeightedRecord& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const WeightedRecord &other) const { + return key < other.key || (key == other.key && value < other.value); + } }; +template <typename V, size_t D = 2> struct CosinePoint { + V data[D]; -template <typename V, size_t D=2> -struct CosinePoint{ - V data[D]; - - inline bool operator==(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } - - return true; + inline bool operator==(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return true; + } + /* 
lexicographic order */ + inline bool operator<(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { + return true; + } else if (data[i] > other.data[i]) { return false; + } } - inline double calc_distance(const CosinePoint& other) const { + return false; + } - double prod = 0; - double asquared = 0; - double bsquared = 0; + inline double calc_distance(const CosinePoint &other) const { - for (size_t i=0; i<D; i++) { - prod += data[i] * other.data[i]; - asquared += data[i]*data[i]; - bsquared += other.data[i]*other.data[i]; - } + double prod = 0; + double asquared = 0; + double bsquared = 0; - return prod / std::sqrt(asquared * bsquared); + for (size_t i = 0; i < D; i++) { + prod += data[i] * other.data[i]; + asquared += data[i] * data[i]; + bsquared += other.data[i] * other.data[i]; } + + return prod / std::sqrt(asquared * bsquared); + } }; +template <typename V, size_t D = 2> struct EuclidPoint { + V data[D]; -template <typename V, size_t D=2> -struct EuclidPoint{ - V data[D]; + inline bool operator==(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } + } - inline bool operator==(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } + return true; + } + /* lexicographic order */ + inline bool operator<(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { return true; + } else if (data[i] > other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return false; + } - return false; + inline double calc_distance(const EuclidPoint &other) const { + double dist = 0; + for (size_t i = 0; i < D; i++) { + dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); } - inline double calc_distance(const EuclidPoint& other) const { - double dist = 0; - for (size_t i=0; i<D; i++) { - dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); - } - - return std::sqrt(dist); - } + return std::sqrt(dist); + } }; -template<RecordInterface R> -struct RecordHash { - size_t operator()(R const &rec) const { - return psudb::hash_bytes((std::byte *) &rec, sizeof(R)); - } +template <RecordInterface R> struct RecordHash { + size_t operator()(R const &rec) const { + return psudb::hash_bytes((std::byte *)&rec, sizeof(R)); + } }; -template <typename R> -class DistCmpMax { +template <typename R> class DistCmpMax { public: - DistCmpMax(R *baseline) : P(baseline) {} + DistCmpMax(R *baseline) : P(baseline) {} - inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } + inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { + return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); + } - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ - return a->calc_distance(*P) > b->calc_distance(*P); - } + inline bool operator()(const R *a, + const R *b) requires(!WrappedInterface<R>) { + return a->calc_distance(*P) > b->calc_distance(*P); + } private: - R *P; + R *P; }; -} +} // namespace de diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h index 451ddd2..d76a6c8 100644 --- 
a/include/framework/interface/Scheduler.h +++ b/include/framework/interface/Scheduler.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Scheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,10 +10,11 @@ #include "framework/scheduling/Task.h" -template <typename S> -concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) { - {S(i, i)}; - {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; - {s.shutdown()}; - {s.print_statistics()}; +template <typename SchedType> +concept SchedulerInterface = requires(SchedType s, size_t i, void *vp, + de::Job j) { + {SchedType(i, i)}; + {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; + {s.shutdown()}; + {s.print_statistics()}; }; diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h index c4a9180..bd980c0 100644 --- a/include/framework/interface/Shard.h +++ b/include/framework/interface/Shard.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Shard.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -12,25 +12,57 @@ namespace de { -template <typename S, typename R> -concept ShardInterface = RecordInterface<R> && requires(S s, std::vector<S*> spp, void *p, bool b, size_t i, BufferView<R> bv, R r) { - {S(spp)}; - {S(std::move(bv))}; +template <typename SHARD> +concept ShardInterface = RecordInterface<typename SHARD::RECORD> && + requires(SHARD shard, const std::vector<SHARD *> &shard_vector, bool b, + BufferView<typename SHARD::RECORD> bv, + typename SHARD::RECORD rec) { + /* construct a shard from a vector of shards of the same type */ + {SHARD(shard_vector)}; - {s.point_lookup(r, b) } -> std::same_as<Wrapped<R>*>; - {s.get_data()} -> std::same_as<Wrapped<R>*>; + /* construct a shard from a buffer view (i.e., unsorted array of records) */ + {SHARD(std::move(bv))}; + + /* perform a lookup for a record matching rec and return a pointer to it */ + { + shard.point_lookup(rec, b) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; + + /* + * return the number of records in the shard -- used to determine when + * reconstructions occur + */ + { shard.get_record_count() } -> std::convertible_to<size_t>; + + /* + * return the number of tombstones in the shard -- can simply return + * 0 if tombstones are not in use. + */ + { shard.get_tombstone_count() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by the main data structure + * within the shard -- informational use only at the moment + */ + { shard.get_memory_usage() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by auxilliary data + * structures (bloom filters, etc.) 
within the shard -- informational + * use only at the moment + */ + { shard.get_aux_memory_usage() } -> std::convertible_to<size_t>; - {s.get_record_count()} -> std::convertible_to<size_t>; - {s.get_tombstone_count()} -> std::convertible_to<size_t>; - {s.get_memory_usage()} -> std::convertible_to<size_t>; - {s.get_aux_memory_usage()} -> std::convertible_to<size_t>; }; -template <typename S, typename R> -concept SortedShardInterface = ShardInterface<S, R> && requires(S s, R r, R *rp, size_t i) { - {s.lower_bound(r)} -> std::convertible_to<size_t>; - {s.upper_bound(r)} -> std::convertible_to<size_t>; - {s.get_record_at(i)} -> std::same_as<Wrapped<R>*>; +template <typename SHARD> +concept SortedShardInterface = ShardInterface<SHARD> && + requires(SHARD shard, typename SHARD::RECORD rec, size_t index) { + { shard.lower_bound(rec) } -> std::convertible_to<size_t>; + { shard.upper_bound(rec) } -> std::convertible_to<size_t>; + { + shard.get_record_at(index) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; }; -} +} // namespace de diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h index 9377fb0..03675b1 100644 --- a/include/framework/scheduling/Epoch.h +++ b/include/framework/scheduling/Epoch.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/Epoch.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -11,133 +11,120 @@ #include <condition_variable> #include <mutex> -#include "framework/structure/MutableBuffer.h" -#include "framework/structure/ExtensionStructure.h" #include "framework/structure/BufferView.h" +#include "framework/structure/ExtensionStructure.h" +#include "framework/structure/MutableBuffer.h" namespace de { - -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L> class Epoch { private: - typedef MutableBuffer<R> Buffer; - typedef ExtensionStructure<R, S, Q, L> Structure; - typedef BufferView<R> BufView; -public: - Epoch(size_t number=0) - : m_buffer(nullptr) - , m_structure(nullptr) - , m_active_merge(false) - , m_epoch_number(number) - , m_buffer_head(0) - {} - - Epoch(size_t number, Structure *structure, Buffer *buff, size_t head) - : m_buffer(buff) - , m_structure(structure) - , m_active_merge(false) - , m_epoch_number(number) - , m_buffer_head(head) - { - structure->take_reference(); - } - - ~Epoch() { - if (m_structure) { - m_structure->release_reference(); - } - - if (m_structure->get_reference_count() == 0) { - delete m_structure; - } + typedef typename ShardType::RECORD RecordType; + typedef MutableBuffer<RecordType> Buffer; + typedef ExtensionStructure<ShardType, QueryType, L> Structure; + typedef BufferView<RecordType> BufView; +public: + Epoch(size_t number = 0) + : m_buffer(nullptr), m_structure(nullptr), m_active_merge(false), + m_epoch_number(number), m_buffer_head(0) {} + + Epoch(size_t number, Structure *structure, Buffer *buff, size_t head) + : m_buffer(buff), m_structure(structure), m_active_merge(false), + m_epoch_number(number), m_buffer_head(head) { + structure->take_reference(); + } + + ~Epoch() { + if (m_structure) { + m_structure->release_reference(); } - /* - * Epochs are *not* copyable or movable. 
Only one can exist, and all users - * of it work with pointers - */ - Epoch(const Epoch&) = delete; - Epoch(Epoch&&) = delete; - Epoch &operator=(const Epoch&) = delete; - Epoch &operator=(Epoch&&) = delete; - - size_t get_epoch_number() { - return m_epoch_number; + if (m_structure->get_reference_count() == 0) { + delete m_structure; } - - Structure *get_structure() { - return m_structure; + } + + /* + * Epochs are *not* copyable or movable. Only one can exist, and all users + * of it work with pointers + */ + Epoch(const Epoch &) = delete; + Epoch(Epoch &&) = delete; + Epoch &operator=(const Epoch &) = delete; + Epoch &operator=(Epoch &&) = delete; + + size_t get_epoch_number() { return m_epoch_number; } + + Structure *get_structure() { return m_structure; } + + BufView get_buffer() { return m_buffer->get_buffer_view(m_buffer_head); } + + /* + * Returns a new Epoch object that is a copy of this one. The new object + * will also contain a copy of the m_structure, rather than a reference to + * the same one. The epoch number of the new epoch will be set to the + * provided argument. + */ + Epoch *clone(size_t number) { + std::unique_lock<std::mutex> m_buffer_lock; + auto epoch = new Epoch(number); + epoch->m_buffer = m_buffer; + epoch->m_buffer_head = m_buffer_head; + + if (m_structure) { + epoch->m_structure = m_structure->copy(); + /* the copy routine returns a structure with 0 references */ + epoch->m_structure->take_reference(); } - BufView get_buffer() { - return m_buffer->get_buffer_view(m_buffer_head); + return epoch; + } + + /* + * Check if a merge can be started from this Epoch. At present, without + * concurrent merging, this simply checks if there is currently a scheduled + * merge based on this Epoch. If there is, returns false. If there isn't, + * return true and set a flag indicating that there is an active merge. + */ + bool prepare_reconstruction() { + auto old = m_active_merge.load(); + if (old) { + return false; } - /* - * Returns a new Epoch object that is a copy of this one. The new object - * will also contain a copy of the m_structure, rather than a reference to - * the same one. The epoch number of the new epoch will be set to the - * provided argument. - */ - Epoch *clone(size_t number) { - std::unique_lock<std::mutex> m_buffer_lock; - auto epoch = new Epoch(number); - epoch->m_buffer = m_buffer; - epoch->m_buffer_head = m_buffer_head; - - if (m_structure) { - epoch->m_structure = m_structure->copy(); - /* the copy routine returns a structure with 0 references */ - epoch->m_structure->take_reference(); - } - - return epoch; + // FIXME: this needs cleaned up + while (!m_active_merge.compare_exchange_strong(old, true)) { + old = m_active_merge.load(); + if (old) { + return false; + } } - /* - * Check if a merge can be started from this Epoch. At present, without - * concurrent merging, this simply checks if there is currently a scheduled - * merge based on this Epoch. If there is, returns false. If there isn't, - * return true and set a flag indicating that there is an active merge. 
- */ - bool prepare_reconstruction() { - auto old = m_active_merge.load(); - if (old) { - return false; - } - - // FIXME: this needs cleaned up - while (!m_active_merge.compare_exchange_strong(old, true)) { - old = m_active_merge.load(); - if (old) { - return false; - } - } - - return true; - } + return true; + } - bool advance_buffer_head(size_t head) { - m_buffer_head = head; - return m_buffer->advance_head(m_buffer_head); - } + bool advance_buffer_head(size_t head) { + m_buffer_head = head; + return m_buffer->advance_head(m_buffer_head); + } private: - Structure *m_structure; - Buffer *m_buffer; - - std::mutex m_buffer_lock; - std::atomic<bool> m_active_merge; - - /* - * The number of currently active jobs - * (queries/merges) operating on this - * epoch. An epoch can only be retired - * when this number is 0. - */ - size_t m_epoch_number; - size_t m_buffer_head; + Buffer *m_buffer; + Structure *m_structure; + + std::mutex m_buffer_lock; + std::atomic<bool> m_active_merge; + + /* + * The number of currently active jobs + * (queries/merges) operating on this + * epoch. An epoch can only be retired + * when this number is 0. + */ + size_t m_epoch_number; + size_t m_buffer_head; }; -} +} // namespace de diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h index 3ed4f49..7cb6d20 100644 --- a/include/framework/scheduling/FIFOScheduler.h +++ b/include/framework/scheduling/FIFOScheduler.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/FIFOScheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -17,11 +17,11 @@ */ #pragma once -#include <thread> -#include <condition_variable> -#include <chrono> #include "framework/scheduling/Task.h" #include "framework/scheduling/statistics.h" +#include <chrono> +#include <condition_variable> +#include <thread> #include "ctpl/ctpl.h" #include "psu-ds/LockedPriorityQueue.h" @@ -30,100 +30,95 @@ namespace de { using namespace std::literals::chrono_literals; - class FIFOScheduler { private: - static const size_t DEFAULT_MAX_THREADS = 8; + static const size_t DEFAULT_MAX_THREADS = 8; public: - FIFOScheduler(size_t memory_budget, size_t thread_cnt) - : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) - , m_thrd_cnt((thread_cnt) ? thread_cnt: DEFAULT_MAX_THREADS) - , m_used_memory(0) - , m_used_thrds(0) - , m_shutdown(false) - { - m_sched_thrd = std::thread(&FIFOScheduler::run, this); - m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this); - m_thrd_pool.resize(m_thrd_cnt); + FIFOScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX), + m_thrd_cnt((thread_cnt) ? 
thread_cnt : DEFAULT_MAX_THREADS), + m_used_memory(0), m_used_thrds(0), m_shutdown(false) { + m_sched_thrd = std::thread(&FIFOScheduler::run, this); + m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this); + m_thrd_pool.resize(m_thrd_cnt); + } + + ~FIFOScheduler() { + if (!m_shutdown.load()) { + shutdown(); } - ~FIFOScheduler() { - if (!m_shutdown.load()) { - shutdown(); - } + m_sched_thrd.join(); + m_sched_wakeup_thrd.join(); + } - m_sched_thrd.join(); - m_sched_wakeup_thrd.join(); - } + void schedule_job(std::function<void(void *)> job, size_t size, void *args, + size_t type = 0) { + std::unique_lock<std::mutex> lk(m_cv_lock); + size_t ts = m_counter.fetch_add(1); - void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { - std::unique_lock<std::mutex> lk(m_cv_lock); - size_t ts = m_counter.fetch_add(1); + m_stats.job_queued(ts, type, size); + m_task_queue.push(Task(size, ts, job, args, type, &m_stats)); - m_stats.job_queued(ts, type, size); - m_task_queue.push(Task(size, ts, job, args, type, &m_stats)); + m_cv.notify_all(); + } - m_cv.notify_all(); - } - - void shutdown() { - m_shutdown.store(true); - m_thrd_pool.stop(true); - m_cv.notify_all(); - } + void shutdown() { + m_shutdown.store(true); + m_thrd_pool.stop(true); + m_cv.notify_all(); + } - void print_statistics() { - m_stats.print_statistics(); - } + void print_statistics() { m_stats.print_statistics(); } private: - psudb::LockedPriorityQueue<Task> m_task_queue; + psudb::LockedPriorityQueue<Task> m_task_queue; - size_t m_memory_budget; - size_t m_thrd_cnt; + [[maybe_unused]] size_t m_memory_budget; + size_t m_thrd_cnt; - std::atomic<bool> m_shutdown; - std::atomic<size_t> m_counter; - std::mutex m_cv_lock; - std::condition_variable m_cv; + std::atomic<size_t> m_counter; + std::mutex m_cv_lock; + std::condition_variable m_cv; - std::thread m_sched_thrd; - std::thread m_sched_wakeup_thrd; - ctpl::thread_pool m_thrd_pool; + std::thread m_sched_thrd; + std::thread m_sched_wakeup_thrd; + ctpl::thread_pool m_thrd_pool; - std::atomic<size_t> m_used_thrds; - std::atomic<size_t> m_used_memory; + std::atomic<size_t> m_used_memory; + std::atomic<size_t> m_used_thrds; - SchedulerStatistics m_stats; + std::atomic<bool> m_shutdown; - void periodic_wakeup() { - do { - std::this_thread::sleep_for(10us); - m_cv.notify_all(); - } while (!m_shutdown.load()); - } + SchedulerStatistics m_stats; - void schedule_next() { - assert(m_task_queue.size() > 0); - auto t = m_task_queue.pop(); - m_stats.job_scheduled(t.m_timestamp); + void periodic_wakeup() { + do { + std::this_thread::sleep_for(10us); + m_cv.notify_all(); + } while (!m_shutdown.load()); + } - m_thrd_pool.push(t); - } + void schedule_next() { + assert(m_task_queue.size() > 0); + auto t = m_task_queue.pop(); + m_stats.job_scheduled(t.m_timestamp); - void run() { - do { - std::unique_lock<std::mutex> cv_lock(m_cv_lock); - m_cv.wait(cv_lock); + m_thrd_pool.push(t); + } - while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) { - schedule_next(); - } - } while(!m_shutdown.load()); - } + void run() { + do { + std::unique_lock<std::mutex> cv_lock(m_cv_lock); + m_cv.wait(cv_lock); + while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) { + schedule_next(); + } + } while (!m_shutdown.load()); + } }; -} +} // namespace de diff --git a/include/framework/scheduling/SerialScheduler.h b/include/framework/scheduling/SerialScheduler.h index ac59301..7cd9cfc 100644 --- a/include/framework/scheduling/SerialScheduler.h +++ 
b/include/framework/scheduling/SerialScheduler.h @@ -1,13 +1,13 @@ /* * include/framework/scheduling/SerialScheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * * IMPORTANT: This "scheduler" is a shim implementation for allowing - * strictly serial, single-threaded operation of the framework. It should - * never be used in multi-threaded contexts. A call to the schedule_job + * strictly serial, single-threaded operation of the framework. It should + * never be used in multi-threaded contexts. A call to the schedule_job * function will immediately run the job and block on its completion before * returning. * @@ -21,42 +21,36 @@ namespace de { class SerialScheduler { public: - SerialScheduler(size_t memory_budget, size_t thread_cnt) - : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) - , m_thrd_cnt((thread_cnt) ? thread_cnt: UINT64_MAX) - , m_used_memory(0) - , m_used_thrds(0) - , m_counter(0) - {} - - ~SerialScheduler() = default; - - void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { - size_t ts = m_counter++; - m_stats.job_queued(ts, type, size); - m_stats.job_scheduled(ts); - auto t = Task(size, ts, job, args, type, &m_stats); - t(0); - } - - void shutdown() { - /* intentionally left blank */ - } - - void print_statistics() { - m_stats.print_statistics(); - } + SerialScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX), + m_thrd_cnt((thread_cnt) ? thread_cnt : UINT64_MAX), m_used_memory(0), + m_used_thrds(0), m_counter(0) {} + + ~SerialScheduler() = default; + + void schedule_job(std::function<void(void *)> job, size_t size, void *args, + size_t type = 0) { + size_t ts = m_counter++; + m_stats.job_queued(ts, type, size); + m_stats.job_scheduled(ts); + auto t = Task(size, ts, job, args, type, &m_stats); + t(0); + } + + void shutdown() { /* intentionally left blank */ } + + void print_statistics() { m_stats.print_statistics(); } private: - size_t m_memory_budget; - size_t m_thrd_cnt; + [[maybe_unused]] size_t m_memory_budget; + [[maybe_unused]] size_t m_thrd_cnt; - size_t m_used_thrds; - size_t m_used_memory; + [[maybe_unused]] size_t m_used_memory; + [[maybe_unused]] size_t m_used_thrds; - size_t m_counter; + size_t m_counter; - SchedulerStatistics m_stats; + SchedulerStatistics m_stats; }; -} +} // namespace de diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index bd53090..6b6f040 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/Task.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -13,77 +13,76 @@ */ #pragma once -#include <future> -#include <functional> #include <chrono> +#include <functional> +#include <future> -#include "framework/util/Configuration.h" #include "framework/scheduling/Epoch.h" #include "framework/scheduling/statistics.h" +#include "framework/util/Configuration.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L> struct ReconstructionArgs { - Epoch<R, S, Q, L> *epoch; - ReconstructionVector merges; - std::promise<bool> result; - bool compaction; - void *extension; + typedef typename ShardType::RECORD RecordType; + Epoch<ShardType, QueryType, L> *epoch; + ReconstructionVector merges; + std::promise<bool> result; + bool compaction; + void *extension; }; -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> -struct QueryArgs { - std::promise<std::vector<R>> result_set; - void *query_parms; - void *extension; +template <ShardInterface S, QueryInterface<S> Q, typename DE> struct QueryArgs { + std::promise<std::vector<typename Q::ResultType>> result_set; + typename Q::Parameters query_parms; + DE *extension; }; -typedef std::function<void(void*)> Job; +typedef std::function<void(void *)> Job; struct Task { - Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr) - : m_job(job) - , m_size(size) - , m_timestamp(ts) - , m_args(args) - , m_type(type) - , m_stats(stats) - {} + Task(size_t size, size_t ts, Job job, void *args, size_t type = 0, + SchedulerStatistics *stats = nullptr) + : m_job(job), m_size(size), m_timestamp(ts), m_args(args), m_type(type), + m_stats(stats) {} - Job m_job; - size_t m_size; - size_t m_timestamp; - void *m_args; - size_t m_type; - SchedulerStatistics *m_stats; + Job m_job; + size_t m_size; + size_t m_timestamp; + void *m_args; + size_t m_type; + SchedulerStatistics *m_stats; - friend bool operator<(const Task &self, const Task &other) { - return self.m_timestamp < other.m_timestamp; - } + friend bool operator<(const Task &self, const Task &other) { + return self.m_timestamp < other.m_timestamp; + } - friend bool operator>(const Task &self, const Task &other) { - return self.m_timestamp > other.m_timestamp; - } + friend bool operator>(const Task &self, const Task &other) { + return self.m_timestamp > other.m_timestamp; + } - void operator()(size_t thrd_id) { - auto start = std::chrono::high_resolution_clock::now(); - if (m_stats) { - m_stats->job_begin(m_timestamp); - } + void operator()(size_t thrd_id) { + auto start = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_begin(m_timestamp); + } - m_job(m_args); + m_job(m_args); - if (m_stats) { - m_stats->job_complete(m_timestamp); - } - auto stop = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_complete(m_timestamp); + } + auto stop = std::chrono::high_resolution_clock::now(); - if (m_stats) { - auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); - m_stats->log_time_data(time, m_type); - } + if (m_stats) { + auto time = + std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start) + .count(); + m_stats->log_time_data(time, m_type); } + } }; -} +} // namespace de diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h index 6c479cd..48c186f 100644 --- a/include/framework/scheduling/statistics.h +++ 
b/include/framework/scheduling/statistics.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/statistics.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -13,106 +13,94 @@ */ #pragma once -#include <cstdlib> +#include <atomic> #include <cassert> +#include <chrono> +#include <cstdlib> +#include <mutex> #include <unordered_map> #include <vector> -#include <mutex> -#include <chrono> -#include <atomic> namespace de { class SchedulerStatistics { private: - enum class EventType { - QUEUED, - SCHEDULED, - STARTED, - FINISHED - }; + enum class EventType { QUEUED, SCHEDULED, STARTED, FINISHED }; - struct Event { - size_t id; - EventType type; - }; - - struct JobInfo { - size_t id; - size_t size; - size_t type; - }; + struct Event { + size_t id; + EventType type; + }; + struct JobInfo { + size_t id; + size_t size; + size_t type; + }; public: - SchedulerStatistics() = default; - ~SchedulerStatistics() = default; + SchedulerStatistics() = default; + ~SchedulerStatistics() = default; - void job_queued(size_t id, size_t type, size_t size) { - auto time = std::chrono::high_resolution_clock::now(); - } + void job_queued(size_t id, size_t type, size_t size) { } - void job_scheduled(size_t id) { - std::unique_lock<std::mutex> lk(m_mutex); + void job_scheduled(size_t id) { std::unique_lock<std::mutex> lk(m_mutex); } - } + void job_begin(size_t id) {} - void job_begin(size_t id) { + void job_complete(size_t id) {} - } + /* FIXME: This is just a temporary approach */ + void log_time_data(size_t length, size_t type) { + assert(type == 1 || type == 2); - void job_complete(size_t id) { + if (type == 1) { + m_type_1_cnt.fetch_add(1); + m_type_1_total_time.fetch_add(length); - } + if (length > m_type_1_largest_time) { + m_type_1_largest_time.store(length); + } + } else { + m_type_2_cnt.fetch_add(1); + m_type_2_total_time.fetch_add(length); - /* FIXME: This is just a temporary approach */ - void log_time_data(size_t length, size_t type) { - assert(type == 1 || type == 2); - - if (type == 1) { - m_type_1_cnt.fetch_add(1); - m_type_1_total_time.fetch_add(length); - - if (length > m_type_1_largest_time) { - m_type_1_largest_time.store(length); - } - } else { - m_type_2_cnt.fetch_add(1); - m_type_2_total_time.fetch_add(length); - - if (length > m_type_2_largest_time) { - m_type_2_largest_time.store(length); - } - } + if (length > m_type_2_largest_time) { + m_type_2_largest_time.store(length); + } } - - void print_statistics() { - if (m_type_1_cnt > 0) { - fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", - m_type_1_cnt.load(), - m_type_1_total_time.load() / m_type_1_cnt.load(), - m_type_1_largest_time.load()); - } - if (m_type_2_cnt > 0) { - fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\tMax Recon. Latency:%ld\n", - m_type_2_cnt.load(), - m_type_2_total_time.load() / m_type_2_cnt.load(), - m_type_2_largest_time.load()); - } + } + + void print_statistics() { + if (m_type_1_cnt > 0) { + fprintf( + stdout, + "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", + m_type_1_cnt.load(), m_type_1_total_time.load() / m_type_1_cnt.load(), + m_type_1_largest_time.load()); + } + if (m_type_2_cnt > 0) { + fprintf(stdout, + "Reconstruction Count: %ld\tReconstruction Avg. Latency: " + "%ld\tMax Recon. 
Latency:%ld\n", + m_type_2_cnt.load(), + m_type_2_total_time.load() / m_type_2_cnt.load(), + m_type_2_largest_time.load()); } + } private: - std::mutex m_mutex; - std::unordered_map<size_t, JobInfo> m_jobs; - std::vector<Event> m_event_log; + std::mutex m_mutex; + std::unordered_map<size_t, JobInfo> m_jobs; + std::vector<Event> m_event_log; - std::atomic<size_t> m_type_1_cnt; - std::atomic<size_t> m_type_1_total_time; + std::atomic<size_t> m_type_1_cnt; + std::atomic<size_t> m_type_1_total_time; - std::atomic<size_t> m_type_2_cnt; - std::atomic<size_t> m_type_2_total_time; + std::atomic<size_t> m_type_2_cnt; + std::atomic<size_t> m_type_2_total_time; - std::atomic<size_t> m_type_1_largest_time; - std::atomic<size_t> m_type_2_largest_time; + std::atomic<size_t> m_type_1_largest_time; + std::atomic<size_t> m_type_2_largest_time; }; -} +} // namespace de diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index e95a799..acf1201 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -1,7 +1,7 @@ /* * include/framework/structure/BufferView.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -9,166 +9,150 @@ */ #pragma once -#include <cstdlib> #include <cassert> +#include <cstdlib> #include <functional> #include <utility> -#include "psu-util/alignment.h" -#include "psu-ds/BloomFilter.h" #include "framework/interface/Record.h" +#include "psu-ds/BloomFilter.h" +#include "psu-util/alignment.h" namespace de { -typedef std::function<void(void)> ReleaseFunction; +typedef std::function<void(void)> ReleaseFunction; -template <RecordInterface R> -class BufferView { +template <RecordInterface R> class BufferView { public: - BufferView() = default; - - /* - * the BufferView's lifetime is tightly linked to buffer versioning, and so - * copying and assignment are disabled. - */ - BufferView(const BufferView&) = delete; - BufferView &operator=(BufferView &) = delete; - - BufferView(BufferView &&other) - : m_data(std::exchange(other.m_data, nullptr)) - , m_release(std::move(other.m_release)) - , m_head(std::exchange(other.m_head, 0)) - , m_tail(std::exchange(other.m_tail, 0)) - , m_start(std::exchange(other.m_start, 0)) - , m_stop(std::exchange(other.m_stop, 0)) - , m_cap(std::exchange(other.m_cap, 0)) - , m_approx_ts_cnt(std::exchange(other.m_approx_ts_cnt, 0)) - , m_tombstone_filter(std::exchange(other.m_tombstone_filter, nullptr)) - , m_active(std::exchange(other.m_active, false)) {} - - BufferView &operator=(BufferView &&other) = delete; - - - BufferView(Wrapped<R> *buffer, size_t cap, size_t head, size_t tail, size_t tombstone_cnt, psudb::BloomFilter<R> *filter, - ReleaseFunction release) - : m_data(buffer) - , m_release(release) - , m_head(head) - , m_tail(tail) - , m_start(m_head % cap) - , m_stop(m_tail % cap) - , m_cap(cap) - , m_approx_ts_cnt(tombstone_cnt) - , m_tombstone_filter(filter) - , m_active(true) {} - - ~BufferView() { - if (m_active) { - m_release(); - } + BufferView() = default; + + /* + * the BufferView's lifetime is tightly linked to buffer versioning, so + * copying and assignment are disabled. 
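+ * Move construction is still permitted; the moved-from view is
+ * deactivated, so the release function is invoked at most once.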
+ */ + BufferView(const BufferView &) = delete; + BufferView &operator=(BufferView &) = delete; + + BufferView(BufferView &&other) + : m_data(std::exchange(other.m_data, nullptr)), + m_release(std::move(other.m_release)), + m_head(std::exchange(other.m_head, 0)), + m_tail(std::exchange(other.m_tail, 0)), + m_start(std::exchange(other.m_start, 0)), + m_stop(std::exchange(other.m_stop, 0)), + m_cap(std::exchange(other.m_cap, 0)), + m_approx_ts_cnt(std::exchange(other.m_approx_ts_cnt, 0)), + m_tombstone_filter(std::exchange(other.m_tombstone_filter, nullptr)), + m_active(std::exchange(other.m_active, false)) {} + + BufferView &operator=(BufferView &&other) = delete; + + BufferView(Wrapped<R> *buffer, size_t cap, size_t head, size_t tail, + size_t tombstone_cnt, psudb::BloomFilter<R> *filter, + ReleaseFunction release) + : m_data(buffer), m_release(release), m_head(head), m_tail(tail), + m_start(m_head % cap), m_stop(m_tail % cap), m_cap(cap), + m_approx_ts_cnt(tombstone_cnt), m_tombstone_filter(filter), + m_active(true) {} + + ~BufferView() { + if (m_active) { + m_release(); } + } - bool check_tombstone(const R& rec) { - if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) return false; - - for (size_t i=0; i<get_record_count(); i++) { - if (m_data[to_idx(i)].rec == rec && m_data[to_idx(i)].is_tombstone()) { - return true; - } - } + bool check_tombstone(const R &rec) { + if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) + return false; - return false; + for (size_t i = 0; i < get_record_count(); i++) { + if (m_data[to_idx(i)].rec == rec && m_data[to_idx(i)].is_tombstone()) { + return true; + } } - bool delete_record(const R& rec) { - if (m_start < m_stop) { - for (size_t i=m_start; i<m_stop; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - } - } else { - for (size_t i=m_start; i<m_cap; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - } - - for (size_t i=0; i<m_stop; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - - } + return false; + } + bool delete_record(const R &rec) { + if (m_start < m_stop) { + for (size_t i = m_start; i < m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; } + } + } else { + for (size_t i = m_start; i < m_cap; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; + } + } - return false; - } - - size_t get_record_count() { - return m_tail - m_head; - } - - size_t get_capacity() { - return m_cap; - } - - /* - * NOTE: This function returns an upper bound on the number - * of tombstones within the view. There may be less than - * this, due to synchronization issues during view creation. - */ - size_t get_tombstone_count() { - return m_approx_ts_cnt; - } - - Wrapped<R> *get(size_t i) { - //assert(i < get_record_count()); - return m_data + to_idx(i); - } - - void copy_to_buffer(psudb::byte *buffer) { - /* check if the region to be copied circles back to start. 
If so, do it in two steps */ - if (m_start > m_stop) { - size_t split_idx = m_cap - m_start; - - memcpy(buffer, (std::byte*) (m_data + m_start), split_idx* sizeof(Wrapped<R>)); - memcpy(buffer + (split_idx * sizeof(Wrapped<R>)), (std::byte*) m_data, m_stop * sizeof(Wrapped<R>)); - } else { - memcpy(buffer, (std::byte*) (m_data + m_start), get_record_count() * sizeof(Wrapped<R>)); + for (size_t i = 0; i < m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; } + } } - size_t get_tail() { - return m_tail; + return false; + } + + size_t get_record_count() { return m_tail - m_head; } + + size_t get_capacity() { return m_cap; } + + /* + * NOTE: This function returns an upper bound on the number + * of tombstones within the view. There may be less than + * this, due to synchronization issues during view creation. + */ + size_t get_tombstone_count() { return m_approx_ts_cnt; } + + Wrapped<R> *get(size_t i) { + return m_data + to_idx(i); + } + + void copy_to_buffer(psudb::byte *buffer) { + /* check if the region to be copied circles back to start. If so, do it in + * two steps */ + if (m_start > m_stop) { + size_t split_idx = m_cap - m_start; + + memcpy(buffer, (std::byte *)(m_data + m_start), + split_idx * sizeof(Wrapped<R>)); + memcpy(buffer + (split_idx * sizeof(Wrapped<R>)), (std::byte *)m_data, + m_stop * sizeof(Wrapped<R>)); + } else { + memcpy(buffer, (std::byte *)(m_data + m_start), + get_record_count() * sizeof(Wrapped<R>)); } + } - size_t get_head() { - return m_head; - } + size_t get_tail() { return m_tail; } + + size_t get_head() { return m_head; } private: - Wrapped<R>* m_data; - ReleaseFunction m_release; - size_t m_head; - size_t m_tail; - size_t m_start; - size_t m_stop; - size_t m_cap; - size_t m_approx_ts_cnt; - psudb::BloomFilter<R> *m_tombstone_filter; - bool m_active; - - size_t to_idx(size_t i) { - size_t idx = (m_start + i >= m_cap) ? i - (m_cap - m_start) - : m_start + i; - assert(idx < m_cap); - return idx; - } + Wrapped<R> *m_data; + ReleaseFunction m_release; + size_t m_head; + size_t m_tail; + size_t m_start; + size_t m_stop; + size_t m_cap; + size_t m_approx_ts_cnt; + psudb::BloomFilter<R> *m_tombstone_filter; + bool m_active; + + size_t to_idx(size_t i) { + size_t idx = (m_start + i >= m_cap) ? i - (m_cap - m_start) : m_start + i; + assert(idx < m_cap); + return idx; + } }; -} +} // namespace de diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index b83674b..2728246 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -1,8 +1,8 @@ /* * include/framework/structure/ExtensionStructure.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -22,622 +22,660 @@ namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L = LayoutPolicy::TEIRING> class ExtensionStructure { - typedef S Shard; - typedef BufferView<R> BuffView; + typedef typename ShardType::RECORD RecordType; + typedef BufferView<RecordType> BuffView; - typedef struct { - size_t reccnt; - size_t reccap; + typedef struct { + size_t reccnt; + size_t reccap; - size_t shardcnt; - size_t shardcap; - } level_state; + size_t shardcnt; + size_t shardcap; + } level_state; - typedef std::vector<level_state> state_vector; + typedef std::vector<level_state> state_vector; public: - ExtensionStructure(size_t buffer_size, size_t scale_factor, double max_delete_prop) - : m_scale_factor(scale_factor) - , m_max_delete_prop(max_delete_prop) - , m_buffer_size(buffer_size) - {} - - ~ExtensionStructure() = default; - - /* - * Create a shallow copy of this extension structure. The copy will share - * references to the same levels/shards as the original, but will have its - * own lists. As all of the shards are immutable (with the exception of - * deletes), the copy can be restructured with reconstructions and flushes - * without affecting the original. The copied structure will be returned - * with a reference count of 0; generally you will want to immediately call - * take_reference() on it. - * - * NOTE: When using tagged deletes, a delete of a record in the original - * structure will affect the copy, so long as the copy retains a reference - * to the same shard as the original. This could cause synchronization - * problems under tagging with concurrency. Any deletes in this context will - * need to be forwarded to the appropriate structures manually. - */ - ExtensionStructure<R, S, Q, L> *copy() { - auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, - m_max_delete_prop); - for (size_t i=0; i<m_levels.size(); i++) { - new_struct->m_levels.push_back(m_levels[i]->clone()); - } - - new_struct->m_refcnt = 0; - new_struct->m_current_state = m_current_state; + ExtensionStructure(size_t buffer_size, size_t scale_factor, + double max_delete_prop) + : m_scale_factor(scale_factor), m_max_delete_prop(max_delete_prop), + m_buffer_size(buffer_size) {} + + ~ExtensionStructure() = default; + + /* + * Create a shallow copy of this extension structure. The copy will share + * references to the same levels/shards as the original, but will have its + * own lists. As all of the shards are immutable (with the exception of + * deletes), the copy can be restructured with reconstructions and flushes + * without affecting the original. The copied structure will be returned + * with a reference count of 0; generally you will want to immediately call + * take_reference() on it. + * + * NOTE: When using tagged deletes, a delete of a record in the original + * structure will affect the copy, so long as the copy retains a reference + * to the same shard as the original. This could cause synchronization + * problems under tagging with concurrency. Any deletes in this context will + * need to be forwarded to the appropriate structures manually. 
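+ * A typical call sequence is therefore:
+ *   auto next = structure->copy(); next->take_reference();
+ * after which the copy can be restructured freely.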
+ */ + ExtensionStructure<ShardType, QueryType, L> *copy() { + auto new_struct = new ExtensionStructure<ShardType, QueryType, L>( + m_buffer_size, m_scale_factor, m_max_delete_prop); + for (size_t i = 0; i < m_levels.size(); i++) { + new_struct->m_levels.push_back(m_levels[i]->clone()); + } - return new_struct; + new_struct->m_refcnt = 0; + new_struct->m_current_state = m_current_state; + + return new_struct; + } + + /* + * Search for a record matching the argument and mark it deleted by + * setting the delete bit in its wrapped header. Returns 1 if a matching + * record was found and deleted, and 0 if a matching record was not found. + * + * This function will stop after finding the first matching record. It is + * assumed that no duplicate records exist. In the case of duplicates, this + * function will still "work", but in the sense of "delete first match". + */ + int tagged_delete(const RecordType &rec) { + for (auto level : m_levels) { + if (level && level->delete_record(rec)) { + return 1; + } } /* - * Search for a record matching the argument and mark it deleted by - * setting the delete bit in its wrapped header. Returns 1 if a matching - * record was found and deleted, and 0 if a matching record was not found. - * - * This function will stop after finding the first matching record. It is - * assumed that no duplicate records exist. In the case of duplicates, this - * function will still "work", but in the sense of "delete first match". + * If the record to be erased wasn't found, return 0. The + * DynamicExtension itself will then search the active + * Buffers. */ - int tagged_delete(const R &rec) { - for (auto level : m_levels) { - if (level && level->delete_record(rec)) { - return 1; - } - } - - /* - * If the record to be erased wasn't found, return 0. The - * DynamicExtension itself will then search the active - * Buffers. - */ - return 0; + return 0; + } + + /* + * Flush a buffer into the extension structure, performing any necessary + * reconstructions to free up room in L0. + * + * FIXME: arguably, this should be a method attached to the buffer that + * takes a structure as input. + */ + inline bool flush_buffer(BuffView buffer) { + state_vector tmp = m_current_state; + + if (tmp.size() == 0) { + grow(tmp); } - /* - * Flush a buffer into the extension structure, performing any necessary - * reconstructions to free up room in L0. - * - * FIXME: arguably, this should be a method attached to the buffer that - * takes a structure as input. - */ - inline bool flush_buffer(BuffView buffer) { - state_vector tmp = m_current_state; + assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); + flush_buffer_into_l0(std::move(buffer)); - if (tmp.size() == 0) { - grow(tmp); - } + return true; + } - assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); - flush_buffer_into_l0(std::move(buffer)); + /* + * Return the total number of records (including tombstones) within all + * of the levels of the structure. + */ + size_t get_record_count() { + size_t cnt = 0; - return true; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_record_count(); } - /* - * Return the total number of records (including tombstones) within all - * of the levels of the structure. 
- */ - size_t get_record_count() { - size_t cnt = 0; + return cnt; + } - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_record_count(); - } + /* + * Return the total number of tombstones contained within all of the + * levels of the structure. + */ + size_t get_tombstone_count() { + size_t cnt = 0; - return cnt; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_tombstone_count(); } - /* - * Return the total number of tombstones contained within all of the - * levels of the structure. - */ - size_t get_tombstone_count() { - size_t cnt = 0; - - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_tombstone_count(); - } - - return cnt; + return cnt; + } + + /* + * Return the number of levels within the structure. Note that not + * all of these levels are necessarily populated. + */ + size_t get_height() { return m_levels.size(); } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing the primary data structure and raw data. + */ + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_memory_usage(); } - /* - * Return the number of levels within the structure. Note that not - * all of these levels are necessarily populated. - */ - size_t get_height() { - return m_levels.size(); + return cnt; + } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing auxiliary data structures. This total does not + * include memory used for the main data structure, or raw data. + */ + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + cnt += m_levels[i]->get_aux_memory_usage(); + } } - /* - * Return the amount of memory (in bytes) used by the shards within the - * structure for storing the primary data structure and raw data. - */ - size_t get_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_memory_usage(); + return cnt; + } + + /* + * Validate that no level in the structure exceeds its maximum tombstone + * capacity. This is used to trigger preemptive compactions at the end of + * the reconstruction process. + */ + bool validate_tombstone_proportion() { + long double ts_prop; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + ts_prop = (long double)m_levels[i]->get_tombstone_count() / + (long double)calc_level_record_capacity(i); + if (ts_prop > (long double)m_max_delete_prop) { + return false; } - - return cnt; + } } - /* - * Return the amount of memory (in bytes) used by the shards within the - * structure for storing auxiliary data structures. This total does not - * include memory used for the main data structure, or raw data. + return true; + } + + bool validate_tombstone_proportion(level_index level) { + long double ts_prop = (long double)m_levels[level]->get_tombstone_count() / + (long double)calc_level_record_capacity(level); + return ts_prop <= (long double)m_max_delete_prop; + } + + /* + * Return a reference to the underlying vector of levels within the + * structure. + */ + std::vector<std::shared_ptr<InternalLevel<ShardType, QueryType>>> & + get_levels() { + return m_levels; + } + + /* + * NOTE: This cannot be simulated, because tombstone cancellation is not + * cheaply predictable. 
It is possible that the worst case number could + * be used instead, to allow for prediction, but compaction isn't a + * major concern outside of sampling; at least for now. So I'm not + * going to focus too much time on it at the moment. + */ + ReconstructionVector get_compaction_tasks() { + ReconstructionVector tasks; + state_vector scratch_state = m_current_state; + + /* if the tombstone/delete invariant is satisfied, no need for compactions */ - size_t get_aux_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) { - cnt += m_levels[i]->get_aux_memory_usage(); - } - } - - return cnt; + if (validate_tombstone_proportion()) { + return tasks; } - /* - * Validate that no level in the structure exceeds its maximum tombstone - * capacity. This is used to trigger preemptive compactions at the end of - * the reconstruction process. - */ - bool validate_tombstone_proportion() { - long double ts_prop; - for (size_t i = 0; i < m_levels.size(); i++) { - if (m_levels[i]) { - ts_prop = (long double)m_levels[i]->get_tombstone_count() / - (long double)calc_level_record_capacity(i); - if (ts_prop > (long double)m_max_delete_prop) { - return false; - } - } + /* locate the first level to violate the invariant */ + level_index violation_idx = -1; + for (level_index i = 0; i < m_levels.size(); i++) { + if (!validate_tombstone_proportion(i)) { + violation_idx = i; + break; } - - return true; } - bool validate_tombstone_proportion(level_index level) { - long double ts_prop = (long double) m_levels[level]->get_tombstone_count() / (long double) calc_level_record_capacity(level); - return ts_prop <= (long double) m_max_delete_prop; - } + assert(violation_idx != -1); - /* - * Return a reference to the underlying vector of levels within the - * structure. - */ - std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> &get_levels() { - return m_levels; + level_index base_level = + find_reconstruction_target(violation_idx, scratch_state); + if (base_level == -1) { + base_level = grow(scratch_state); } - /* - * NOTE: This cannot be simulated, because tombstone cancellation is not - * cheaply predictable. It is possible that the worst case number could - * be used instead, to allow for prediction, but compaction isn't a - * major concern outside of sampling; at least for now. So I'm not - * going to focus too much time on it at the moment. - */ - ReconstructionVector get_compaction_tasks() { - ReconstructionVector tasks; - state_vector scratch_state = m_current_state; - - /* if the tombstone/delete invariant is satisfied, no need for compactions */ - if (validate_tombstone_proportion()) { - return tasks; - } - - /* locate the first level to violate the invariant */ - level_index violation_idx = -1; - for (level_index i=0; i<m_levels.size(); i++) { - if (!validate_tombstone_proportion(i)) { - violation_idx = i; - break; - } - } - - assert(violation_idx != -1); - - level_index base_level = find_reconstruction_target(violation_idx, scratch_state); - if (base_level == -1) { - base_level = grow(scratch_state); - } - - for (level_index i=base_level; i>0; i--) { - /* - * The amount of storage required for the reconstruction accounts - * for the cost of storing the new records, along with the - * cost of retaining the old records during the process - * (hence the 2x multiplier). - * - * FIXME: currently does not account for the *actual* size - * of the shards, only the storage for the records - * themselves. 
- */ - size_t reccnt = m_levels[i - 1]->get_record_count(); - if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt, scratch_state)) { - reccnt += m_levels[i]->get_record_count(); - } - } - tasks.add_reconstruction(i-i, i, reccnt); + for (level_index i = base_level; i > 0; i--) { + /* + * The amount of storage required for the reconstruction accounts + * for the cost of storing the new records, along with the + * cost of retaining the old records during the process + * (hence the 2x multiplier). + * + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records + * themselves. + */ + size_t reccnt = m_levels[i - 1]->get_record_count(); + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, reccnt, scratch_state)) { + reccnt += m_levels[i]->get_record_count(); } - - return tasks; + } + tasks.add_reconstruction(i - i, i, reccnt); } + return tasks; + } + + /* + * + */ + ReconstructionVector + get_reconstruction_tasks(size_t buffer_reccnt, + state_vector scratch_state = {}) { /* - * + * If no scratch state vector is provided, use a copy of the + * current one. The only time an empty vector could be used as + * *real* input to this function is when the current state is also + * empty, so this should would even in that case. */ - ReconstructionVector get_reconstruction_tasks(size_t buffer_reccnt, - state_vector scratch_state={}) { - /* - * If no scratch state vector is provided, use a copy of the - * current one. The only time an empty vector could be used as - * *real* input to this function is when the current state is also - * empty, so this should would even in that case. - */ - if (scratch_state.size() == 0) { - scratch_state = m_current_state; - } - - ReconstructionVector reconstructions; - size_t LOOKAHEAD = 1; - for (size_t i=0; i<LOOKAHEAD; i++) { - /* - * If L0 cannot support a direct buffer flush, figure out what - * work must be done to free up space first. Otherwise, the - * reconstruction vector will be initially empty. - */ - if (!can_reconstruct_with(0, buffer_reccnt, scratch_state)) { - auto local_recon = get_reconstruction_tasks_from_level(0, scratch_state); - - /* - * for the first iteration, we need to do all of the - * reconstructions, so use these to initially the returned - * reconstruction list - */ - if (i == 0) { - reconstructions = local_recon; - /* - * Quick sanity test of idea: if the next reconstruction - * would be larger than this one, steal the largest - * task from it and run it now instead. - */ - } else if (local_recon.get_total_reccnt() > reconstructions.get_total_reccnt()) { - auto t = local_recon.remove_reconstruction(0); - reconstructions.add_reconstruction(t); - } - } - - /* simulate the buffer flush in the scratch state */ - scratch_state[0].reccnt += buffer_reccnt; - if (L == LayoutPolicy::TEIRING || scratch_state[0].shardcnt == 0) { - scratch_state[0].shardcnt += 1; - } - - } - - return std::move(reconstructions); + if (scratch_state.size() == 0) { + scratch_state = m_current_state; } - - /* - * - */ - ReconstructionVector get_reconstruction_tasks_from_level(level_index source_level, state_vector &scratch_state) { - ReconstructionVector reconstructions; + ReconstructionVector reconstructions; + size_t LOOKAHEAD = 1; + for (size_t i = 0; i < LOOKAHEAD; i++) { + /* + * If L0 cannot support a direct buffer flush, figure out what + * work must be done to free up space first. Otherwise, the + * reconstruction vector will be initially empty. 
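+ * (With LOOKAHEAD fixed at 1 above, only this first iteration of the
+ * lookahead loop currently runs.)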
+ */ + if (!can_reconstruct_with(0, buffer_reccnt, scratch_state)) { + auto local_recon = + get_reconstruction_tasks_from_level(0, scratch_state); /* - * Find the first level capable of sustaining a reconstruction from - * the level above it. If no such level exists, add a new one at - * the bottom of the structure. + * for the first iteration, we need to do all of the + * reconstructions, so use these to initially the returned + * reconstruction list */ - level_index base_level = find_reconstruction_target(source_level, scratch_state); - if (base_level == -1) { - base_level = grow(scratch_state); + if (i == 0) { + reconstructions = local_recon; + /* + * Quick sanity test of idea: if the next reconstruction + * would be larger than this one, steal the largest + * task from it and run it now instead. + */ + } else if (local_recon.get_total_reccnt() > + reconstructions.get_total_reccnt()) { + auto t = local_recon.remove_reconstruction(0); + reconstructions.add_reconstruction(t); } + } - if constexpr (L == LayoutPolicy::BSM) { - if (base_level == 0) { - return std::move(reconstructions); - } - - ReconstructionTask task; - task.target = base_level; - - size_t base_reccnt = 0; - for (level_index i=base_level; i>source_level; i--) { - auto recon_reccnt = scratch_state[i-1].reccnt; - base_reccnt += recon_reccnt; - scratch_state[i-1].reccnt = 0; - scratch_state[i-1].shardcnt = 0; - task.add_source(i-1, recon_reccnt); - } + /* simulate the buffer flush in the scratch state */ + scratch_state[0].reccnt += buffer_reccnt; + if (L == LayoutPolicy::TEIRING || scratch_state[0].shardcnt == 0) { + scratch_state[0].shardcnt += 1; + } + } - reconstructions.add_reconstruction(task); - scratch_state[base_level].reccnt = base_reccnt; - scratch_state[base_level].shardcnt = 1; + return reconstructions; + } - return std::move(reconstructions); - } + /* + * + */ + ReconstructionVector + get_reconstruction_tasks_from_level(level_index source_level, + state_vector &scratch_state) { + ReconstructionVector reconstructions; - /* - * Determine the full set of reconstructions necessary to open up - * space in the source level. - */ - for (level_index i=base_level; i>source_level; i--) { - size_t recon_reccnt = scratch_state[i-1].reccnt; - size_t base_reccnt = recon_reccnt; - - /* - * If using Leveling, the total reconstruction size will be the - * records in *both* base and target, because they will need to - * be merged (assuming that target isn't empty). - */ - if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, base_reccnt, scratch_state)) { - recon_reccnt += scratch_state[i].reccnt; - } - } - reconstructions.add_reconstruction(i-1, i, recon_reccnt); - - /* - * The base level will be emptied and its records moved to - * the target. - */ - scratch_state[i-1].reccnt = 0; - scratch_state[i-1].shardcnt = 0; - - /* - * The target level will have the records from the base level - * added to it, and potentially gain a shard if the LayoutPolicy - * is tiering or the level currently lacks any shards at all. - */ - scratch_state[i].reccnt += base_reccnt; - if (L == LayoutPolicy::TEIRING || scratch_state[i].shardcnt == 0) { - scratch_state[i].shardcnt += 1; - } - } - - return std::move(reconstructions); + /* + * Find the first level capable of sustaining a reconstruction from + * the level above it. If no such level exists, add a new one at + * the bottom of the structure. 
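+ * Note that grow() only extends the scratch state vector; the actual
+ * level is added later by the reconstruction or flush that executes
+ * this plan.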
+ */ + level_index base_level = + find_reconstruction_target(source_level, scratch_state); + if (base_level == -1) { + base_level = grow(scratch_state); } - inline void reconstruction(ReconstructionTask task) { - static_assert(L == LayoutPolicy::BSM); - std::vector<InternalLevel<R, Shard, Q>*> levels(task.sources.size()); - for (size_t i=0; i<task.sources.size(); i++) { - levels[i] = m_levels[task.sources[i]].get(); - } + if constexpr (L == LayoutPolicy::BSM) { + if (base_level == 0) { + return reconstructions; + } - auto new_level = InternalLevel<R, Shard, Q>::reconstruction(levels, task.target); - if (task.target >= m_levels.size()) { - m_current_state.push_back({new_level->get_record_count(), calc_level_record_capacity(task.target), - 1, 1}); - m_levels.emplace_back(new_level); - } else { - m_current_state[task.target] = {new_level->get_record_count(), calc_level_record_capacity(task.target), - 1, 1}; - m_levels[task.target] = new_level; - } + ReconstructionTask task; + task.target = base_level; - /* remove all of the levels that have been flattened */ - for (size_t i=0; i<task.sources.size(); i++) { - m_levels[task.sources[i]] = std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(task.sources[i], 1)); - m_current_state[task.sources[i]] = {0, calc_level_record_capacity(task.target), 0, 1}; - } + size_t base_reccnt = 0; + for (level_index i = base_level; i > source_level; i--) { + auto recon_reccnt = scratch_state[i - 1].reccnt; + base_reccnt += recon_reccnt; + scratch_state[i - 1].reccnt = 0; + scratch_state[i - 1].shardcnt = 0; + task.add_source(i - 1, recon_reccnt); + } + + reconstructions.add_reconstruction(task); + scratch_state[base_level].reccnt = base_reccnt; + scratch_state[base_level].shardcnt = 1; - return; + return reconstructions; } /* - * Combine incoming_level with base_level and reconstruct the shard, - * placing it in base_level. The two levels should be sequential--i.e. no - * levels are skipped in the reconstruction process--otherwise the - * tombstone ordering invariant may be violated. + * Determine the full set of reconstructions necessary to open up + * space in the source level. */ - inline void reconstruction(level_index base_level, level_index incoming_level) { - size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - if (base_level >= m_levels.size()) { - m_levels.emplace_back(std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(base_level, shard_capacity))); - m_current_state.push_back({0, calc_level_record_capacity(base_level), - 0, shard_capacity}); - } - - if constexpr (L == LayoutPolicy::LEVELING) { - /* if the base level has a shard, merge the base and incoming together to make a new one */ - if (m_levels[base_level]->get_shard_count() > 0) { - m_levels[base_level] = InternalLevel<R, Shard, Q>::reconstruction(m_levels[base_level].get(), m_levels[incoming_level].get()); - /* otherwise, we can just move the incoming to the base */ - } else { - m_levels[base_level] = m_levels[incoming_level]; - } - - } else { - m_levels[base_level]->append_level(m_levels[incoming_level].get()); - m_levels[base_level]->finalize(); + for (level_index i = base_level; i > source_level; i--) { + size_t recon_reccnt = scratch_state[i - 1].reccnt; + size_t base_reccnt = recon_reccnt; + + /* + * If using Leveling, the total reconstruction size will be the + * records in *both* base and target, because they will need to + * be merged (assuming that target isn't empty). 
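+ * Under tiering, the base level's records are instead appended to the
+ * target as a new shard, so only the base's record count is charged.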
+ */ + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, base_reccnt, scratch_state)) { + recon_reccnt += scratch_state[i].reccnt; } + } + reconstructions.add_reconstruction(i - 1, i, recon_reccnt); + + /* + * The base level will be emptied and its records moved to + * the target. + */ + scratch_state[i - 1].reccnt = 0; + scratch_state[i - 1].shardcnt = 0; + + /* + * The target level will have the records from the base level + * added to it, and potentially gain a shard if the LayoutPolicy + * is tiering or the level currently lacks any shards at all. + */ + scratch_state[i].reccnt += base_reccnt; + if (L == LayoutPolicy::TEIRING || scratch_state[i].shardcnt == 0) { + scratch_state[i].shardcnt += 1; + } + } - /* place a new, empty level where the incoming level used to be */ - m_levels[incoming_level] = std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); + return reconstructions; + } - /* - * Update the state vector to match the *real* state following - * the reconstruction - */ - m_current_state[base_level] = {m_levels[base_level]->get_record_count(), - calc_level_record_capacity(base_level), m_levels[base_level]->get_shard_count(), shard_capacity}; - m_current_state[incoming_level] = {0, calc_level_record_capacity(incoming_level), 0, shard_capacity}; + inline void reconstruction(ReconstructionTask task) { + static_assert(L == LayoutPolicy::BSM); + std::vector<InternalLevel<ShardType, QueryType> *> levels( + task.sources.size()); + for (size_t i = 0; i < task.sources.size(); i++) { + levels[i] = m_levels[task.sources[i]].get(); } - bool take_reference() { - m_refcnt.fetch_add(1); - return true; + auto new_level = InternalLevel<ShardType, QueryType>::reconstruction( + levels, task.target); + if (task.target >= m_levels.size()) { + m_current_state.push_back({new_level->get_record_count(), + calc_level_record_capacity(task.target), 1, + 1}); + m_levels.emplace_back(new_level); + } else { + m_current_state[task.target] = {new_level->get_record_count(), + calc_level_record_capacity(task.target), + 1, 1}; + m_levels[task.target] = new_level; } - bool release_reference() { - assert(m_refcnt.load() > 0); - m_refcnt.fetch_add(-1); - return true; + /* remove all of the levels that have been flattened */ + for (size_t i = 0; i < task.sources.size(); i++) { + m_levels[task.sources[i]] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(task.sources[i], 1)); + m_current_state[task.sources[i]] = { + 0, calc_level_record_capacity(task.target), 0, 1}; } - size_t get_reference_count() { - return m_refcnt.load(); + return; + } + + /* + * Combine incoming_level with base_level and reconstruct the shard, + * placing it in base_level. The two levels should be sequential--i.e. no + * levels are skipped in the reconstruction process--otherwise the + * tombstone ordering invariant may be violated. + */ + inline void reconstruction(level_index base_level, + level_index incoming_level) { + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 
1 : m_scale_factor; + + if (base_level >= m_levels.size()) { + m_levels.emplace_back( + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(base_level, + shard_capacity))); + m_current_state.push_back( + {0, calc_level_record_capacity(base_level), 0, shard_capacity}); } - std::vector<void *> get_query_states(std::vector<std::pair<ShardID, Shard*>> &shards, void *parms) { - std::vector<void*> states; - - for (auto &level : m_levels) { - level->get_query_states(shards, states, parms); - } + if constexpr (L == LayoutPolicy::LEVELING) { + /* if the base level has a shard, merge the base and incoming together to + * make a new one */ + if (m_levels[base_level]->get_shard_count() > 0) { + m_levels[base_level] = + InternalLevel<ShardType, QueryType>::reconstruction( + m_levels[base_level].get(), m_levels[incoming_level].get()); + /* otherwise, we can just move the incoming to the base */ + } else { + m_levels[base_level] = m_levels[incoming_level]; + } - return states; + } else { + m_levels[base_level]->append_level(m_levels[incoming_level].get()); + m_levels[base_level]->finalize(); } -private: - size_t m_scale_factor; - double m_max_delete_prop; - size_t m_buffer_size; - - std::atomic<size_t> m_refcnt; - - std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> m_levels; - - /* - * A pair of <record_count, shard_count> for each level in the - * structure. Record counts may be slightly inaccurate due to - * deletes. - */ - state_vector m_current_state; + /* place a new, empty level where the incoming level used to be */ + m_levels[incoming_level] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>( + incoming_level, + (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); /* - * Add a new level to the scratch state and return its index. - * - * IMPORTANT: This does _not_ add a level to the extension structure - * anymore. This is handled by the appropriate reconstruction and flush - * methods as needed. This function is for use in "simulated" - * reconstructions. + * Update the state vector to match the *real* state following + * the reconstruction */ - inline level_index grow(state_vector &scratch_state) { - level_index new_idx = m_levels.size(); - size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - scratch_state.push_back({0, calc_level_record_capacity(new_idx), - 0, new_shard_cap}); - return new_idx; + m_current_state[base_level] = {m_levels[base_level]->get_record_count(), + calc_level_record_capacity(base_level), + m_levels[base_level]->get_shard_count(), + shard_capacity}; + m_current_state[incoming_level] = { + 0, calc_level_record_capacity(incoming_level), 0, shard_capacity}; + } + + bool take_reference() { + m_refcnt.fetch_add(1); + return true; + } + + bool release_reference() { + assert(m_refcnt.load() > 0); + m_refcnt.fetch_add(-1); + return true; + } + + size_t get_reference_count() { return m_refcnt.load(); } + + std::vector<typename QueryType::LocalQuery *> + get_local_queries(std::vector<std::pair<ShardID, ShardType *>> &shards, + typename QueryType::Parameters *parms) { + + std::vector<typename QueryType::LocalQuery *> queries; + + for (auto &level : m_levels) { + level->get_local_queries(shards, queries, parms); } - /* - * Find the first level below the level indicated by idx that - * is capable of sustaining a reconstruction and return its - * level index. If no such level exists, returns -1. 
Also - * returns -1 if idx==0, and no such level exists, to simplify - * the logic of the first buffer flush. - */ - inline level_index find_reconstruction_target(level_index idx, state_vector &state) { + return queries; + } - /* - * this handles the very first buffer flush, when the state vector - * is empty. - */ - if (idx == 0 && state.size() == 0) return -1; +private: + size_t m_scale_factor; + double m_max_delete_prop; + size_t m_buffer_size; + + std::atomic<size_t> m_refcnt; + + std::vector<std::shared_ptr<InternalLevel<ShardType, QueryType>>> m_levels; + + /* + * A pair of <record_count, shard_count> for each level in the + * structure. Record counts may be slightly inaccurate due to + * deletes. + */ + state_vector m_current_state; + + /* + * Add a new level to the scratch state and return its index. + * + * IMPORTANT: This does _not_ add a level to the extension structure + * anymore. This is handled by the appropriate reconstruction and flush + * methods as needed. This function is for use in "simulated" + * reconstructions. + */ + inline level_index grow(state_vector &scratch_state) { + level_index new_idx = m_levels.size(); + size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + scratch_state.push_back( + {0, calc_level_record_capacity(new_idx), 0, new_shard_cap}); + return new_idx; + } + + /* + * Find the first level below the level indicated by idx that + * is capable of sustaining a reconstruction and return its + * level index. If no such level exists, returns -1. Also + * returns -1 if idx==0, and no such level exists, to simplify + * the logic of the first buffer flush. + */ + inline level_index find_reconstruction_target(level_index idx, + state_vector &state) { - size_t incoming_rec_cnt = state[idx].reccnt; - for (level_index i=idx+1; i<state.size(); i++) { - if (can_reconstruct_with(i, incoming_rec_cnt, state)) { - return i; - } + /* + * this handles the very first buffer flush, when the state vector + * is empty. + */ + if (idx == 0 && state.size() == 0) + return -1; - incoming_rec_cnt = state[idx].reccnt; - } + size_t incoming_rec_cnt = state[idx].reccnt; + for (level_index i = idx + 1; i < state.size(); i++) { + if (can_reconstruct_with(i, incoming_rec_cnt, state)) { + return i; + } - return -1; + incoming_rec_cnt = state[idx].reccnt; } - inline void flush_buffer_into_l0(BuffView buffer) { - size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - if (m_levels.size() == 0) { - m_levels.emplace_back(std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(0, shard_capacity))); + return -1; + } - m_current_state.push_back({0, calc_level_record_capacity(0), - 0, shard_capacity}); - } + inline void flush_buffer_into_l0(BuffView buffer) { + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - if constexpr (L == LayoutPolicy::LEVELING) { - // FIXME: Kludgey implementation due to interface constraints. 
- auto old_level = m_levels[0].get(); - auto temp_level = new InternalLevel<R, Shard, Q>(0, 1); - temp_level->append_buffer(std::move(buffer)); - - if (old_level->get_shard_count() > 0) { - m_levels[0] = InternalLevel<R, Shard, Q>::reconstruction(old_level, temp_level); - delete temp_level; - } else { - m_levels[0] = std::shared_ptr<InternalLevel<R, Shard, Q>>(temp_level); - } - } else { - m_levels[0]->append_buffer(std::move(buffer)); - } + if (m_levels.size() == 0) { + m_levels.emplace_back( + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(0, shard_capacity))); - /* update the state vector */ - m_current_state[0].reccnt = m_levels[0]->get_record_count(); - m_current_state[0].shardcnt = m_levels[0]->get_shard_count(); + m_current_state.push_back( + {0, calc_level_record_capacity(0), 0, shard_capacity}); } - /* - * Mark a given memory level as no-longer in use by the tree. For now this - * will just free the level. In future, this will be more complex as the - * level may not be able to immediately be deleted, depending upon who - * else is using it. - */ - inline void mark_as_unused(std::shared_ptr<InternalLevel<R, Shard, Q>> level) { - level.reset(); + if constexpr (L == LayoutPolicy::LEVELING) { + // FIXME: Kludgey implementation due to interface constraints. + auto old_level = m_levels[0].get(); + auto temp_level = new InternalLevel<ShardType, QueryType>(0, 1); + temp_level->append_buffer(std::move(buffer)); + + if (old_level->get_shard_count() > 0) { + m_levels[0] = InternalLevel<ShardType, QueryType>::reconstruction( + old_level, temp_level); + delete temp_level; + } else { + m_levels[0] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>(temp_level); + } + } else { + m_levels[0]->append_buffer(std::move(buffer)); } - /* - * Assume that level "0" should be larger than the buffer. The buffer - * itself is index -1, which should return simply the buffer capacity. - */ - inline size_t calc_level_record_capacity(level_index idx) { - return m_buffer_size * pow(m_scale_factor, idx+1); + /* update the state vector */ + m_current_state[0].reccnt = m_levels[0]->get_record_count(); + m_current_state[0].shardcnt = m_levels[0]->get_shard_count(); + } + + /* + * Mark a given memory level as no-longer in use by the tree. For now this + * will just free the level. In future, this will be more complex as the + * level may not be able to immediately be deleted, depending upon who + * else is using it. + */ + inline void + mark_as_unused(std::shared_ptr<InternalLevel<ShardType, QueryType>> level) { + level.reset(); + } + + /* + * Assume that level "0" should be larger than the buffer. The buffer + * itself is index -1, which should return simply the buffer capacity. + */ + inline size_t calc_level_record_capacity(level_index idx) { + return m_buffer_size * pow(m_scale_factor, idx + 1); + } + + /* + * Returns the number of records present on a specified level. + */ + inline size_t get_level_record_count(level_index idx) { + return (m_levels[idx]) ? m_levels[idx]->get_record_count() : 0; + } + + /* + * Determines if a level can sustain a reconstruction with incoming_rec_cnt + * additional records without exceeding its capacity. + */ + inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt, + state_vector &state) { + if (idx >= state.size()) { + return false; } - /* - * Returns the number of records present on a specified level. - */ - inline size_t get_level_record_count(level_index idx) { - return (m_levels[idx]) ? 
m_levels[idx]->get_record_count() : 0; + if constexpr (L == LayoutPolicy::LEVELING) { + return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; + } else if constexpr (L == LayoutPolicy::BSM) { + return state[idx].reccnt == 0; + } else { + return state[idx].shardcnt < state[idx].shardcap; } - /* - * Determines if a level can sustain a reconstruction with incoming_rec_cnt - * additional records without exceeding its capacity. - */ - inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt, state_vector &state) { - if (idx >= state.size()) { - return false; - } - - if constexpr (L == LayoutPolicy::LEVELING) { - return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; - } else if constexpr (L == LayoutPolicy::BSM) { - return state[idx].reccnt == 0; - } else { - return state[idx].shardcnt < state[idx].shardcap; - } - - /* unreachable */ - assert(true); - } + /* unreachable */ + assert(true); + } }; -} - +} // namespace de diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h index b962dcc..a4cf94d 100644 --- a/include/framework/structure/InternalLevel.h +++ b/include/framework/structure/InternalLevel.h @@ -1,8 +1,8 @@ /* * include/framework/structure/InternalLevel.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -15,276 +15,281 @@ */ #pragma once -#include <vector> #include <memory> +#include <vector> -#include "util/types.h" -#include "framework/interface/Shard.h" #include "framework/interface/Query.h" #include "framework/interface/Record.h" +#include "framework/interface/Shard.h" #include "framework/structure/BufferView.h" +#include "util/types.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType> class InternalLevel; - - -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType> class InternalLevel { - typedef S Shard; - typedef BufferView<R> BuffView; -public: - InternalLevel(ssize_t level_no, size_t shard_cap) - : m_level_no(level_no) - , m_shard_cnt(0) - , m_shards(shard_cap, nullptr) - , m_pending_shard(nullptr) - {} - - ~InternalLevel() { - delete m_pending_shard; - } + typedef typename ShardType::RECORD RecordType; + typedef BufferView<RecordType> BuffView; - /* - * Create a new shard combining the records from base_level and new_level, - * and return a shared_ptr to a new level containing this shard. This is used - * for reconstructions under the leveling layout policy. - * - * No changes are made to the levels provided as arguments. 
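/*
 * The reconstruction helpers here build a brand-new level and leave their
 * inputs untouched, so a reader that still holds a shared_ptr to the old
 * levels keeps a consistent view while the writer swaps in the replacement.
 * A compressed sketch of that copy-on-write pattern; the Level struct below
 * is a placeholder, not the framework's InternalLevel.
 */
#include <cassert>
#include <memory>
#include <vector>

struct Level {
  std::vector<int> records;
};

int main() {
  auto current = std::make_shared<Level>(Level{{1, 2, 3}});

  std::shared_ptr<Level> readers_view = current;    /* a reader pins this version */

  auto rebuilt = std::make_shared<Level>(*current); /* writer copies, never mutates */
  rebuilt->records.push_back(4);
  current = rebuilt;                                /* swap the new version in */

  assert(readers_view->records.size() == 3); /* pinned view is unchanged */
  assert(current->records.size() == 4);
  return 0;
}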
- */ - static std::shared_ptr<InternalLevel> reconstruction(InternalLevel* base_level, InternalLevel* new_level) { - assert(base_level->m_level_no > new_level->m_level_no || (base_level->m_level_no == 0 && new_level->m_level_no == 0)); - auto res = new InternalLevel(base_level->m_level_no, 1); - res->m_shard_cnt = 1; - std::vector<Shard *> shards = {base_level->m_shards[0].get(), +public: + InternalLevel(ssize_t level_no, size_t shard_cap) + : m_level_no(level_no), m_shard_cnt(0), m_shards(shard_cap, nullptr), + m_pending_shard(nullptr) {} + + ~InternalLevel() { delete m_pending_shard; } + + /* + * Create a new shard combining the records from base_level and new_level, + * and return a shared_ptr to a new level containing this shard. This is used + * for reconstructions under the leveling layout policy. + * + * No changes are made to the levels provided as arguments. + */ + static std::shared_ptr<InternalLevel> + reconstruction(InternalLevel *base_level, InternalLevel *new_level) { + assert(base_level->m_level_no > new_level->m_level_no || + (base_level->m_level_no == 0 && new_level->m_level_no == 0)); + auto res = new InternalLevel(base_level->m_level_no, 1); + res->m_shard_cnt = 1; + std::vector<ShardType *> shards = {base_level->m_shards[0].get(), new_level->m_shards[0].get()}; - res->m_shards[0] = std::make_shared<S>(shards); - return std::shared_ptr<InternalLevel>(res); + res->m_shards[0] = std::make_shared<ShardType>(shards); + return std::shared_ptr<InternalLevel>(res); + } + + static std::shared_ptr<InternalLevel> + reconstruction(std::vector<InternalLevel *> levels, size_t level_idx) { + std::vector<ShardType *> shards; + for (auto level : levels) { + for (auto shard : level->m_shards) { + if (shard) + shards.emplace_back(shard.get()); + } } - static std::shared_ptr<InternalLevel> reconstruction(std::vector<InternalLevel*> levels, size_t level_idx) { - std::vector<Shard *> shards; - for (auto level : levels) { - for (auto shard : level->m_shards) { - if (shard) shards.emplace_back(shard.get()); - } - } - - auto res = new InternalLevel(level_idx, 1); - res->m_shard_cnt = 1; - res->m_shards[0] = std::make_shared<S>(shards); + auto res = new InternalLevel(level_idx, 1); + res->m_shard_cnt = 1; + res->m_shards[0] = std::make_shared<ShardType>(shards); + + return std::shared_ptr<InternalLevel>(res); + } + + /* + * Create a new shard combining the records from all of + * the shards in level, and append this new shard into + * this level. This is used for reconstructions under + * the tiering layout policy. + * + * No changes are made to the level provided as an argument. + */ + void append_level(InternalLevel *level) { + // FIXME: that this is happening probably means that + // something is going terribly wrong earlier in the + // reconstruction logic. + if (level->get_shard_count() == 0) { + return; + } - return std::shared_ptr<InternalLevel>(res); + std::vector<ShardType *> shards; + for (auto shard : level->m_shards) { + if (shard) + shards.emplace_back(shard.get()); } - /* - * Create a new shard combining the records from all of - * the shards in level, and append this new shard into - * this level. This is used for reconstructions under - * the tiering layout policy. - * - * No changes are made to the level provided as an argument. - */ - void append_level(InternalLevel* level) { - // FIXME: that this is happening probably means that - // something is going terribly wrong earlier in the - // reconstruction logic. 
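/*
 * append_level is the tiering-side primitive: all shards of the source level
 * are combined into one new shard that is appended here, so each level fills
 * up to its shard capacity before spilling downward. A toy simulation of that
 * cascade which tracks shard counts only; the scale factor and flush count
 * are made-up values, not configuration taken from this patch.
 */
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t scale_factor = 4; /* hypothetical shards-per-level capacity */
  std::vector<std::size_t> shards_per_level = {0};

  for (std::size_t flush = 1; flush <= 20; flush++) {
    shards_per_level[0]++; /* a buffer flush adds one shard to level 0 */

    /* cascade: a full level is merged into a single shard on the level below */
    for (std::size_t lvl = 0; lvl < shards_per_level.size(); lvl++) {
      if (shards_per_level[lvl] == scale_factor) {
        if (lvl + 1 == shards_per_level.size())
          shards_per_level.push_back(0);
        shards_per_level[lvl + 1]++; /* combined shard lands on the next level */
        shards_per_level[lvl] = 0;   /* the source level is emptied */
      }
    }
  }

  for (std::size_t lvl = 0; lvl < shards_per_level.size(); lvl++)
    std::printf("level %zu: %zu shards\n", lvl, shards_per_level[lvl]);
  return 0;
}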
- if (level->get_shard_count() == 0) { - return; - } + if (m_shard_cnt == m_shards.size()) { + m_pending_shard = new ShardType(shards); + return; + } - std::vector<S*> shards; - for (auto shard : level->m_shards) { - if (shard) shards.emplace_back(shard.get()); - } + auto tmp = new ShardType(shards); + m_shards[m_shard_cnt] = std::shared_ptr<ShardType>(tmp); + + ++m_shard_cnt; + } + + /* + * Create a new shard using the records in the + * provided buffer, and append this new shard + * into this level. This is used for buffer + * flushes under the tiering layout policy. + */ + void append_buffer(BuffView buffer) { + if (m_shard_cnt == m_shards.size()) { + assert(m_pending_shard == nullptr); + m_pending_shard = new ShardType(std::move(buffer)); + return; + } - if (m_shard_cnt == m_shards.size()) { - m_pending_shard = new S(shards); - return; - } + m_shards[m_shard_cnt] = std::make_shared<ShardType>(std::move(buffer)); + ++m_shard_cnt; + } - auto tmp = new S(shards); - m_shards[m_shard_cnt] = std::shared_ptr<S>(tmp); + void finalize() { + if (m_pending_shard) { + for (size_t i = 0; i < m_shards.size(); i++) { + m_shards[i] = nullptr; + } - ++m_shard_cnt; + m_shards[0] = std::shared_ptr<ShardType>(m_pending_shard); + m_pending_shard = nullptr; + m_shard_cnt = 1; } - - /* - * Create a new shard using the records in the - * provided buffer, and append this new shard - * into this level. This is used for buffer - * flushes under the tiering layout policy. - */ - void append_buffer(BuffView buffer) { - if (m_shard_cnt == m_shards.size()) { - assert(m_pending_shard == nullptr); - m_pending_shard = new S(std::move(buffer)); - return; - } - - m_shards[m_shard_cnt] = std::make_shared<S>(std::move(buffer)); - ++m_shard_cnt; + } + + /* + * Create a new shard containing the combined records + * from all shards on this level and return it. + * + * No changes are made to this level. + */ + ShardType *get_combined_shard() { + if (m_shard_cnt == 0) { + return nullptr; } - void finalize() { - if (m_pending_shard) { - for (size_t i=0; i<m_shards.size(); i++) { - m_shards[i] = nullptr; - } - - m_shards[0] = std::shared_ptr<S>(m_pending_shard); - m_pending_shard = nullptr; - m_shard_cnt = 1; - } + std::vector<ShardType *> shards; + for (auto shard : m_shards) { + if (shard) + shards.emplace_back(shard.get()); } - /* - * Create a new shard containing the combined records - * from all shards on this level and return it. - * - * No changes are made to this level. 
- */ - Shard *get_combined_shard() { - if (m_shard_cnt == 0) { - return nullptr; - } - - std::vector<Shard *> shards; - for (auto shard : m_shards) { - if (shard) shards.emplace_back(shard.get()); - } - - return new S(shards); + return new ShardType(shards); + } + + void get_local_queries( + std::vector<std::pair<ShardID, ShardType *>> &shards, + std::vector<typename QueryType::LocalQuery *> &local_queries, + typename QueryType::Parameters *query_parms) { + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + auto local_query = + QueryType::local_preproc(m_shards[i].get(), query_parms); + shards.push_back({{m_level_no, (ssize_t)i}, m_shards[i].get()}); + local_queries.emplace_back(local_query); + } } + } - void get_query_states(std::vector<std::pair<ShardID, Shard *>> &shards, std::vector<void*>& shard_states, void *query_parms) { - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - auto shard_state = Q::get_query_state(m_shards[i].get(), query_parms); - shards.push_back({{m_level_no, (ssize_t) i}, m_shards[i].get()}); - shard_states.emplace_back(shard_state); - } + bool check_tombstone(size_t shard_stop, const RecordType &rec) { + if (m_shard_cnt == 0) + return false; + + for (int i = m_shard_cnt - 1; i >= (ssize_t)shard_stop; i--) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec, true); + if (res && res->is_tombstone()) { + return true; } + } } - - bool check_tombstone(size_t shard_stop, const R& rec) { - if (m_shard_cnt == 0) return false; - - for (int i = m_shard_cnt - 1; i >= (ssize_t) shard_stop; i--) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec, true); - if (res && res->is_tombstone()) { - return true; - } - } + return false; + } + + bool delete_record(const RecordType &rec) { + if (m_shard_cnt == 0) + return false; + + for (size_t i = 0; i < m_shards.size(); ++i) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec); + if (res) { + res->set_delete(); + return true; } - return false; + } } - bool delete_record(const R &rec) { - if (m_shard_cnt == 0) return false; - - for (size_t i = 0; i < m_shards.size(); ++i) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec); - if (res) { - res->set_delete(); - return true; - } - } - } + return false; + } - return false; + ShardType *get_shard(size_t idx) { + if (idx >= m_shard_cnt) { + return nullptr; } - Shard* get_shard(size_t idx) { - if (idx >= m_shard_cnt) { - return nullptr; - } + return m_shards[idx].get(); + } - return m_shards[idx].get(); - } + size_t get_shard_count() { return m_shard_cnt; } - size_t get_shard_count() { - return m_shard_cnt; + size_t get_record_count() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_record_count(); + } } - size_t get_record_count() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - cnt += m_shards[i]->get_record_count(); - } - } + return cnt; + } - return cnt; + size_t get_tombstone_count() { + size_t res = 0; + for (size_t i = 0; i < m_shard_cnt; ++i) { + if (m_shards[i]) { + res += m_shards[i]->get_tombstone_count(); + } } - - size_t get_tombstone_count() { - size_t res = 0; - for (size_t i = 0; i < m_shard_cnt; ++i) { - if (m_shards[i]) { - res += m_shards[i]->get_tombstone_count(); - } - } - return res; + return res; + } + + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_aux_memory_usage(); + } } - size_t 
get_aux_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]){ - cnt += m_shards[i]->get_aux_memory_usage(); - } - } + return cnt; + } - return cnt; + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_memory_usage(); + } } - size_t get_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - cnt += m_shards[i]->get_memory_usage(); - } - } - - return cnt; + return cnt; + } + + double get_tombstone_prop() { + size_t tscnt = 0; + size_t reccnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + tscnt += m_shards[i]->get_tombstone_count(); + reccnt += m_shards[i]->get_record_count(); + } } - double get_tombstone_prop() { - size_t tscnt = 0; - size_t reccnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - tscnt += m_shards[i]->get_tombstone_count(); - reccnt += m_shards[i]->get_record_count(); - } - } + return (double)tscnt / (double)(tscnt + reccnt); + } - return (double) tscnt / (double) (tscnt + reccnt); + std::shared_ptr<InternalLevel> clone() { + auto new_level = + std::make_shared<InternalLevel>(m_level_no, m_shards.size()); + for (size_t i = 0; i < m_shard_cnt; i++) { + new_level->m_shards[i] = m_shards[i]; } + new_level->m_shard_cnt = m_shard_cnt; - std::shared_ptr<InternalLevel> clone() { - auto new_level = std::make_shared<InternalLevel>(m_level_no, m_shards.size()); - for (size_t i=0; i<m_shard_cnt; i++) { - new_level->m_shards[i] = m_shards[i]; - } - new_level->m_shard_cnt = m_shard_cnt; - - return new_level; - } + return new_level; + } private: - ssize_t m_level_no; - - size_t m_shard_cnt; - size_t m_shard_size_cap; + ssize_t m_level_no; + + size_t m_shard_cnt; + size_t m_shard_size_cap; - std::vector<std::shared_ptr<Shard>> m_shards; - Shard *m_pending_shard; + std::vector<std::shared_ptr<ShardType>> m_shards; + ShardType *m_pending_shard; }; -} +} // namespace de diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h index 7db3980..625b04b 100644 --- a/include/framework/structure/MutableBuffer.h +++ b/include/framework/structure/MutableBuffer.h @@ -1,8 +1,8 @@ /* * include/framework/structure/MutableBuffer.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -18,301 +18,281 @@ */ #pragma once -#include <cstdlib> #include <atomic> #include <cassert> +#include <cstdlib> #include <immintrin.h> -#include "psu-util/alignment.h" -#include "util/bf_config.h" -#include "psu-ds/BloomFilter.h" #include "framework/interface/Record.h" #include "framework/structure/BufferView.h" - -using psudb::CACHELINE_SIZE; +#include "psu-ds/BloomFilter.h" +#include "psu-util/alignment.h" +#include "util/bf_config.h" namespace de { -template <RecordInterface R> -class MutableBuffer { - friend class BufferView<R>; +template <RecordInterface R> class MutableBuffer { + friend class BufferView<R>; - struct buffer_head { - size_t head_idx; - size_t refcnt; - }; - -public: - MutableBuffer(size_t low_watermark, size_t high_watermark, size_t capacity=0) - : m_lwm(low_watermark) - , m_hwm(high_watermark) - , m_cap((capacity == 0) ? 
2 * high_watermark : capacity) - , m_tail(0) - , m_head({0, 0}) - , m_old_head({high_watermark, 0}) - //, m_data((Wrapped<R> *) psudb::sf_aligned_alloc(CACHELINE_SIZE, m_cap * sizeof(Wrapped<R>))) - , m_data(new Wrapped<R>[m_cap]()) - , m_tombstone_filter(new psudb::BloomFilter<R>(BF_FPR, m_hwm, BF_HASH_FUNCS)) - , m_tscnt(0) - , m_old_tscnt(0) - , m_active_head_advance(false) - { - assert(m_cap > m_hwm); - assert(m_hwm >= m_lwm); - } + struct buffer_head { + size_t head_idx; + size_t refcnt; + }; - ~MutableBuffer() { - delete[] m_data; - delete m_tombstone_filter; +public: + MutableBuffer(size_t low_watermark, size_t high_watermark, + size_t capacity = 0) + : m_lwm(low_watermark), m_hwm(high_watermark), + m_cap((capacity == 0) ? 2 * high_watermark : capacity), m_tail(0), + m_head({0, 0}), m_old_head({high_watermark, 0}), + m_data(new Wrapped<R>[m_cap]()), + m_tombstone_filter( + new psudb::BloomFilter<R>(BF_FPR, m_hwm, BF_HASH_FUNCS)), + m_tscnt(0), m_old_tscnt(0), m_active_head_advance(false) { + assert(m_cap > m_hwm); + assert(m_hwm >= m_lwm); + } + + ~MutableBuffer() { + delete[] m_data; + delete m_tombstone_filter; + } + + int append(const R &rec, bool tombstone = false) { + int32_t tail = 0; + if ((tail = try_advance_tail()) == -1) { + return 0; } - int append(const R &rec, bool tombstone=false) { - int32_t tail = 0; - if ((tail = try_advance_tail()) == -1) { - return 0; - } - - Wrapped<R> wrec; - wrec.rec = rec; - wrec.header = 0; - if (tombstone) wrec.set_tombstone(); + Wrapped<R> wrec; + wrec.rec = rec; + wrec.header = 0; + if (tombstone) + wrec.set_tombstone(); - // FIXME: because of the mod, it isn't correct to use `pos` - // as the ordering timestamp in the header anymore. - size_t pos = tail % m_cap; - - m_data[pos] = wrec; - m_data[pos].set_timestamp(pos); - - if (tombstone) { - m_tscnt.fetch_add(1); - if (m_tombstone_filter) m_tombstone_filter->insert(rec); - } + // FIXME: because of the mod, it isn't correct to use `pos` + // as the ordering timestamp in the header anymore. 
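/*
 * A small stand-alone model of the circular indexing used in append(): the
 * logical tail only ever grows, the physical slot is tail % capacity, and once
 * the buffer wraps the slot number can no longer double as an ordering
 * timestamp (which is what the FIXME above points at). The capacity and record
 * values are invented for the example.
 */
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::size_t cap = 4;
  std::vector<int> slots(cap, 0);
  std::size_t head = 0, tail = 0; /* logical indices; they never wrap */

  for (int rec = 1; rec <= 6; rec++) {
    assert(tail - head < cap);    /* physical space must be available */
    std::size_t pos = tail % cap; /* the physical slot wraps around */
    slots[pos] = rec;
    tail++;
    if (tail - head > 3)
      head++;                     /* pretend a flush retired the oldest record */
  }

  assert(slots[0] == 5);          /* slot 0 now holds record 5, not record 1 */
  assert(tail == 6 && head == 3);
  return 0;
}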
+ size_t pos = tail % m_cap; - m_data[pos].set_visible(); + m_data[pos] = wrec; + m_data[pos].set_timestamp(pos); - return 1; + if (tombstone) { + m_tscnt.fetch_add(1); + if (m_tombstone_filter) + m_tombstone_filter->insert(rec); } - bool truncate() { - m_tscnt.store(0); - m_tail.store(0); - if (m_tombstone_filter) m_tombstone_filter->clear(); + m_data[pos].set_visible(); - return true; - } + return 1; + } - size_t get_record_count() { - return m_tail.load() - m_head.load().head_idx; - } - - size_t get_capacity() { - return m_cap; - } + bool truncate() { + m_tscnt.store(0); + m_tail.store(0); + if (m_tombstone_filter) + m_tombstone_filter->clear(); - bool is_full() { - return get_record_count() >= m_hwm; - } + return true; + } - bool is_at_low_watermark() { - return get_record_count() >= m_lwm; - } + size_t get_record_count() { return m_tail.load() - m_head.load().head_idx; } - size_t get_tombstone_count() { - return m_tscnt.load(); - } + size_t get_capacity() { return m_cap; } - bool delete_record(const R& rec) { - return get_buffer_view().delete_record(rec); - } + bool is_full() { return get_record_count() >= m_hwm; } - bool check_tombstone(const R& rec) { - return get_buffer_view().check_tombstone(rec); - } + bool is_at_low_watermark() { return get_record_count() >= m_lwm; } - size_t get_memory_usage() { - return m_cap * sizeof(Wrapped<R>); - } + size_t get_tombstone_count() { return m_tscnt.load(); } - size_t get_aux_memory_usage() { - return m_tombstone_filter->get_memory_usage(); - } + bool delete_record(const R &rec) { + return get_buffer_view().delete_record(rec); + } - BufferView<R> get_buffer_view(size_t target_head) { - size_t head = get_head(target_head); - auto f = std::bind(release_head_reference, (void *) this, head); + bool check_tombstone(const R &rec) { + return get_buffer_view().check_tombstone(rec); + } - return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); - } + size_t get_memory_usage() { return m_cap * sizeof(Wrapped<R>); } - BufferView<R> get_buffer_view() { - size_t head = get_head(m_head.load().head_idx); - auto f = std::bind(release_head_reference, (void *) this, head); + size_t get_aux_memory_usage() { + return m_tombstone_filter->get_memory_usage(); + } - return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); - } + BufferView<R> get_buffer_view(size_t target_head) { + size_t head = get_head(target_head); + auto f = std::bind(release_head_reference, (void *)this, head); - /* - * Advance the buffer following a reconstruction. Move current - * head and head_refcnt into old_head and old_head_refcnt, then - * assign new_head to old_head. 
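/*
 * advance_head retires the current head into the old_head slot so readers that
 * pinned the previous region can finish before its storage is reused, and the
 * advance is refused while the old head still holds references. A compressed,
 * single-threaded model of that handoff; plain structs stand in for the atomic
 * members of MutableBuffer.
 */
#include <cassert>
#include <cstddef>

struct BufferHead {
  std::size_t head_idx;
  std::size_t refcnt;
};

struct Heads {
  BufferHead head = {0, 0};
  BufferHead old_head = {0, 0};

  bool advance(std::size_t new_head) {
    if (old_head.refcnt > 0)
      return false;      /* refuse while the previous region is pinned */
    old_head = head;     /* the current head (and its readers) ages out */
    head = {new_head, 0};
    return true;
  }
};

int main() {
  Heads h;
  h.head.refcnt = 2;              /* two readers pinned the current region */
  assert(h.advance(100));         /* ok: old_head was unreferenced */
  assert(h.old_head.refcnt == 2); /* those readers now pin the old region */
  assert(!h.advance(200));        /* refused until they release */
  h.old_head.refcnt = 0;
  assert(h.advance(200));
  return 0;
}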
- */ - bool advance_head(size_t new_head) { - assert(new_head > m_head.load().head_idx); - assert(new_head <= m_tail.load()); - - /* refuse to advance head while there is an old with one references */ - if (m_old_head.load().refcnt > 0) { - //fprintf(stderr, "[W]: Refusing to advance head due to remaining reference counts\n"); - return false; - } - - m_active_head_advance.store(true); + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), + m_tombstone_filter, f); + } - buffer_head new_hd = {new_head, 0}; - buffer_head cur_hd; + BufferView<R> get_buffer_view() { + size_t head = get_head(m_head.load().head_idx); + auto f = std::bind(release_head_reference, (void *)this, head); - /* replace current head with new head */ - do { - cur_hd = m_head.load(); - } while(!m_head.compare_exchange_strong(cur_hd, new_hd)); + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), + m_tombstone_filter, f); + } - /* move the current head into the old head */ - m_old_head.store(cur_hd); + /* + * Advance the buffer following a reconstruction. Move current + * head and head_refcnt into old_head and old_head_refcnt, then + * assign new_head to old_head. + */ + bool advance_head(size_t new_head) { + assert(new_head > m_head.load().head_idx); + assert(new_head <= m_tail.load()); - m_active_head_advance.store(false); - return true; + /* refuse to advance head while there is an old with one references */ + if (m_old_head.load().refcnt > 0) { + // fprintf(stderr, "[W]: Refusing to advance head due to remaining + // reference counts\n"); + return false; } - /* - * FIXME: If target_head does not match *either* the old_head or the - * current_head, this routine will loop infinitely. - */ - size_t get_head(size_t target_head) { - buffer_head cur_hd, new_hd; - bool head_acquired = false; - - do { - if (m_old_head.load().head_idx == target_head) { - cur_hd = m_old_head.load(); - cur_hd.head_idx = target_head; - new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; - head_acquired = m_old_head.compare_exchange_strong(cur_hd, new_hd); - } else if (m_head.load().head_idx == target_head){ - cur_hd = m_head.load(); - cur_hd.head_idx = target_head; - new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; - head_acquired = m_head.compare_exchange_strong(cur_hd, new_hd); - } - } while(!head_acquired); - - return new_hd.head_idx; + m_active_head_advance.store(true); + + buffer_head new_hd = {new_head, 0}; + buffer_head cur_hd; + + /* replace current head with new head */ + do { + cur_hd = m_head.load(); + } while (!m_head.compare_exchange_strong(cur_hd, new_hd)); + + /* move the current head into the old head */ + m_old_head.store(cur_hd); + + m_active_head_advance.store(false); + return true; + } + + /* + * FIXME: If target_head does not match *either* the old_head or the + * current_head, this routine will loop infinitely. 
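/*
 * get_head acquires a reference by retrying compare_exchange_strong until the
 * refcount bump lands on whichever head currently matches target_head. A
 * self-contained sketch of that acquire loop on a two-word head struct; note
 * that std::atomic over a 16-byte struct is not guaranteed to be lock-free on
 * every platform, and the helper name below is invented for the example.
 */
#include <atomic>
#include <cassert>
#include <cstddef>

struct BufferHead {
  std::size_t head_idx;
  std::size_t refcnt;
};

/* bump the refcount of `head`, but only if it still points at target_idx */
bool try_acquire(std::atomic<BufferHead> &head, std::size_t target_idx) {
  BufferHead cur = head.load();
  if (cur.head_idx != target_idx)
    return false;
  BufferHead next = {cur.head_idx, cur.refcnt + 1};
  return head.compare_exchange_strong(cur, next); /* fails if another thread raced us */
}

int main() {
  std::atomic<BufferHead> head{{42, 0}};

  while (!try_acquire(head, 42)) { /* retry loop, as in get_head() */
  }

  assert(head.load().refcnt == 1);
  assert(!try_acquire(head, 7));   /* wrong head index: acquisition refused */
  return 0;
}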
+ */ + size_t get_head(size_t target_head) { + buffer_head cur_hd, new_hd; + bool head_acquired = false; + + do { + if (m_old_head.load().head_idx == target_head) { + cur_hd = m_old_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_old_head.compare_exchange_strong(cur_hd, new_hd); + } else if (m_head.load().head_idx == target_head) { + cur_hd = m_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_head.compare_exchange_strong(cur_hd, new_hd); + } + } while (!head_acquired); + + return new_hd.head_idx; + } + + void set_low_watermark(size_t lwm) { + assert(lwm < m_hwm); + m_lwm = lwm; + } + + size_t get_low_watermark() { return m_lwm; } + + void set_high_watermark(size_t hwm) { + assert(hwm > m_lwm); + assert(hwm < m_cap); + m_hwm = hwm; + } + + size_t get_high_watermark() { return m_hwm; } + + size_t get_tail() { return m_tail.load(); } + + /* + * Note: this returns the available physical storage capacity, + * *not* now many more records can be inserted before the + * HWM is reached. It considers the old_head to be "free" + * when it has no remaining references. This should be true, + * but a buggy framework implementation may violate the + * assumption. + */ + size_t get_available_capacity() { + if (m_old_head.load().refcnt == 0) { + return m_cap - (m_tail.load() - m_head.load().head_idx); } - void set_low_watermark(size_t lwm) { - assert(lwm < m_hwm); - m_lwm = lwm; - } + return m_cap - (m_tail.load() - m_old_head.load().head_idx); + } - size_t get_low_watermark() { - return m_lwm; - } +private: + int64_t try_advance_tail() { + size_t old_value = m_tail.load(); - void set_high_watermark(size_t hwm) { - assert(hwm > m_lwm); - assert(hwm < m_cap); - m_hwm = hwm; + /* if full, fail to advance the tail */ + if (old_value - m_head.load().head_idx >= m_hwm) { + return -1; } - size_t get_high_watermark() { - return m_hwm; - } + while (!m_tail.compare_exchange_strong(old_value, old_value + 1)) { + /* if full, stop trying and fail to advance the tail */ + if (m_tail.load() >= m_hwm) { + return -1; + } - size_t get_tail() { - return m_tail.load(); + _mm_pause(); } - /* - * Note: this returns the available physical storage capacity, - * *not* now many more records can be inserted before the - * HWM is reached. It considers the old_head to be "free" - * when it has no remaining references. This should be true, - * but a buggy framework implementation may violate the - * assumption. 
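/*
 * A worked example (with made-up numbers) of the distinction this note draws:
 * get_available_capacity reports physical headroom measured from the oldest
 * still-live head, which is not the same as the number of inserts left before
 * the high watermark.
 */
#include <cassert>
#include <cstddef>

int main() {
  const std::size_t cap = 2000, hwm = 1000;
  std::size_t tail = 1400, head = 900, old_head = 600;
  std::size_t old_head_refcnt = 1; /* a reader still pins the old region */

  /* physical headroom must include the old region while it is pinned */
  std::size_t available = (old_head_refcnt == 0) ? cap - (tail - head)
                                                 : cap - (tail - old_head);
  assert(available == 1200);

  /* logical headroom before the high watermark is a different quantity */
  std::size_t before_hwm = hwm - (tail - head);
  assert(before_hwm == 500);
  return 0;
}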
- */ - size_t get_available_capacity() { - if (m_old_head.load().refcnt == 0) { - return m_cap - (m_tail.load() - m_head.load().head_idx); - } + return old_value; + } - return m_cap - (m_tail.load() - m_old_head.load().head_idx); - } + size_t to_idx(size_t i, size_t head) { return (head + i) % m_cap; } -private: - int64_t try_advance_tail() { - size_t old_value = m_tail.load(); + static void release_head_reference(void *buff, size_t head) { + MutableBuffer<R> *buffer = (MutableBuffer<R> *)buff; - /* if full, fail to advance the tail */ - if (old_value - m_head.load().head_idx >= m_hwm) { - return -1; + buffer_head cur_hd, new_hd; + do { + if (buffer->m_old_head.load().head_idx == head) { + cur_hd = buffer->m_old_head; + if (cur_hd.refcnt == 0) + continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt - 1}; + if (buffer->m_old_head.compare_exchange_strong(cur_hd, new_hd)) { + break; } - - while (!m_tail.compare_exchange_strong(old_value, old_value+1)) { - /* if full, stop trying and fail to advance the tail */ - if (m_tail.load() >= m_hwm) { - return -1; - } - - _mm_pause(); + } else { + cur_hd = buffer->m_head; + if (cur_hd.refcnt == 0) + continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt - 1}; + + if (buffer->m_head.compare_exchange_strong(cur_hd, new_hd)) { + break; } + } + _mm_pause(); + } while (true); + } - return old_value; - } + size_t m_lwm; + size_t m_hwm; + size_t m_cap; - size_t to_idx(size_t i, size_t head) { - return (head + i) % m_cap; - } + alignas(64) std::atomic<size_t> m_tail; - static void release_head_reference(void *buff, size_t head) { - MutableBuffer<R> *buffer = (MutableBuffer<R> *) buff; - - buffer_head cur_hd, new_hd; - do { - if (buffer->m_old_head.load().head_idx == head) { - cur_hd = buffer->m_old_head; - if (cur_hd.refcnt == 0) continue; - new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; - if (buffer->m_old_head.compare_exchange_strong(cur_hd, new_hd)) { - break; - } - } else { - cur_hd = buffer->m_head; - if (cur_hd.refcnt == 0) continue; - new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; - - if (buffer->m_head.compare_exchange_strong(cur_hd, new_hd)) { - break; - } - } - _mm_pause(); - } while(true); - } + alignas(64) std::atomic<buffer_head> m_head; + alignas(64) std::atomic<buffer_head> m_old_head; + + Wrapped<R> *m_data; + psudb::BloomFilter<R> *m_tombstone_filter; + alignas(64) std::atomic<size_t> m_tscnt; + size_t m_old_tscnt; - size_t m_lwm; - size_t m_hwm; - size_t m_cap; - - alignas(64) std::atomic<size_t> m_tail; - - alignas(64) std::atomic<buffer_head> m_head; - alignas(64) std::atomic<buffer_head> m_old_head; - - Wrapped<R>* m_data; - psudb::BloomFilter<R>* m_tombstone_filter; - alignas(64) std::atomic<size_t> m_tscnt; - size_t m_old_tscnt; - - alignas(64) std::atomic<bool> m_active_head_advance; + alignas(64) std::atomic<bool> m_active_head_advance; }; -} +} // namespace de diff --git a/include/framework/util/Configuration.h b/include/framework/util/Configuration.h index 4a4524a..f4b0364 100644 --- a/include/framework/util/Configuration.h +++ b/include/framework/util/Configuration.h @@ -1,7 +1,7 @@ /* * include/framework/util/Configuration.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -13,35 +13,8 @@ namespace de { -static thread_local size_t sampling_attempts = 0; -static thread_local size_t sampling_rejections = 0; -static thread_local size_t deletion_rejections = 0; -static thread_local size_t bounds_rejections = 0; -static thread_local size_t tombstone_rejections = 0; -static thread_local size_t buffer_rejections = 0; +enum class LayoutPolicy { LEVELING, TEIRING, BSM }; -/* - * thread_local size_t various_sampling_times go here. - */ -static thread_local size_t sample_range_time = 0; -static thread_local size_t alias_time = 0; -static thread_local size_t alias_query_time = 0; -static thread_local size_t rejection_check_time = 0; -static thread_local size_t buffer_sample_time = 0; -static thread_local size_t memlevel_sample_time = 0; -static thread_local size_t disklevel_sample_time = 0; -static thread_local size_t sampling_bailouts = 0; - - -enum class LayoutPolicy { - LEVELING, - TEIRING, - BSM -}; - -enum class DeletePolicy { - TOMBSTONE, - TAGGING -}; +enum class DeletePolicy { TOMBSTONE, TAGGING }; -} +} // namespace de diff --git a/include/query/irs.h b/include/query/irs.h index 879d070..6dec850 100644 --- a/include/query/irs.h +++ b/include/query/irs.h @@ -1,12 +1,12 @@ /* * include/query/irs.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * - * A query class for independent range sampling. This query requires - * that the shard support get_lower_bound(key), get_upper_bound(key), + * A query class for independent range sampling. This query requires + * that the shard support get_lower_bound(key), get_upper_bound(key), * and get_record_at(index). */ #pragma once @@ -14,237 +14,227 @@ #include "framework/QueryRequirements.h" #include "psu-ds/Alias.h" -namespace de { namespace irs { +namespace de { +namespace irs { -template <RecordInterface R> -struct Parms { +template <ShardInterface S, bool REJECTION = true> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; size_t sample_size; gsl_rng *rng; -}; + }; - -template <RecordInterface R> -struct State { - size_t lower_bound; - size_t upper_bound; - size_t sample_size; + struct LocalQuery { + size_t lower_idx; + size_t upper_idx; size_t total_weight; -}; + size_t sample_size; + Parameters global_parms; + }; + + struct LocalQueryBuffer { + BufferView<R> *buffer; -template <RecordInterface R> -struct BufferState { size_t cutoff; std::vector<Wrapped<R>> records; + std::unique_ptr<psudb::Alias> alias; size_t sample_size; - BufferView<R> *buffer; - psudb::Alias *alias; + Parameters global_parms; + }; - BufferState(BufferView<R> *buffer) : buffer(buffer) {} - ~BufferState() { - delete alias; - } -}; + typedef Wrapped<R> LocalResultType; + typedef R ResultType; -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = false; - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); - res->lower_bound = 
shard->get_lower_bound(lower_key); - res->upper_bound = shard->get_upper_bound(upper_key); + query->global_parms = *parms; - if (res->lower_bound == shard->get_record_count()) { - res->total_weight = 0; - } else { - res->total_weight = res->upper_bound - res->lower_bound; - } + query->lower_idx = shard->get_lower_bound(query->global_parms.lower_bound); + query->upper_idx = shard->get_upper_bound(query->global_parms.upper_bound); - res->sample_size = 0; - return res; + if (query->lower_idx == shard->get_record_count()) { + query->total_weight = 0; + } else { + query->total_weight = query->upper_idx - query->lower_idx; } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); - - res->cutoff = res->buffer->get_record_count(); - res->sample_size = 0; - res->alias = nullptr; + query->sample_size = 0; + return query; + } - if constexpr (Rejection) { - return res; - } - - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; - for (size_t i=0; i<res->cutoff; i++) { - if ((res->buffer->get(i)->rec.key >= lower_key) && (buffer->get(i)->rec.key <= upper_key)) { - res->records.emplace_back(*(res->buffer->get(i))); - } - } + query->cutoff = query->buffer->get_record_count(); + query->sample_size = 0; + query->alias = nullptr; + query->global_parms = *parms; - return res; + if constexpr (REJECTION) { + return query; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buffer_state) { - auto p = (Parms<R> *) query_parms; - auto bs = (buffer_state) ? (BufferState<R> *) buffer_state : nullptr; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; + for (size_t i = 0; i < query->cutoff; i++) { + if ((query->buffer->get(i)->rec.key >= query->global_parms.lower_bound) && + (buffer->get(i)->rec.key <= query->global_parms.upper_bound)) { + query->records.emplace_back(*(query->buffer->get(i))); + } + } - /* for simplicity of static structure testing */ - if (!bs) { - assert(shard_states.size() == 1); - auto state = (State<R> *) shard_states[0]; - state->sample_size = p->sample_size; - return; - } + return query; + } - /* we only need to build the shard alias on the first call */ - if (bs->alias == nullptr) { - std::vector<size_t> weights; - if constexpr (Rejection) { - weights.push_back((bs) ? bs->cutoff : 0); - } else { - weights.push_back((bs) ? bs->records.size() : 0); - } - - size_t total_weight = weights[0]; - for (auto &s : shard_states) { - auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - // if no valid records fall within the query range, just - // set all of the sample sizes to 0 and bail out. 
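/*
 * distribute_query splits the global sample budget across the buffer and the
 * shards in proportion to how many in-range records each holds, drawing the
 * per-sample assignment from a weighted alias structure. A sketch of the same
 * allocation using std::discrete_distribution as a stand-in for psudb::Alias;
 * the weights, seed, and sample size are invented.
 */
#include <cassert>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

int main() {
  /* index 0 is the buffer; the rest are shards (in-range record counts) */
  std::vector<double> weights = {50, 1000, 4000, 0};
  const std::size_t sample_size = 1000;

  std::mt19937 rng(1337);
  std::discrete_distribution<std::size_t> pick(weights.begin(), weights.end());

  std::vector<std::size_t> assigned(weights.size(), 0);
  for (std::size_t i = 0; i < sample_size; i++)
    assigned[pick(rng)]++; /* each draw charges one sample to a source */

  assert(std::accumulate(assigned.begin(), assigned.end(), std::size_t{0}) ==
         sample_size);
  assert(assigned[3] == 0); /* a zero-weight shard is never sampled */
  return 0;
}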
- if (total_weight == 0) { - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (State<R> *) shard_states[i]; - state->sample_size = 0; - } - - return; - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - bs->alias = new psudb::Alias(normalized_weights); - } + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { - for (size_t i=0; i<p->sample_size; i++) { - auto idx = bs->alias->get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } + std::vector<size_t> shard_sample_sizes(local_queries.size() + 1, 0); + size_t buffer_sz = 0; - if (bs) { - bs->sample_size = buffer_sz; - } - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (State<R> *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } + /* for simplicity of static structure testing */ + if (!buffer_query) { + assert(local_queries.size() == 1); + local_queries[0]->sample_size = + local_queries[0]->global_parms.sample_size; + return; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; - auto rng = ((Parms<R> *) parms)->rng; - - auto state = (State<R> *) q_state; - auto sample_sz = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_sz == 0 || state->lower_bound == shard->get_record_count()) { - return result_set; + /* we only need to build the shard alias on the first call */ + if (buffer_query->alias == nullptr) { + std::vector<size_t> weights; + if constexpr (REJECTION) { + weights.push_back(buffer_query->cutoff); + } else { + weights.push_back(buffer_query->records.size()); + } + + size_t total_weight = weights[0]; + for (auto &q : local_queries) { + total_weight += q->total_weight; + weights.push_back(q->total_weight); + } + + /* + * if no valid records fall within the query range, + * set all of the sample sizes to 0 and bail out. + */ + if (total_weight == 0) { + for (auto q : local_queries) { + q->sample_size = 0; } - size_t attempts = 0; - size_t range_length = state->upper_bound - state->lower_bound; - do { - attempts++; - size_t idx = (range_length > 0) ? 
gsl_rng_uniform_int(rng, range_length) : 0; - result_set.emplace_back(*shard->get_record_at(state->lower_bound + idx)); - } while (attempts < sample_sz); + return; + } - return result_set; - } + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double)w / (double)total_weight); + } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; + buffer_query->alias = std::make_unique<psudb::Alias>(normalized_weights); + } - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); + for (size_t i = 0; i < parms->sample_size; i++) { + auto idx = buffer_query->alias->get(parms->rng); + if (idx == 0) { + buffer_sz++; + } else { + shard_sample_sizes[idx - 1]++; + } + } - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = st->buffer->get(idx); + if (buffer_query) { + buffer_query->sample_size = buffer_sz; + } - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } + for (size_t i = 0; i < local_queries.size(); i++) { + local_queries[i]->sample_size = shard_sample_sizes[i]; + } + } - return result; - } + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + auto sample_sz = query->sample_size; - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->records.size()); - result.emplace_back(st->records[idx]); - } + std::vector<LocalResultType> result_set; - return result; + if (sample_sz == 0 || query->lower_idx == shard->get_record_count()) { + return result_set; } - static void merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } + size_t attempts = 0; + size_t range_length = query->upper_idx - query->lower_idx; + do { + attempts++; + size_t idx = + (range_length > 0) + ? 
gsl_rng_uniform_int(query->global_parms.rng, range_length) + : 0; + result_set.emplace_back(*shard->get_record_at(query->lower_idx + idx)); + } while (attempts < sample_sz); + + return result_set; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; + result.reserve(query->sample_size); + + if constexpr (REJECTION) { + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = gsl_rng_uniform_int(query->global_parms.rng, query->cutoff); + auto rec = query->buffer->get(idx); + + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*rec); } - } + } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + return result; } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = + gsl_rng_uniform_int(query->global_parms.rng, query->records.size()); + result.emplace_back(query->records[idx]); } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; - - if (results.size() < p->sample_size) { - auto q = *p; - q.sample_size -= results.size(); - process_query_states(&q, states, buffer_state); - return true; - } + return result; + } - return false; + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + output.emplace_back(local_results[i][j].rec); + } } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + if (output.size() < parms->sample_size) { + parms->sample_size -= output.size(); + distribute_query(parms, local_queries, buffer_query); + return true; + } + + return false; + } }; -}} +} // namespace irs +} // namespace de diff --git a/include/query/knn.h b/include/query/knn.h index a227293..87ea10a 100644 --- a/include/query/knn.h +++ b/include/query/knn.h @@ -6,7 +6,7 @@ * Distributed under the Modified BSD License. * * A query class for k-NN queries, designed for use with the VPTree - * shard. + * shard. * * FIXME: no support for tombstone deletes just yet. This would require a * query resumption mechanism, most likely. 
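/*
 * The k-NN query that follows keeps its k best candidates in a max-heap keyed
 * on distance, evicting the current worst whenever a closer record turns up.
 * A compact stand-alone version of that bounded-heap pattern over 1-D points;
 * the framework's version runs over a VPTree shard and wrapped records, and
 * the data below is invented.
 */
#include <cassert>
#include <cmath>
#include <cstddef>
#include <queue>
#include <vector>

int main() {
  const double query = 10.0;
  const std::size_t k = 3;
  std::vector<double> points = {1, 7, 9, 11, 25, 14, 10.5};

  /* max-heap ordered by distance to the query point */
  auto cmp = [&](double a, double b) {
    return std::abs(a - query) < std::abs(b - query);
  };
  std::priority_queue<double, std::vector<double>, decltype(cmp)> pq(cmp);

  for (double p : points) {
    if (pq.size() < k) {
      pq.push(p);
    } else if (std::abs(p - query) < std::abs(pq.top() - query)) {
      pq.pop(); /* evict the current worst candidate */
      pq.push(p);
    }
  }

  assert(pq.size() == k);
  assert(std::abs(pq.top() - query) <= 1.0); /* worst survivor is within 1.0 */
  return 0;
}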
@@ -16,147 +16,147 @@ #include "framework/QueryRequirements.h" #include "psu-ds/PriorityQueue.h" -namespace de { namespace knn { +namespace de { +namespace knn { using psudb::PriorityQueue; -template <NDRecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { R point; size_t k; -}; + }; -template <NDRecordInterface R> -struct State { - size_t k; -}; + struct LocalQuery { + Parameters global_parms; + }; -template <NDRecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; + Parameters global_parms; + }; - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; -template <NDRecordInterface R, ShardInterface<R> S> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + query->global_parms = *parms; - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } + return query; + } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - return new BufferState<R>(buffer); - } + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->global_parms = *parms; + query->buffer = buffer; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; - } + return query; + } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> results; - Parms<R> *p = (Parms<R> *) parms; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } - PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(p->k, &wrec); + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> results; - shard->search(p->point, p->k, pq); + Wrapped<R> wrec; + wrec.rec = query->global_parms.point; + wrec.header = 0; - while (pq.size() > 0) { - results.emplace_back(*pq.peek().data); - pq.pop(); - } + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(query->global_parms.k, + &wrec); - return results; + shard->search(query->global_parms.point, query->global_parms.k, pq); + + while (pq.size() > 0) { + results.emplace_back(*pq.peek().data); + pq.pop(); } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - Parms<R> *p = (Parms<R> *) parms; - BufferState<R> *s = (BufferState<R> *) state; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; - - size_t k = p->k; - - PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(k, &wrec); - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - // Skip over deleted records (under tagging) - if (s->buffer->get(i)->is_deleted()) { - continue; - } - - if (pq.size() < k) { - pq.push(s->buffer->get(i)); - } else { - double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); - double cur_dist = (s->buffer->get(i))->rec.calc_distance(wrec.rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(s->buffer->get(i)); - } - } - } + return results; + } - std::vector<Wrapped<R>> results; - while (pq.size() > 0) { - 
results.emplace_back(*(pq.peek().data)); - pq.pop(); - } + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { - return std::move(results); - } + std::vector<LocalResultType> results; - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - Parms<R> *p = (Parms<R> *) parms; - R rec = p->point; - size_t k = p->k; - - PriorityQueue<R, DistCmpMax<R>> pq(k, &rec); - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - if (pq.size() < k) { - pq.push(&results[i][j].rec); - } else { - double head_dist = pq.peek().data->calc_distance(rec); - double cur_dist = results[i][j].rec.calc_distance(rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(&results[i][j].rec); - } - } - } - } + Wrapped<R> wrec; + wrec.rec = query->global_parms.point; + wrec.header = 0; - while (pq.size() > 0) { - output.emplace_back(*pq.peek().data); - pq.pop(); - } + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(query->global_parms.k, + &wrec); + + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + // Skip over deleted records (under tagging) + if (query->buffer->get(i)->is_deleted()) { + continue; + } - return std::move(output); + if (pq.size() < query->global_parms.k) { + pq.push(query->buffer->get(i)); + } else { + double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); + double cur_dist = (query->buffer->get(i))->rec.calc_distance(wrec.rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(query->buffer->get(i)); + } + } } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + while (pq.size() > 0) { + results.emplace_back(*(pq.peek().data)); + pq.pop(); } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + return std::move(results); + } + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + + PriorityQueue<R, DistCmpMax<R>> pq(parms->k, &(parms->point)); + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + if (pq.size() < parms->k) { + pq.push(&local_results[i][j].rec); + } else { + double head_dist = pq.peek().data->calc_distance(parms->point); + double cur_dist = local_results[i][j].rec.calc_distance(parms->point); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&local_results[i][j].rec); + } + } + } } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; + while (pq.size() > 0) { + output.emplace_back(*pq.peek().data); + pq.pop(); } -}; + } -}} + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } +}; +} // namespace knn +} // namespace de diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h index 94c2bce..f3788de 100644 --- a/include/query/pointlookup.h +++ b/include/query/pointlookup.h @@ -18,106 +18,102 @@ #include "framework/QueryRequirements.h" -namespace de { namespace pl { +namespace de { +namespace pl { -template <RecordInterface R> -struct Parms { - decltype(R::key) search_key; -}; +template <ShardInterface S> class Query { + typedef typename S::RECORD R; -template <RecordInterface R> -struct State { -}; - -template <RecordInterface R> -struct BufferState { - BufferView<R> *buffer; - 
- BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <KVPInterface R, ShardInterface<R> S> -class Query { public: - constexpr static bool EARLY_ABORT=true; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); + struct Parameters { + decltype(R::key) search_key; + }; - return res; - } + struct LocalQuery { + Parameters global_parms; + }; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + struct LocalQueryBuffer { + BufferView<R> *buffer; + Parameters global_parms; + }; + + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + + constexpr static bool EARLY_ABORT = true; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + query->global_parms = *parms; + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms = *parms; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + auto r = shard->point_lookup({query->global_parms.search_key, 0}); + + if (r) { + result.push_back(*r); } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - std::vector<Wrapped<R>> result; - - auto r = shard->point_lookup({p->search_key, 0}); + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; - if (r) { - result.push_back(*r); - } + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key == query->global_parms.search_key) { + result.push_back(*rec); return result; + } } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; - - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - - if (rec->rec.key == p->search_key) { - records.push_back(*rec); - return records; - } + return result; + } + + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (auto r : local_results) { + if (r.size() > 0) { + if (r[0].is_deleted() || r[0].is_tombstone()) { + return; } - return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (auto r : results) { - if (r.size() > 0) { - if (r[0].is_deleted() || r[0].is_tombstone()) { - return output; - } - - output.push_back(r[0].rec); - return output; - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } - - - static bool repeat(void *parms, std::vector<R> 
&results, std::vector<void*> states, void* buffer_state) { - return false; + output.push_back(r[0].rec); + return; + } } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; - -}} +} // namespace pl +} // namespace de diff --git a/include/query/rangecount.h b/include/query/rangecount.h index 5b95cdd..68d304d 100644 --- a/include/query/rangecount.h +++ b/include/query/rangecount.h @@ -5,169 +5,168 @@ * * Distributed under the Modified BSD License. * - * A query class for single dimensional range count queries. This query - * requires that the shard support get_lower_bound(key) and + * A query class for single dimensional range count queries. This query + * requires that the shard support get_lower_bound(key) and * get_record_at(index). */ #pragma once #include "framework/QueryRequirements.h" -namespace de { namespace rc { +namespace de { +namespace rc { -template <RecordInterface R> -struct Parms { +template <ShardInterface S, bool FORCE_SCAN = true> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; -}; + }; -template <RecordInterface R> -struct State { + struct LocalQuery { size_t start_idx; size_t stop_idx; -}; + Parameters global_parms; + }; -template <RecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; - - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <KVPInterface R, ShardInterface<R> S, bool FORCE_SCAN=false> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); - - return res; + Parameters global_parms; + }; + + struct LocalResultType { + size_t record_count; + size_t tombstone_count; + + bool is_deleted() {return false;} + bool is_tombstone() {return false;} + }; + + typedef size_t ResultType; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + + query->start_idx = shard->get_lower_bound(parms->lower_bound); + query->stop_idx = shard->get_record_count(); + query->global_parms.lower_bound = parms->lower_bound; + query->global_parms.upper_bound = parms->upper_bound; + + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms.lower_bound = parms->lower_bound; + query->global_parms.upper_bound = parms->upper_bound; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. 
+ */ + if (query->start_idx == shard->get_record_count()) { + return result; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + auto ptr = shard->get_record_at(query->start_idx); + size_t reccnt = 0; + size_t tscnt = 0; + + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. + */ + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key < query->global_parms.lower_bound) { + ptr++; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - size_t reccnt = 0; - size_t tscnt = 0; - - Wrapped<R> res; - res.rec.key= 0; // records - res.rec.value = 0; // tombstones - records.emplace_back(res); - - - auto start_idx = shard->get_lower_bound(p->lower_bound); - auto stop_idx = shard->get_lower_bound(p->upper_bound); + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key <= query->global_parms.upper_bound) { - /* - * if the returned index is one past the end of the - * records for the PGM, then there are not records - * in the index falling into the specified range. - */ - if (start_idx == shard->get_record_count()) { - return records; - } - - - /* - * roll the pointer forward to the first record that is - * greater than or equal to the lower bound. - */ - auto recs = shard->get_data(); - while(start_idx < stop_idx && recs[start_idx].rec.key < p->lower_bound) { - start_idx++; - } - - while (stop_idx < shard->get_record_count() && recs[stop_idx].rec.key <= p->upper_bound) { - stop_idx++; - } - size_t idx = start_idx; - size_t ts_cnt = 0; + if (!ptr->is_deleted()) { + reccnt++; - while (idx < stop_idx) { - ts_cnt += recs[idx].is_tombstone() * 2 + recs[idx].is_deleted(); - idx++; + if (ptr->is_tombstone()) { + tscnt++; } + } - records[0].rec.key = idx - start_idx; - records[0].rec.value = ts_cnt; - - return records; + ptr++; } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; - - std::vector<Wrapped<R>> records; - - Wrapped<R> res; - res.rec.key= 0; // records - res.rec.value = 0; // tombstones - records.emplace_back(res); - - size_t stop_idx; - if constexpr (FORCE_SCAN) { - stop_idx = s->buffer->get_capacity() / 2; - } else { - stop_idx = s->buffer->get_record_count(); - } - - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound - && !rec->is_deleted()) { - if (rec->is_tombstone()) { - records[0].rec.value++; - } else { - records[0].rec.key++; - } - } + result.push_back({reccnt, tscnt}); + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + + std::vector<LocalResultType> result; + size_t reccnt = 0; + size_t tscnt = 0; + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + if (!rec->is_deleted()) { + reccnt++; + if (rec->is_tombstone()) { + tscnt++; + } } - - return records; + } } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - R res; - res.key = 0; - res.value = 0; - output.emplace_back(res); + result.push_back({reccnt, tscnt}); - for (size_t i=0; 
i<results.size(); i++) { - output[0].key += results[i][0].rec.key; // records - output[0].value += results[i][0].rec.value; // tombstones - } + return result; + } - output[0].key -= output[0].value; - return output; - } + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + size_t reccnt = 0; + size_t tscnt = 0; - static void delete_query_state(void *state) { + for (auto &local_result : local_results) { + reccnt += local_result[0].record_count; + tscnt += local_result[0].tombstone_count; } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + /* if more tombstones than results, clamp the output at 0 */ + if (tscnt > reccnt) { + tscnt = reccnt; } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; - } + output.push_back({reccnt - tscnt}); + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; -}} +} // namespace rc +} // namespace de diff --git a/include/query/rangequery.h b/include/query/rangequery.h index e0690e6..e7be39c 100644 --- a/include/query/rangequery.h +++ b/include/query/rangequery.h @@ -1,177 +1,186 @@ /* * include/query/rangequery.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * - * A query class for single dimensional range queries. This query requires + * A query class for single dimensional range queries. This query requires * that the shard support get_lower_bound(key) and get_record_at(index). 
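 *
 * Illustrative only (an assumption inferred from the calls made in
 * local_query() below, not a definition in this header): a shard usable with
 * this query is expected to expose a sorted-array style interface roughly of
 * the form
 *
 *   size_t get_lower_bound(const K &key) const;       // index of first record >= key
 *   size_t get_record_count() const;
 *   const Wrapped<R> *get_record_at(size_t idx) const;
 *   Wrapped<R> *get_data() const;                      // base of the sorted run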
*/ #pragma once #include "framework/QueryRequirements.h" +#include "framework/interface/Record.h" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -namespace de { namespace rq { +namespace de { +namespace rq { -template <RecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; -}; + }; -template <RecordInterface R> -struct State { + struct LocalQuery { size_t start_idx; size_t stop_idx; -}; + Parameters global_parms; + }; -template <RecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; - - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <RecordInterface R, ShardInterface<R> S> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - auto p = (Parms<R> *) parms; - - res->start_idx = shard->get_lower_bound(p->lower_bound); - res->stop_idx = shard->get_record_count(); - - return res; + Parameters global_parms; + }; + + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + + query->start_idx = shard->get_lower_bound(parms->lower_bound); + query->stop_idx = shard->get_record_count(); + query->global_parms = *parms; + + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms = *parms; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. + */ + if (query->start_idx == shard->get_record_count()) { + return result; } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); + auto ptr = shard->get_record_at(query->start_idx); - return res; + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. + */ + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key < query->global_parms.lower_bound) { + ptr++; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*ptr); + ptr++; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - /* - * if the returned index is one past the end of the - * records for the PGM, then there are not records - * in the index falling into the specified range. 
- */ - if (s->start_idx == shard->get_record_count()) { - return records; - } - - auto ptr = shard->get_record_at(s->start_idx); - - /* - * roll the pointer forward to the first record that is - * greater than or equal to the lower bound. - */ - while(ptr < shard->get_data() + s->stop_idx && ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr < shard->get_data() + s->stop_idx && ptr->rec.key <= p->upper_bound) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } + return result; + } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; + std::vector<LocalResultType> result; + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*rec); + } } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(results.size()); - - psudb::PriorityQueue<Wrapped<R>> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor<Wrapped<R>>{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector<R>(); - } - - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : psudb::queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output; + return result; + } + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + std::vector<Cursor<LocalResultType>> cursors; + cursors.reserve(local_results.size()); + + psudb::PriorityQueue<LocalResultType> pq(local_results.size()); + size_t total = 0; + size_t tmp_n = local_results.size(); + + for (size_t i = 0; i < tmp_n; ++i) + if (local_results[i].size() > 0) { + auto base = local_results[i].data(); + cursors.emplace_back(Cursor<LocalResultType>{ + base, base + local_results[i].size(), 0, local_results[i].size()}); + assert(i == cursors.size() - 1); + total += local_results[i].size(); + pq.push(cursors[i].ptr, tmp_n - i - 1); + } else { + cursors.emplace_back(Cursor<LocalResultType>{nullptr, nullptr, 0, 0}); + } + + if (total == 0) { + return; } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + output.reserve(total); + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 + ? pq.peek(1) + : psudb::queue_record<LocalResultType>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); + pq.pop(); + auto &cursor1 = cursors[tmp_n - now.version - 1]; + auto &cursor2 = cursors[tmp_n - next.version - 1]; + if (advance_cursor<LocalResultType>(cursor1)) + pq.push(cursor1.ptr, now.version); + if (advance_cursor<LocalResultType>(cursor2)) + pq.push(cursor2.ptr, next.version); + } else { + auto &cursor = cursors[tmp_n - now.version - 1]; + if (!now.data->is_tombstone()) + output.push_back(cursor.ptr->rec); + + pq.pop(); + + if (advance_cursor<LocalResultType>(cursor)) + pq.push(cursor.ptr, now.version); + } } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } + return; + } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; - } + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; -}} +} // namespace rq +} // namespace de diff --git a/include/query/wirs.h b/include/query/wirs.h deleted file mode 100644 index 62b43f6..0000000 --- a/include/query/wirs.h +++ /dev/null @@ -1,251 +0,0 @@ -/* - * include/query/wirs.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * A query class for weighted independent range sampling. This - * class is tightly coupled with include/shard/AugBTree.h, and - * so is probably of limited general utility. 
- */ -#pragma once - -#include "framework/QueryRequirements.h" -#include "psu-ds/Alias.h" - -namespace de { namespace wirs { - -template <WeightedRecordInterface R> -struct Parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template <WeightedRecordInterface R> -struct State { - decltype(R::weight) total_weight; - std::vector<void*> nodes; - psudb::Alias* top_level_alias; - size_t sample_size; - - State() { - total_weight = 0; - top_level_alias = nullptr; - } - - ~State() { - if (top_level_alias) delete top_level_alias; - } -}; - -template <RecordInterface R> -struct BufferState { - size_t cutoff; - psudb::Alias* alias; - std::vector<Wrapped<R>> records; - decltype(R::weight) max_weight; - size_t sample_size; - decltype(R::weight) total_weight; - BufferView<R> *buffer; - - ~BufferState() { - delete alias; - } -}; - -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; - - std::vector<decltype(R::weight)> weights; - res->total_weight = shard->find_covering_nodes(lower_key, upper_key, res->nodes, weights); - - std::vector<double> normalized_weights; - for (auto weight : weights) { - normalized_weights.emplace_back(weight / res->total_weight); - } - - res->top_level_alias = new psudb::Alias(normalized_weights); - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - BufferState<R> *state = new BufferState<R>(); - auto parameters = (Parms<R>*) parms; - - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->sample_size = 0; - state->buffer = buffer; - return state; - } - - std::vector<decltype(R::weight)> weights; - - state->buffer = buffer; - decltype(R::weight) total_weight = 0; - - for (size_t i = 0; i <= buffer->get_record_count(); i++) { - auto rec = buffer->get(i); - - if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) { - weights.push_back(rec->rec.weight); - state->records.push_back(*rec); - total_weight += rec->rec.weight; - } - } - - std::vector<double> normalized_weights; - for (size_t i = 0; i < weights.size(); i++) { - normalized_weights.push_back(weights[i] / total_weight); - } - - state->total_weight = total_weight; - state->alias = new psudb::Alias(normalized_weights); - state->sample_size = 0; - - return state; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { - auto p = (Parms<R> *) query_parms; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+buffer_states.size(), 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - - decltype(R::weight) total_weight = 0; - for (auto &s : buffer_states) { - auto bs = (BufferState<R> *) s; - total_weight += bs->total_weight; - weights.push_back(bs->total_weight); - } - - for (auto &s : shard_states) { - auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> 
normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = psudb::Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - - if (idx < buffer_states.size()) { - auto state = (BufferState<R> *) buffer_states[idx]; - state->sample_size++; - } else { - auto state = (State<R> *) shard_states[idx - buffer_states.size()]; - state->sample_size++; - } - } - } - - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; - auto rng = ((Parms<R> *) parms)->rng; - - auto state = (State<R> *) q_state; - auto sample_size = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_size == 0) { - return result_set; - } - size_t cnt = 0; - size_t attempts = 0; - - for (size_t i=0; i<sample_size; i++) { - auto rec = shard->get_weighted_sample(lower_key, upper_key, - state->nodes[state->top_level_alias->get(rng)], - rng); - if (rec) { - result_set.emplace_back(*rec); - } - } - - return result_set; - } - - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; - auto buffer = st->buffer; - - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get(idx); - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } - - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; - - if (results.size() < p->sample_size) { - return true; - } - return false; - } -}; -}} diff --git a/include/query/wss.h b/include/query/wss.h index fb0b414..54620ca 100644 --- a/include/query/wss.h +++ b/include/query/wss.h @@ -6,7 +6,7 @@ * Distributed under the Modified BSD License. * * A query class for weighted set sampling. This - * class is tightly coupled with include/shard/Alias.h, + * class is tightly coupled with include/shard/Alias.h, * and so is probably of limited general utility. 
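 *
 * Rough sketch of the control flow (the numbers are illustrative, not taken
 * from any benchmark): if the buffer holds total weight 10 and two shards
 * hold weights 30 and 60, distribute_query() normalizes these to
 * {0.1, 0.3, 0.6}, builds a psudb::Alias over them, and draws sample_size
 * indices from it; index 0 assigns a draw to the buffer, index i > 0 to
 * shard i-1. Each shard then samples its assigned count in local_query(),
 * and repeat() reissues the remaining sample_size whenever rejected
 * (deleted) records leave the combined output short.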
*/ #pragma once @@ -14,203 +14,177 @@ #include "framework/QueryRequirements.h" #include "psu-ds/Alias.h" -namespace de { namespace wss { +namespace de { +namespace wss { -template <WeightedRecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { size_t sample_size; gsl_rng *rng; -}; + }; -template <WeightedRecordInterface R> -struct State { - decltype(R::weight) total_weight; + struct LocalQuery { size_t sample_size; + decltype(R::weight) total_weight; - State() { - total_weight = 0; - } -}; + Parameters global_parms; + }; + + struct LocalQueryBuffer { + BufferView<R> *buffer; -template <RecordInterface R> -struct BufferState { - size_t cutoff; size_t sample_size; - psudb::Alias *alias; - decltype(R::weight) max_weight; decltype(R::weight) total_weight; - BufferView<R> *buffer; + decltype(R::weight) max_weight; + size_t cutoff; - ~BufferState() { - delete alias; - } -}; + std::unique_ptr<psudb::Alias> alias; -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; + Parameters global_parms; + }; - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - res->total_weight = shard->get_total_weight(); - res->sample_size = 0; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = false; - return res; - } + typedef Wrapped<R> LocalResultType; + typedef R ResultType; - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - BufferState<R> *state = new BufferState<R>(); - auto parameters = (Parms<R>*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->buffer = buffer; - return state; - } + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); - std::vector<double> weights; + query->global_parms = *parms; + query->total_weight = shard->get_total_weight(); + query->sample_size = 0; - double total_weight = 0.0; - state->buffer = buffer; + return query; + } - for (size_t i = 0; i <= buffer->get_record_count(); i++) { - auto rec = buffer->get_data(i); - weights.push_back(rec->rec.weight); - total_weight += rec->rec.weight; - } + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } + query->cutoff = buffer->get_record_count() - 1; - state->alias = new psudb::Alias(weights); - state->total_weight = total_weight; + query->max_weight = 0; + query->total_weight = 0; - return state; - } + for (size_t i = 0; i < buffer->get_record_count(); i++) { + auto weight = buffer->get(i)->rec.weight; + query->total_weight += weight; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { - auto p = (Parms<R> *) query_parms; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+buffer_states.size(), 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - - decltype(R::weight) total_weight = 0; - for (auto &s : buffer_states) { - auto bs = (BufferState<R> *) s; - total_weight += bs->total_weight; - weights.push_back(bs->total_weight); - } - - for (auto &s : shard_states) { - 
auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = psudb::Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - - if (idx < buffer_states.size()) { - auto state = (BufferState<R> *) buffer_states[idx]; - state->sample_size++; - } else { - auto state = (State<R> *) shard_states[idx - buffer_states.size()]; - state->sample_size++; - } - } + if (weight > query->max_weight) { + query->max_weight = weight; + } } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto rng = ((Parms<R> *) parms)->rng; + query->buffer = buffer; + query->global_parms = *parms; - auto state = (State<R> *) q_state; - auto sample_size = state->sample_size; + query->alias = nullptr; - std::vector<Wrapped<R>> result_set; + return query; + } - if (sample_size == 0) { - return result_set; - } - size_t attempts = 0; - do { - attempts++; - size_t idx = shard->get_weighted_sample(rng); - result_set.emplace_back(*shard->get_record_at(idx)); - } while (attempts < sample_size); + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { - return result_set; + if (!buffer_query) { + assert(local_queries.size() == 1); + local_queries[0]->sample_size = + local_queries[0]->global_parms.sample_size; + return; } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; - auto buffer = st->buffer; + if (!buffer_query->alias) { + std::vector<decltype(R::weight)> weights; - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); + decltype(R::weight) total_weight = buffer_query->total_weight; + weights.push_back(total_weight); - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get(idx); + for (auto &q : local_queries) { + total_weight += q->total_weight; + weights.push_back(q->total_weight); + q->sample_size = 0; + } - auto test = gsl_rng_uniform(p->rng) * st->max_weight; + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double)w / (double)total_weight); + } - if (test <= rec->rec.weight) { - result.emplace_back(*rec); - } - } - return result; - } + buffer_query->alias = std::make_unique<psudb::Alias>(normalized_weights); + } - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(*(buffer->get_data() + idx)); - } + for (size_t i = 0; i < parms->sample_size; i++) { + auto idx = buffer_query->alias->get(parms->rng); - return result; + if (idx == 0) { + buffer_query->sample_size++; + } else { + local_queries[idx - 1]->sample_size++; + } } + } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; - return output; + if (query->sample_size == 0) { + return result; } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete 
s; + for (size_t i = 0; i < query->sample_size; i++) { + size_t idx = shard->get_weighted_sample(query->global_parms.rng); + if (!shard->get_record_at(idx)->is_deleted()) { + result.emplace_back(*shard->get_record_at(idx)); + } } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; + + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = gsl_rng_uniform_int(query->global_parms.rng, query->cutoff); + auto rec = query->buffer->get(idx); + + auto test = gsl_rng_uniform(query->global_parms.rng) * query->max_weight; + if (test <= rec->rec.weight && !rec->is_deleted()) { + result.emplace_back(*rec); + } } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; + return result; + } - if (results.size() < p->sample_size) { - return true; - } - return false; + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + output.emplace_back(local_results[i][j].rec); + } + } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + if (output.size() < parms->sample_size) { + parms->sample_size -= output.size(); + distribute_query(parms, local_queries, buffer_query); + return true; } -}; -}} + return false; + } +}; +} // namespace wss +} // namespace de diff --git a/include/shard/Alias.h b/include/shard/Alias.h index 72147d7..8fe70a5 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -25,21 +25,20 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; namespace de { -static thread_local size_t wss_cancelations = 0; - template <WeightedRecordInterface R> class Alias { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; typedef decltype(R::weight) W; + public: Alias(BufferView<R> buffer) : m_data(nullptr) @@ -71,7 +70,7 @@ public: } } - Alias(std::vector<Alias*> &shards) + Alias(std::vector<Alias*> const &shards) : m_data(nullptr) , m_alias(nullptr) , m_total_weight(0) @@ -167,7 +166,6 @@ public: size_t min = 0; size_t max = m_reccnt - 1; - const char * record_key; while (min < max) { size_t mid = (min + max) / 2; diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h deleted file mode 100644 index c60cbcd..0000000 --- a/include/shard/AugBTree.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * include/shard/AugBTree.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * A shard shim around the alias augmented B-tree. Designed to be - * used along side the WIRS query in include/query/wirs.h, but - * also supports the necessary methods for other common query - * types. - * - * TODO: The code in this file is very poorly commented. 
- */ -#pragma once - - -#include <vector> -#include <cassert> - -#include "framework/ShardRequirements.h" - -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "util/SortedMerge.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::Alias; -using psudb::byte; - -namespace de { - -template <WeightedRecordInterface R> -struct AugBTreeNode { - struct AugBTreeNode<R> *left, *right; - decltype(R::key) low, high; - decltype(R::weight) weight; - Alias* alias; -}; - -template <WeightedRecordInterface R> -class AugBTree { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - AugBTree(BufferView<R> buffer) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) - { - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - buffer.get_record_count() * - sizeof(Wrapped<R>), - (byte**) &m_data); - - auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - AugBTree(std::vector<AugBTree*> shards) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(nullptr) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count); - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - attemp_reccnt * sizeof(Wrapped<R>), - (byte **) &m_data); - - auto res = sorted_array_merge<R>(cursors, m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - ~AugBTree() { - free(m_data); - for (size_t i=0; i<m_alias.size(); i++) { - delete m_alias[i]; - } - - delete m_bf; - free_tree(m_root); - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - size_t get_memory_usage() { - return m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>); - } - - size_t get_aux_memory_usage() { - return (m_bf) ? m_bf->memory_usage() : 0; - } - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) { - W total_weight = 0; - - /* Simulate a stack to unfold recursion. 
*/ - struct AugBTreeNode<R>* st[64] = {0}; - st[0] = m_root; - size_t top = 1; - while(top > 0) { - auto now = st[--top]; - if (covered_by(now, lower_key, upper_key) || - (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) { - nodes.emplace_back(now); - weights.emplace_back(now->weight); - total_weight += now->weight; - } else { - if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left; - if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right; - } - } - - - return total_weight; - } - - Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) { - /* k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. */ - - /* first level */ - auto node = (AugBTreeNode<R>*) internal_node; - - /* second level */ - auto fat_point = node->low + node->alias->get(rng); - - /* third level */ - size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng); - auto record = m_data + rec_offset; - - /* bounds rejection */ - if (lower_key > record->rec.key || upper_key < record->rec.key) { - return nullptr; - } - - return record; - } - -private: - - bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; - } - - bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; - } - - void build_wirs_structure() { - m_group_size = std::ceil(std::log(m_reccnt)); - size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); - - // Fat point construction + low level alias.... 
- double sum_weight = 0.0; - std::vector<W> weights; - std::vector<double> group_norm_weight; - size_t i = 0; - size_t group_no = 0; - while (i < m_reccnt) { - double group_weight = 0.0; - group_norm_weight.clear(); - for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { - auto w = m_data[i].rec.weight; - group_norm_weight.emplace_back(w); - group_weight += w; - sum_weight += w; - } - - for (auto& w: group_norm_weight) - if (group_weight) w /= group_weight; - else w = 1.0 / group_norm_weight.size(); - m_alias.emplace_back(new Alias(group_norm_weight)); - - - weights.emplace_back(group_weight); - } - - assert(weights.size() == n_groups); - - m_root = construct_AugBTreeNode(weights, 0, n_groups-1); - } - - struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) { - if (low == high) { - return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; - } else if (low > high) return nullptr; - - std::vector<double> node_weights; - W sum = 0; - for (size_t i = low; i < high; ++i) { - node_weights.emplace_back(weights[i]); - sum += weights[i]; - } - - for (auto& w: node_weights) - if (sum) w /= sum; - else w = 1.0 / node_weights.size(); - - m_node_cnt += 1; - size_t mid = (low + high) / 2; - return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid), - construct_AugBTreeNode(weights, mid + 1, high), - low, high, sum, new Alias(node_weights)}; - } - - void free_tree(struct AugBTreeNode<R>* node) { - if (node) { - delete node->alias; - free_tree(node->left); - free_tree(node->right); - delete node; - } - } - - Wrapped<R>* m_data; - std::vector<Alias *> m_alias; - AugBTreeNode<R>* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - size_t m_node_cnt; - BloomFilter<R> *m_bf; -}; -} diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h index 3783b38..4e51037 100644 --- a/include/shard/FSTrie.h +++ b/include/shard/FSTrie.h @@ -26,6 +26,8 @@ namespace de { template <KVPInterface R> class FSTrie { +public: + typedef R RECORD; private: typedef decltype(R::key) K; @@ -80,7 +82,7 @@ public: delete[] temp_buffer; } - FSTrie(std::vector<FSTrie*> &shards) + FSTrie(std::vector<FSTrie*> const &shards) : m_data(nullptr) , m_reccnt(0) , m_alloc_size(0) diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 1cca506..64c0b2b 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -1,8 +1,8 @@ /* * include/shard/ISAMTree.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -12,258 +12,246 @@ */ #pragma once -#include <vector> #include <cassert> +#include <vector> #include "framework/ShardRequirements.h" -#include "util/bf_config.h" #include "psu-ds/BloomFilter.h" #include "util/SortedMerge.h" +#include "util/bf_config.h" -using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; +using psudb::CACHELINE_SIZE; namespace de { -template <KVPInterface R> -class ISAMTree { +template <KVPInterface R> class ISAMTree { private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; -typedef decltype(R::key) K; -typedef decltype(R::value) V; - -constexpr static size_t NODE_SZ = 256; -constexpr static size_t INTERNAL_FANOUT = NODE_SZ / (sizeof(K) + sizeof(byte*)); + constexpr static size_t NODE_SZ = 256; + constexpr static size_t INTERNAL_FANOUT = + NODE_SZ / (sizeof(K) + sizeof(byte *)); -struct InternalNode { + struct InternalNode { K keys[INTERNAL_FANOUT]; - byte* child[INTERNAL_FANOUT]; -}; - -static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); + byte *child[INTERNAL_FANOUT]; + }; -constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); + static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); + constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); public: - ISAMTree(BufferView<R> buffer) - : m_bf(nullptr) - , m_isam_nodes(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_internal_node_cnt(0) - , m_deleted_cnt(0) - , m_alloc_size(0) - { - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - buffer.get_record_count() * - sizeof(Wrapped<R>), - (byte**) &m_data); - - auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_internal_levels(); - } + typedef R RECORD; + + ISAMTree(BufferView<R> buffer) + : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), + m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0), + m_alloc_size(0) { + m_alloc_size = psudb::sf_aligned_alloc( + CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped<R>), + (byte **)&m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); + } + } + + ISAMTree(std::vector<ISAMTree *> const &shards) + : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), + m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0), + m_alloc_size(0) { + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = + build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); + + m_bf = nullptr; + m_alloc_size = psudb::sf_aligned_alloc( + CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped<R>), (byte **)&m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); } + } - ISAMTree(std::vector<ISAMTree*> &shards) - : m_bf(nullptr) - , m_isam_nodes(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_internal_node_cnt(0) - , m_deleted_cnt(0) - , m_alloc_size(0) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); - - m_bf = nullptr; - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, 
- attemp_reccnt * sizeof(Wrapped<R>), - (byte **) &m_data); - - auto res = sorted_array_merge<R>(cursors, m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_internal_levels(); - } + ~ISAMTree() { + free(m_data); + free(m_isam_nodes); + delete m_bf; + } + + Wrapped<R> *point_lookup(const R &rec, bool filter = false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; } - ~ISAMTree() { - free(m_data); - free(m_isam_nodes); - delete m_bf; + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; } - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } + while (idx < m_reccnt && m_data[idx].rec < rec) + ++idx; - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } + if (m_data[idx].rec == rec) { + return m_data + idx; + } - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + return nullptr; + } - if (m_data[idx].rec == rec) { - return m_data + idx; - } + Wrapped<R> *get_data() const { return m_data; } - return nullptr; - } + size_t get_record_count() const { return m_reccnt; } - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } + size_t get_tombstone_count() const { return m_tombstone_cnt; } - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } + size_t get_memory_usage() const { return m_internal_node_cnt * NODE_SZ; } + size_t get_aux_memory_usage() const { return (m_bf) ? m_bf->memory_usage() : 0; } - size_t get_memory_usage() { - return m_internal_node_cnt * NODE_SZ; - } + /* SortedShardInterface methods */ + size_t get_lower_bound(const K &key) const { + const InternalNode *now = m_root; + while (!is_leaf(reinterpret_cast<const byte *>(now))) { + const InternalNode *next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key <= now->keys[i]) { + next = reinterpret_cast<InternalNode *>(now->child[i]); + break; + } + } - size_t get_aux_memory_usage() { - return (m_bf) ? m_bf->memory_usage() : 0; + now = next ? next + : reinterpret_cast<const InternalNode *>( + now->child[INTERNAL_FANOUT - 1]); } - /* SortedShardInterface methods */ - size_t get_lower_bound(const K& key) const { - const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast<const byte*>(now))) { - const InternalNode* next = nullptr; - for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { - if (now->child[i + 1] == nullptr || key <= now->keys[i]) { - next = reinterpret_cast<InternalNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); + const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now); + while (pos < m_data + m_reccnt && pos->rec.key < key) + pos++; + + return pos - m_data; + } + + size_t get_upper_bound(const K &key) const { + const InternalNode *now = m_root; + while (!is_leaf(reinterpret_cast<const byte *>(now))) { + const InternalNode *next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key < now->keys[i]) { + next = reinterpret_cast<InternalNode *>(now->child[i]); + break; } + } - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; - - return pos - m_data; + now = next ? 
next + : reinterpret_cast<const InternalNode *>( + now->child[INTERNAL_FANOUT - 1]); } - size_t get_upper_bound(const K& key) const { - const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast<const byte*>(now))) { - const InternalNode* next = nullptr; - for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { - if (now->child[i + 1] == nullptr || key < now->keys[i]) { - next = reinterpret_cast<InternalNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); - } - - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; + const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now); + while (pos < m_data + m_reccnt && pos->rec.key <= key) + pos++; - return pos - m_data; - } + return pos - m_data; + } - const Wrapped<R>* get_record_at(size_t idx) const { - return (idx < m_reccnt) ? m_data + idx : nullptr; - } + const Wrapped<R> *get_record_at(size_t idx) const { + return (idx < m_reccnt) ? m_data + idx : nullptr; + } private: - void build_internal_levels() { - size_t n_leaf_nodes = m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); - - size_t level_node_cnt = n_leaf_nodes; - size_t node_cnt = 0; - do { - level_node_cnt = level_node_cnt / INTERNAL_FANOUT + (level_node_cnt % INTERNAL_FANOUT != 0); - node_cnt += level_node_cnt; - } while (level_node_cnt > 1); - - m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, (byte**) &m_isam_nodes); - m_internal_node_cnt = node_cnt; - - InternalNode* current_node = m_isam_nodes; - - const Wrapped<R>* leaf_base = m_data; - const Wrapped<R>* leaf_stop = m_data + m_reccnt; - while (leaf_base < leaf_stop) { - size_t fanout = 0; - for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { - auto rec_ptr = leaf_base + LEAF_FANOUT * i; - if (rec_ptr >= leaf_stop) break; - const Wrapped<R>* sep_key = std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); - current_node->keys[i] = sep_key->rec.key; - current_node->child[i] = (byte*)rec_ptr; - ++fanout; - } - current_node++; - leaf_base += fanout * LEAF_FANOUT; - } - - auto level_start = m_isam_nodes; - auto level_stop = current_node; - auto current_level_node_cnt = level_stop - level_start; - while (current_level_node_cnt > 1) { - auto now = level_start; - while (now < level_stop) { - size_t child_cnt = 0; - for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { - auto node_ptr = now + i; - ++child_cnt; - if (node_ptr >= level_stop) break; - current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; - current_node->child[i] = (byte*)node_ptr; - } - now += child_cnt; - current_node++; - } - level_start = level_stop; - level_stop = current_node; - current_level_node_cnt = level_stop - level_start; - } - - assert(current_level_node_cnt == 1); - m_root = level_start; + void build_internal_levels() { + size_t n_leaf_nodes = + m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); + + size_t level_node_cnt = n_leaf_nodes; + size_t node_cnt = 0; + do { + level_node_cnt = level_node_cnt / INTERNAL_FANOUT + + (level_node_cnt % INTERNAL_FANOUT != 0); + node_cnt += level_node_cnt; + } while (level_node_cnt > 1); + + m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, + (byte **)&m_isam_nodes); + m_internal_node_cnt = node_cnt; + + InternalNode *current_node = m_isam_nodes; + + const Wrapped<R> *leaf_base = m_data; + const Wrapped<R> *leaf_stop = m_data + m_reccnt; + while (leaf_base < leaf_stop) { + size_t fanout = 0; + for 
(size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto rec_ptr = leaf_base + LEAF_FANOUT * i; + if (rec_ptr >= leaf_stop) + break; + const Wrapped<R> *sep_key = + std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); + current_node->keys[i] = sep_key->rec.key; + current_node->child[i] = (byte *)rec_ptr; + ++fanout; + } + current_node++; + leaf_base += fanout * LEAF_FANOUT; } - bool is_leaf(const byte* ptr) const { - return ptr >= (const byte*)m_data && ptr < (const byte*)(m_data + m_reccnt); + auto level_start = m_isam_nodes; + auto level_stop = current_node; + auto current_level_node_cnt = level_stop - level_start; + while (current_level_node_cnt > 1) { + auto now = level_start; + while (now < level_stop) { + size_t child_cnt = 0; + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto node_ptr = now + i; + ++child_cnt; + if (node_ptr >= level_stop) + break; + current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; + current_node->child[i] = (byte *)node_ptr; + } + now += child_cnt; + current_node++; + } + level_start = level_stop; + level_stop = current_node; + current_level_node_cnt = level_stop - level_start; } - psudb::BloomFilter<R> *m_bf; - InternalNode* m_isam_nodes; - InternalNode* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_internal_node_cnt; - size_t m_deleted_cnt; - size_t m_alloc_size; - - Wrapped<R>* m_data; + assert(current_level_node_cnt == 1); + m_root = level_start; + } + + bool is_leaf(const byte *ptr) const { + return ptr >= (const byte *)m_data && + ptr < (const byte *)(m_data + m_reccnt); + } + + psudb::BloomFilter<R> *m_bf; + InternalNode *m_isam_nodes; + InternalNode *m_root; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_internal_node_cnt; + size_t m_deleted_cnt; + size_t m_alloc_size; + + Wrapped<R> *m_data; }; -} +} // namespace de diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 509796b..7d1f492 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -33,6 +33,8 @@ namespace de { template <RecordInterface R, size_t epsilon=128> class PGM { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; @@ -109,7 +111,7 @@ public: } } - PGM(std::vector<PGM*> shards) + PGM(std::vector<PGM*> const &shards) : m_data(nullptr) , m_bf(nullptr) , m_reccnt(0) diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 581277e..9d8c3bb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -30,6 +30,8 @@ namespace de { template <KVPInterface R, size_t E=1024> class TrieSpline { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; @@ -122,7 +124,7 @@ public: } } - TrieSpline(std::vector<TrieSpline*> &shards) + TrieSpline(std::vector<TrieSpline*> const &shards) : m_reccnt(0) , m_tombstone_cnt(0) , m_alloc_size(0) diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index d5a2393..477db5c 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -21,13 +21,15 @@ using psudb::CACHELINE_SIZE; using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; namespace de { template <NDRecordInterface R, size_t LEAFSZ=100, bool HMAP=false> class VPTree { +public: + typedef R RECORD; + private: struct vpnode { size_t start; @@ -50,7 +52,7 @@ private: public: VPTree(BufferView<R> buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { + : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) { m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, @@ -59,8 +61,6 @@ 
public: (byte**) &m_data); m_ptrs = new vp_ptr[buffer.get_record_count()]; - - size_t offset = 0; m_reccnt = 0; // FIXME: will eventually need to figure out tombstones @@ -87,7 +87,7 @@ public: } VPTree(std::vector<VPTree*> shards) - : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { + : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) { size_t attemp_reccnt = 0; for (size_t i=0; i<shards.size(); i++) { @@ -363,7 +363,6 @@ private: if (d < *farthest) { if (pq.size() == k) { - auto t = pq.peek().data->rec; pq.pop(); } pq.push(m_ptrs[node->start].ptr); diff --git a/include/util/Cursor.h b/include/util/Cursor.h index e8ba53d..e7963b1 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -1,8 +1,8 @@ /* * include/util/Cursor.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -21,16 +21,15 @@ #include <vector> namespace de { -template<typename R> -struct Cursor { - R *ptr; - R *end; - size_t cur_rec_idx; - size_t rec_cnt; +template <typename R> struct Cursor { + const R *ptr; + const R *end; + size_t cur_rec_idx; + size_t rec_cnt; - friend bool operator==(const Cursor &a, const Cursor &b) { - return a.ptr == b.ptr && a.end == b.end; - } + friend bool operator==(const Cursor &a, const Cursor &b) { + return a.ptr == b.ptr && a.end == b.end; + } }; /* @@ -43,51 +42,55 @@ struct Cursor { * be updated to be equal to end, and false will be returned. Iterators will * not be closed. */ -template<typename R> -inline static bool advance_cursor(Cursor<R> &cur) { - cur.ptr++; - cur.cur_rec_idx++; +template <typename R> inline static bool advance_cursor(Cursor<R> &cur) { + cur.ptr++; + cur.cur_rec_idx++; - if (cur.cur_rec_idx >= cur.rec_cnt) return false; + if (cur.cur_rec_idx >= cur.rec_cnt) + return false; - if (cur.ptr >= cur.end) { - return false; - } - return true; + if (cur.ptr >= cur.end) { + return false; + } + return true; } /* * Process the list of cursors to return the cursor containing the next * largest element. Does not advance any of the cursors. If current is - * specified, then skip the current head of that cursor during checking. - * This allows for "peaking" at the next largest element after the current + * specified, then skip the current head of that cursor during checking. + * This allows for "peaking" at the next largest element after the current * largest is processed. */ template <typename R> -inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) { - const R *min_rec = nullptr; - Cursor<R> *result = nullptr; - // FIXME: for large cursor vectors, it may be worth it to use a - // PriorityQueue here instead of scanning. - for (size_t i=0; i< cursors.size(); i++) { - if (cursors[i] == (Cursor<R>) {0} ) continue; - - const R *rec = (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr; - if (rec >= cursors[i].end) continue; +inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, + Cursor<R> *current = nullptr) { + const R *min_rec = nullptr; + Cursor<R> *result = nullptr; + // FIXME: for large cursor vectors, it may be worth it to use a + // PriorityQueue here instead of scanning. 
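The two helpers in this header are meant to be driven together: get_next() scans the cursor heads for the smallest remaining record without consuming it, and advance_cursor() then steps the chosen cursor forward. A minimal sketch of that scan-based k-way merge, assuming Cursor.h is included and R is an ordered record type; the function and variable names here are illustrative, not part of the header:

    /* Drain a set of cursors in ascending order using get_next()/advance_cursor(). */
    template <typename R>
    static void drain_in_order(std::vector<de::Cursor<R>> &cursors,
                               std::vector<R> &out) {
      /* get_next() returns nullptr once every cursor is exhausted */
      while (de::Cursor<R> *cur = de::get_next(cursors)) {
        out.push_back(*cur->ptr);   /* consume the smallest head record */
        de::advance_cursor(*cur);   /* step past it; exhausted cursors are
                                       skipped by later get_next() calls */
      }
    }
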
+ for (size_t i = 0; i < cursors.size(); i++) { + if (cursors[i] == (Cursor<R>){0}) + continue; - if (min_rec == nullptr) { - result = &cursors[i]; - min_rec = rec; - continue; - } + const R *rec = + (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr; + if (rec >= cursors[i].end) + continue; - if (*rec < *min_rec) { - result = &cursors[i]; - min_rec = rec; - } + if (min_rec == nullptr) { + result = &cursors[i]; + min_rec = rec; + continue; } - return result; -} + if (*rec < *min_rec) { + result = &cursors[i]; + min_rec = rec; + } + } + return result; } + +} // namespace de diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h index c149189..b0a3215 100644 --- a/include/util/SortedMerge.h +++ b/include/util/SortedMerge.h @@ -1,72 +1,78 @@ /* * include/util/SortedMerge.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * * A sorted array merge routine for use in Shard construction, as many - * shards will use a sorted array to represent their data. Also encapsulates + * shards will use a sorted array to represent their data. Also encapsulates * the necessary tombstone-cancellation logic. * - * FIXME: include generic per-record processing functionality for Shards that + * FIXME: include generic per-record processing functionality for Shards that * need it, to avoid needing to reprocess the array in the shard after * creation. */ #pragma once -#include "util/Cursor.h" +#include <algorithm> + #include "framework/interface/Shard.h" #include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" namespace de { -using psudb::PriorityQueue; using psudb::BloomFilter; -using psudb::queue_record; using psudb::byte; using psudb::CACHELINE_SIZE; +using psudb::PriorityQueue; +using psudb::queue_record; /* - * A simple struct to return record_count and tombstone_count information - * back to the caller. Could've been an std::pair, but I like the more + * A simple struct to return record_count and tombstone_count information + * back to the caller. Could've been an std::pair, but I like the more * explicit names. */ struct merge_info { - size_t record_count; - size_t tombstone_count; + size_t record_count; + size_t tombstone_count; }; /* * Build a vector of cursors corresponding to the records contained within * a vector of shards. The cursor at index i in the output will correspond - * to the shard at index i in the input. + * to the shard at index i in the input. * * The values of reccnt and tscnt will be updated with the sum of the * records contained within the shards. Note that these counts include deleted * records that may be removed during shard construction, and so constitute * upper bounds only. 
*/ -template <RecordInterface R, ShardInterface<R> S> -static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(shards.size()); - - *reccnt = 0; - *tscnt = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor<Wrapped<R>>{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - *reccnt += shards[i]->get_record_count(); - *tscnt += shards[i]->get_tombstone_count(); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } +template <RecordInterface R, ShardInterface S> +static std::vector<Cursor<Wrapped<R>>> +build_cursor_vec(std::vector<S *> const &shards, size_t *reccnt, + size_t *tscnt) { + std::vector<Cursor<Wrapped<R>>> cursors; + cursors.reserve(shards.size()); + + *reccnt = 0; + *tscnt = 0; + + for (size_t i = 0; i < shards.size(); ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back( + Cursor<Wrapped<R>>{base, base + shards[i]->get_record_count(), 0, + shards[i]->get_record_count()}); + *reccnt += shards[i]->get_record_count(); + *tscnt += shards[i]->get_tombstone_count(); + } else { + cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); } + } - return cursors; + return cursors; } /* @@ -80,126 +86,128 @@ static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, * program will be aborted if the allocation fails. */ template <RecordInterface R> -static merge_info sorted_array_from_bufferview(BufferView<R> bv, - Wrapped<R> *buffer, - psudb::BloomFilter<R> *bf=nullptr) { - /* - * Copy the contents of the buffer view into a temporary buffer, and - * sort them. We still need to iterate over these temporary records to - * apply tombstone/deleted record filtering, as well as any possible - * per-record processing that is required by the shard being built. - */ - auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, - bv.get_record_count(), - sizeof(Wrapped<R>)); - bv.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + bv.get_record_count(); - std::sort(base, stop, std::less<Wrapped<R>>()); - - merge_info info = {0, 0}; - - /* - * Iterate over the temporary buffer to process the records, copying - * them into buffer as needed - */ - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - buffer[info.record_count++] = *base; - - if (base->is_tombstone()) { - info.tombstone_count++; - if (bf){ - bf->insert(base->rec); - } - } +static merge_info +sorted_array_from_bufferview(BufferView<R> bv, Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf = nullptr) { + /* + * Copy the contents of the buffer view into a temporary buffer, and + * sort them. We still need to iterate over these temporary records to + * apply tombstone/deleted record filtering, as well as any possible + * per-record processing that is required by the shard being built. 
+ */ + auto temp_buffer = (Wrapped<R> *)psudb::sf_aligned_calloc( + CACHELINE_SIZE, bv.get_record_count(), sizeof(Wrapped<R>)); + bv.copy_to_buffer((byte *)temp_buffer); + + auto base = temp_buffer; + auto stop = base + bv.get_record_count(); + std::sort(base, stop, std::less<Wrapped<R>>()); + + merge_info info = {0, 0}; + + /* + * Iterate over the temporary buffer to process the records, copying + * them into buffer as needed + */ + while (base < stop) { + if (!base->is_tombstone() && (base + 1 < stop) && + base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; + } else if (base->is_deleted()) { + base += 1; + continue; + } - base++; + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. It should only need to be &= 1 + base->header &= 3; + buffer[info.record_count++] = *base; + + if (base->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(base->rec); + } } - free(temp_buffer); - return info; + base++; + } + + free(temp_buffer); + return info; } /* * Perform a sorted merge of the records within cursors into the provided * buffer. Includes tombstone and tagged delete cancellation logic, and - * will insert tombstones into a bloom filter, if one is provided. + * will insert tombstones into a bloom filter, if one is provided. * * The behavior of this function is undefined if the provided buffer does * not have space to contain all of the records within the input cursors. */ template <RecordInterface R> -static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, - Wrapped<R> *buffer, - psudb::BloomFilter<R> *bf=nullptr) { - - // FIXME: For smaller cursor arrays, it may be more efficient to skip - // the priority queue and just do a scan. - PriorityQueue<Wrapped<R>> pq(cursors.size()); - for (size_t i=0; i<cursors.size(); i++) { - pq.push(cursors[i].ptr, i); - } - - merge_info info = {0, 0}; - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - /* - * if the current record is not a tombstone, and the next record is - * a tombstone that matches the current one, then the current one - * has been deleted, and both it and its tombstone can be skipped - * over. +static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf = nullptr) { + + // FIXME: For smaller cursor arrays, it may be more efficient to skip + // the priority queue and just do a scan. + PriorityQueue<Wrapped<R>> pq(cursors.size()); + for (size_t i = 0; i < cursors.size(); i++) { + pq.push(cursors[i].ptr, i); + } + + merge_info info = {0, 0}; + while (pq.size()) { + auto now = pq.peek(); + auto next = + pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; + /* + * if the current record is not a tombstone, and the next record is + * a tombstone that matches the current one, then the current one + * has been deleted, and both it and its tombstone can be skipped + * over. 
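In shard code these helpers typically appear together as the reconstruction path: cursors are built over the source shards, an output array is sized from the (upper-bound) record count, and sorted_array_merge() performs the merge with the record/tombstone cancellation described in the comments here. A condensed sketch of that flow, assuming a shard type S usable with build_cursor_vec (e.g. ISAMTree<R>); the wrapper function itself is illustrative, not part of this header:

    template <de::RecordInterface R, de::ShardInterface S>
    static de::merge_info rebuild_sorted_array(std::vector<S *> const &shards,
                                               de::Wrapped<R> *&out,
                                               psudb::BloomFilter<R> *bf = nullptr) {
      size_t reccnt = 0, tscnt = 0;  /* upper bounds: cancelled records drop out */
      auto cursors = de::build_cursor_vec<R>(shards, &reccnt, &tscnt);
      (void) tscnt;                  /* normally used to size the tombstone bloom filter */

      out = (de::Wrapped<R> *) psudb::sf_aligned_calloc(
          psudb::CACHELINE_SIZE, reccnt, sizeof(de::Wrapped<R>));

      /* record/tombstone pairs and tagged deletes are cancelled inside the merge */
      return de::sorted_array_merge<R>(cursors, out, bf);
    }
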
+ */ + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); + pq.pop(); + auto &cursor1 = cursors[now.version]; + auto &cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) + pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) + pq.push(cursor2.ptr, next.version); + } else { + auto &cursor = cursors[now.version]; + /* skip over records that have been deleted via tagging */ + if (!cursor.ptr->is_deleted()) { + buffer[info.record_count++] = *cursor.ptr; + + /* + * if the record is a tombstone, increment the ts count and + * insert it into the bloom filter if one has been + * provided. */ - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - /* skip over records that have been deleted via tagging */ - if (!cursor.ptr->is_deleted()) { - buffer[info.record_count++] = *cursor.ptr; - - /* - * if the record is a tombstone, increment the ts count and - * insert it into the bloom filter if one has been - * provided. - */ - if (cursor.ptr->is_tombstone()) { - info.tombstone_count++; - if (bf) { - bf->insert(cursor.ptr->rec); - } - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); + if (cursor.ptr->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(cursor.ptr->rec); + } } + } + pq.pop(); + + if (advance_cursor(cursor)) + pq.push(cursor.ptr, now.version); } + } - return info; + return info; } - - -} +} // namespace de diff --git a/include/util/bf_config.h b/include/util/bf_config.h index 9f29ed7..836e452 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -1,8 +1,8 @@ /* * include/util/bf_config.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -26,19 +26,15 @@ static double BF_FPR = .01; static size_t BF_HASH_FUNCS = 7; /* - * Adjust the value of BF_FPR. The argument must be on the interval + * Adjust the value of BF_FPR. The argument must be on the interval * (0, 1), or the behavior of bloom filters is undefined. */ -static void BF_SET_FPR(double fpr) { - BF_FPR = fpr; -} +[[maybe_unused]] static void BF_SET_FPR(double fpr) { BF_FPR = fpr; } /* * Adjust the value of BF_HASH_FUNCS. The argument must be on the interval * (0, INT64_MAX], or the behavior of bloom filters is undefined. */ -static void BF_SET_HASHFUNC(size_t func_cnt) { - BF_HASH_FUNCS = func_cnt; -} +[[maybe_unused]] static void BF_SET_HASHFUNC(size_t func_cnt) { BF_HASH_FUNCS = func_cnt; } -} +} // namespace de diff --git a/include/util/types.h b/include/util/types.h index cf61412..b8a1343 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -1,7 +1,7 @@ /* * include/util/types.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -17,10 +17,10 @@ */ #pragma once +#include <cassert> #include <cstdint> #include <cstdlib> #include <vector> -#include <cassert> namespace de { @@ -30,14 +30,14 @@ typedef uint32_t PageNum; /* * Byte offset within a page. Also used for lengths of records, etc., * within the codebase. size_t isn't necessary, as the maximum offset - * is only parm::PAGE_SIZE + * is only parm::PAGE_SIZE */ typedef uint16_t PageOffset; /* A unique identifier for a frame within a buffer or cache */ typedef int32_t FrameId; -/* +/* * A unique timestamp for use in MVCC concurrency control. Currently stored in * record headers, but not used by anything. */ @@ -45,7 +45,7 @@ typedef uint32_t Timestamp; const Timestamp TIMESTAMP_MIN = 0; const Timestamp TIMESTAMP_MAX = UINT32_MAX; -/* +/* * Invalid values for various IDs. Used throughout the code base to indicate * uninitialized values and error conditions. */ @@ -60,90 +60,85 @@ const FrameId INVALID_FRID = -1; * as a contiguous index space. */ struct ShardID { - ssize_t level_idx; - ssize_t shard_idx; + ssize_t level_idx; + ssize_t shard_idx; - friend bool operator==(const ShardID &shid1, const ShardID &shid2) { - return shid1.level_idx == shid2.level_idx && shid1.shard_idx == shid2.shard_idx; - } + friend bool operator==(const ShardID &shid1, const ShardID &shid2) { + return shid1.level_idx == shid2.level_idx && + shid1.shard_idx == shid2.shard_idx; + } }; -/* A placeholder for an invalid shard--also used to indicate the mutable buffer */ +/* + * A placeholder for an invalid shard--also used to indicate the + * mutable buffer + */ const ShardID INVALID_SHID = {-1, -1}; typedef ssize_t level_index; typedef struct ReconstructionTask { - std::vector<level_index> sources; - level_index target; - size_t reccnt; + std::vector<level_index> sources; + level_index target; + size_t reccnt; - void add_source(level_index source, size_t cnt) { - sources.push_back(source); - reccnt += cnt; - } + void add_source(level_index source, size_t cnt) { + sources.push_back(source); + reccnt += cnt; + } } ReconstructionTask; class ReconstructionVector { public: - ReconstructionVector() - : total_reccnt(0) {} + ReconstructionVector() : total_reccnt(0) {} - ~ReconstructionVector() = default; + ~ReconstructionVector() = default; - ReconstructionTask operator[](size_t idx) { - return m_tasks[idx]; - } + ReconstructionTask operator[](size_t idx) { return m_tasks[idx]; } - void add_reconstruction(level_index source, level_index target, size_t reccnt) { - m_tasks.push_back({{source}, target, reccnt}); - total_reccnt += reccnt; - } + void add_reconstruction(level_index source, level_index target, + size_t reccnt) { + m_tasks.push_back({{source}, target, reccnt}); + total_reccnt += reccnt; + } - void add_reconstruction(ReconstructionTask task) { - m_tasks.push_back(task); - } + void add_reconstruction(ReconstructionTask task) { m_tasks.push_back(task); } - ReconstructionTask remove_reconstruction(size_t idx) { - assert(idx < m_tasks.size()); - auto task = m_tasks[idx]; + ReconstructionTask remove_reconstruction(size_t idx) { + assert(idx < m_tasks.size()); + auto task = m_tasks[idx]; - m_tasks.erase(m_tasks.begin() + idx); - total_reccnt -= task.reccnt; + m_tasks.erase(m_tasks.begin() + idx); + total_reccnt -= task.reccnt; - return task; - } + return task; + } - ReconstructionTask remove_smallest_reconstruction() { - size_t min_size = m_tasks[0].reccnt; - size_t idx = 0; - for (size_t i=1; i<m_tasks.size(); i++) { - if (m_tasks[i].reccnt < min_size) { - min_size = m_tasks[i].reccnt; - idx = 
i; - } - } - - auto task = m_tasks[idx]; - m_tasks.erase(m_tasks.begin() + idx); - total_reccnt -= task.reccnt; - - return task; + ReconstructionTask remove_smallest_reconstruction() { + size_t min_size = m_tasks[0].reccnt; + size_t idx = 0; + for (size_t i = 1; i < m_tasks.size(); i++) { + if (m_tasks[i].reccnt < min_size) { + min_size = m_tasks[i].reccnt; + idx = i; + } } - size_t get_total_reccnt() { - return total_reccnt; - } + auto task = m_tasks[idx]; + m_tasks.erase(m_tasks.begin() + idx); + total_reccnt -= task.reccnt; - size_t size() { - return m_tasks.size(); - } + return task; + } + + size_t get_total_reccnt() { return total_reccnt; } + size_t size() { return m_tasks.size(); } private: - std::vector<ReconstructionTask> m_tasks; - size_t total_reccnt; + std::vector<ReconstructionTask> m_tasks; + size_t total_reccnt; }; -} +} // namespace de diff --git a/tests/alias_tests.cpp b/tests/alias_tests.cpp index 98d0c63..dcd3eec 100644 --- a/tests/alias_tests.cpp +++ b/tests/alias_tests.cpp @@ -1,7 +1,7 @@ /* - * tests/alias_tests.cpp + * tests/isam_tests.cpp * - * Unit tests for Alias shard + * Unit tests for ISAM Tree shard * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> @@ -11,27 +11,22 @@ */ #include "shard/Alias.h" -#include "query/wss.h" -#include "framework/structure/MutableBuffer.h" #include "include/testing.h" - - #include <check.h> using namespace de; -typedef WRec R; +typedef WeightedRecord<uint64_t, uint32_t, uint32_t> R; typedef Alias<R> Shard; - #include "include/shard_standard.h" -#include "include/rangequery.h" +#include "include/wss.h" Suite *unit_testing() { - Suite *unit = suite_create("ISAMTree Shard Unit Testing"); + Suite *unit = suite_create("Walker's Alias Shard Unit Testing"); - inject_rangequery_tests(unit); + inject_wss_tests(unit); inject_shard_tests(unit); return unit; @@ -58,4 +53,3 @@ int main() return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; } - diff --git a/tests/de_bsm_tag.cpp b/tests/de_bsm_tag.cpp new file mode 100644 index 0000000..4063cfe --- /dev/null +++ b/tests/de_bsm_tag.cpp @@ -0,0 +1,61 @@ +/* + * tests/de_level_tomb.cpp + * + * Unit tests for Dynamic Extension Framework + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#include <set> +#include <random> +#include <algorithm> + +#include "include/testing.h" +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" + +#include <check.h> +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::BSM, DeletePolicy::TAGGING, SerialScheduler> DE; + +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tagging BSM Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_bsm_tomb.cpp b/tests/de_bsm_tomb.cpp index 493440e..3a24e87 100644 --- a/tests/de_bsm_tomb.cpp +++ b/tests/de_bsm_tomb.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::BSM, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::BSM, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_level_concurrent.cpp b/tests/de_level_concurrent.cpp index d1e0496..afd1af2 100644 --- a/tests/de_level_concurrent.cpp +++ b/tests/de_level_concurrent.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/concurrent_extension.h" diff --git a/tests/de_level_tag.cpp b/tests/de_level_tag.cpp index 75131c4..c175357 100644 --- a/tests/de_level_tag.cpp +++ b/tests/de_level_tag.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_level_tomb.cpp b/tests/de_level_tomb.cpp index 6da211d..e587817 100644 --- a/tests/de_level_tomb.cpp +++ b/tests/de_level_tomb.cpp @@ -23,7 +23,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_tier_concurrent.cpp b/tests/de_tier_concurrent.cpp index bb2ec7f..ce41dbc 100644 --- a/tests/de_tier_concurrent.cpp +++ b/tests/de_tier_concurrent.cpp @@ -17,12 +17,16 @@ #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" #include "query/rangequery.h" +#include "framework/scheduling//FIFOScheduler.h" #include <check.h> using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; #include "include/concurrent_extension.h" diff --git a/tests/de_tier_tag.cpp b/tests/de_tier_tag.cpp index 79bb7bf..97a5299 100644 --- a/tests/de_tier_tag.cpp +++ b/tests/de_tier_tag.cpp @@ -23,7 +23,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_tier_tomb.cpp b/tests/de_tier_tomb.cpp index b1387bb..930d0d5 100644 --- a/tests/de_tier_tomb.cpp +++ 
b/tests/de_tier_tomb.cpp @@ -23,7 +23,9 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<Rec, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/include/concurrent_extension.h b/tests/include/concurrent_extension.h index 927a094..02bd694 100644 --- a/tests/include/concurrent_extension.h +++ b/tests/include/concurrent_extension.h @@ -22,17 +22,20 @@ * should be included in the source file that includes this one, above the * include statement. */ -/*#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/FIFOScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> - -//using namespace de; -//typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; -*/ - +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// //#include "framework/scheduling/FIFOScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <set> +// #include <random> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE> DE; //, FIFOScheduler> DE; START_TEST(t_create) { @@ -164,11 +167,11 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); @@ -203,8 +206,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; while (!test_de->insert(r)) { @@ -220,7 +221,6 @@ START_TEST(t_tombstone_merging_01) while (!test_de->erase(dr)) { _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -258,7 +258,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); @@ -268,7 +267,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -304,15 +302,10 @@ START_TEST(t_static_structure) records.insert({key, val}); } - size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; while (!test_de->insert(rec)) { _mm_pause(); } - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { std::vector<R> del_vec; @@ -323,7 +316,6 @@ START_TEST(t_static_structure) _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } diff --git a/tests/include/dynamic_extension.h b/tests/include/dynamic_extension.h index 6e9b16c..90c6906 100644 --- a/tests/include/dynamic_extension.h +++ b/tests/include/dynamic_extension.h @@ -22,18 +22,24 @@ * should be included in the source file that includes this one, above the * include statement. 
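Beyond the typedef changes, the end-to-end flow these shared test headers exercise against a DynamicExtension instance is roughly the following. A condensed sketch using the R/Q/DE preamble typedefs that each de_*.cpp defines before the include, with illustrative keys; the retry loop mirrors the concurrent tests, and the constructor arguments are the same values the tests pass:

    DE extension(100, 1000, 2);    /* constructed as in the tests */

    R rec = {123, 456};
    while (!extension.insert(rec)) {
      _mm_pause();                 /* concurrent schedulers may reject inserts
                                      while a flush is in progress */
    }

    extension.erase(rec);          /* tombstone or tag, per DeletePolicy */

    Q::Parameters p;
    p.lower_bound = 100;
    p.upper_bound = 200;
    auto future = extension.query(std::move(p));
    auto result = future.get();    /* future-style result retrieval */
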
*/ -/* -#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/SerialScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> -using namespace de; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; -*/ + +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// #include "framework/scheduling/SerialScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <random> +// #include <set> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +#include "framework/util/Configuration.h" START_TEST(t_create) { auto test_de = new DE(100, 1000, 2); @@ -103,7 +109,16 @@ START_TEST(t_insert_with_mem_merges) test_de->await_next_epoch(); ck_assert_int_eq(test_de->get_record_count(), 300); - ck_assert_int_eq(test_de->get_height(), 1); + + /* + * BSM grows on every flush, so the height will be different than + * normal layout policies + */ + if (test_de->Layout == de::LayoutPolicy::BSM) { + ck_assert_int_eq(test_de->get_height(), 2); + } else { + ck_assert_int_eq(test_de->get_height(), 1); + } delete test_de; } @@ -138,11 +153,12 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; + p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); ck_assert_int_eq(r.size(), 251); @@ -176,8 +192,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; ck_assert_int_eq(test_de->insert(r), 1); @@ -189,7 +203,6 @@ START_TEST(t_tombstone_merging_01) for (size_t i=0; i<del_vec.size(); i++) { R dr = {del_vec[i].first, del_vec[i].second}; test_de->erase(dr); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -209,14 +222,14 @@ START_TEST(t_tombstone_merging_01) } END_TEST -DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { +[[maybe_unused]] static DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { auto rng = gsl_rng_alloc(gsl_rng_mt19937); auto test_de = new DE(1000, 10000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -227,17 +240,15 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -260,9 +271,9 @@ START_TEST(t_static_structure) size_t reccnt = 100000; auto test_de = new DE(100, 1000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> 
deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -274,15 +285,11 @@ START_TEST(t_static_structure) } size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; ck_assert_int_eq(test_de->insert(rec), 1); - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { diff --git a/tests/include/irs.h b/tests/include/irs.h new file mode 100644 index 0000000..1c5be2c --- /dev/null +++ b/tests/include/irs.h @@ -0,0 +1,165 @@ +/* + * tests/include/irs.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +#include "query/irs.h" +#include <algorithm> + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. + */ +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "testing.h" +#include <check.h> +#include <gsl/gsl_rng.h> +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; +typedef irs::Query<ISAMTree<R>> Query; + +static gsl_rng *g_rng; + +START_TEST(t_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + auto local_query = irs::Query<Shard>::local_preproc(&shard, &parms); + irs::Query<Shard>::distribute_query(&parms, {local_query}, nullptr); + + auto result = irs::Query<Shard>::local_query(&shard, local_query); + delete local_query; + + ck_assert_int_eq(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + { + auto view = buffer->get_buffer_view(); + auto query = irs::Query<Shard>::local_preproc_buffer(&view, &parms); + irs::Query<Shard>::distribute_query(&parms, {}, query); + auto result = irs::Query<Shard>::local_query_buffer(query); + delete query; + + ck_assert_int_le(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + } + + delete buffer; +} +END_TEST + + +START_TEST(t_irs_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 
1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = Shard(buffer2->get_buffer_view()); + + size_t k = 10; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + /* necessary to store the alias structure */ + auto dummy_buffer_query = irs::Query<Shard>::LocalQueryBuffer(); + dummy_buffer_query.buffer = nullptr; + dummy_buffer_query.sample_size = 0; + dummy_buffer_query.cutoff = 0; + dummy_buffer_query.global_parms = parms; + dummy_buffer_query.records = {}; + dummy_buffer_query.alias = nullptr; + + auto query1 = irs::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = irs::Query<Shard>::local_preproc(&shard2, &parms); + + irs::Query<Shard>::distribute_query(&parms, {query1, query2}, &dummy_buffer_query); + + std::vector<std::vector<irs::Query<Shard>::LocalResultType>> results(2); + results[0] = irs::Query<Shard>::local_query(&shard1, query1); + results[1] = irs::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; + + ck_assert_int_eq(results[0].size() + results[1].size(), k); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + std::vector<irs::Query<Shard>::ResultType> result; + irs::Query<Shard>::combine(proc_results, nullptr, result); + ck_assert_int_eq(result.size(), k); + + delete buffer1; + delete buffer2; +} +END_TEST + +static void inject_irs_tests(Suite *suite) { + g_rng = gsl_rng_alloc(gsl_rng_mt19937); + + TCase *irs = tcase_create("Independent Range Sampling Query Testing"); + tcase_add_test(irs, t_irs); + tcase_add_test(irs, t_buffer_irs); + tcase_add_test(irs, t_irs_merge); + suite_add_tcase(suite, irs); +} diff --git a/tests/include/pointlookup.h b/tests/include/pointlookup.h index 71a1099..af58440 100644 --- a/tests/include/pointlookup.h +++ b/tests/include/pointlookup.h @@ -17,6 +17,8 @@ */ #pragma once +#include "query/pointlookup.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -25,15 +27,12 @@ * include statement. 
*/ -//#include "shard/FSTrie.h" -#include "query/pointlookup.h" +#include "shard/FSTrie.h" #include "testing.h" - #include <check.h> - using namespace de; -//typedef StringRec R; -//typedef FSTrie<R> Shard; +typedef StringRec R; +typedef FSTrie<R> Shard; START_TEST(t_point_lookup_query) { @@ -45,23 +44,21 @@ START_TEST(t_point_lookup_query) for (size_t i=0; i<bv.get_record_count(); i++) { auto key = bv.get(i)->rec.key; - pl::Parms<R> parms = {key}; - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); - + pl::Query<Shard>::Parameters parms = {key}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, key); - //ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, key); + ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 0); } @@ -78,24 +75,21 @@ START_TEST(t_buffer_point_lookup) { auto view = buffer->get_buffer_view(); for (int i=view.get_record_count()-1; i>=0; i--) { - pl::Parms<R> parms = {view.get(i)->rec.key}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {view.get(i)->rec.key}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); - //ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); + ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); ck_assert_int_eq(result.size(), 0); } diff --git a/tests/include/rangecount.h b/tests/include/rangecount.h index 1951221..22189b9 100644 --- a/tests/include/rangecount.h +++ b/tests/include/rangecount.h @@ -1,5 +1,5 @@ /* - * tests/include/rangecount.h + * tests/include/rangequery.h * * Standardized unit tests for range queries against supporting * shard types @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangecount.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in 
this file * temporarily for development purposes. They should be removed prior @@ -24,30 +27,29 @@ * should be included in the source file that includes this one, above the * include statement. */ -//#include "shard/ISAMTree.h" -//#include "query/rangecount.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; -#include "query/rangecount.h" +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rc::Query<ISAMTree<R>> Query; START_TEST(t_range_count) { - auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; - auto state = rc::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rc::Query<R, Shard>::query(&shard, state, &parms); - rc::Query<R, Shard>::delete_query_state(state); + auto local_query = rc::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rc::Query<Shard>::local_query(&shard, local_query); + delete local_query; - ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); delete buffer; } @@ -58,16 +60,15 @@ START_TEST(t_buffer_range_count) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rc::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rc::Query<R, Shard>::buffer_query(state, &parms); - rc::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rc::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rc::Query<Shard>::local_query_buffer(query); + delete query; - ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); } delete buffer; @@ -83,66 +84,31 @@ START_TEST(t_range_count_merge) auto shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rc::Parms<R> parms = {150, 500}; + rc::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rc::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rc::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rc::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rc::Query<R, Shard>::query(&shard2, state2, &parms); - - rc::Query<R, Shard>::delete_query_state(state1); - rc::Query<R, Shard>::delete_query_state(state2); + auto query1 = rc::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rc::Query<Shard>::local_preproc(&shard2, &parms); - ck_assert_int_eq(results[0].size(), 1); - ck_assert_int_eq(results[1].size(), 1); + std::vector<std::vector<rc::Query<Shard>::LocalResultType>> results(2); + results[0] = rc::Query<Shard>::local_query(&shard1, query1); + results[1] = rc::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; - std::vector<R> result; - rc::Query<R, Shard>::merge(results, nullptr, result); + 
size_t reccnt = results[0][0].record_count + results[1][0].record_count; + size_t tscnt = results[0][0].tombstone_count + results[1][0].tombstone_count; - ck_assert_int_eq(result[0].key, result_size); - - delete buffer1; - delete buffer2; -} -END_TEST + ck_assert_int_eq(reccnt - tscnt, result_size); + std::vector<rc::Query<Shard>::ResultType> result; + rc::Query<Shard>::combine(results, nullptr, result); -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } + ck_assert_int_eq(result[0], result_size); delete buffer1; delete buffer2; - delete shard1; - delete shard2; } END_TEST diff --git a/tests/include/rangequery.h b/tests/include/rangequery.h index f90e107..5c3c1d6 100644 --- a/tests/include/rangequery.h +++ b/tests/include/rangequery.h @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangequery.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,26 +27,27 @@ * should be included in the source file that includes this one, above the * include statement. */ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - -#include "query/rangequery.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rq::Query<ISAMTree<R>> Query; START_TEST(t_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); + auto local_query = rq::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rq::Query<Shard>::local_query(&shard, local_query); + delete local_query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -60,13 +64,13 @@ START_TEST(t_buffer_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rq::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rq::Query<Shard>::local_query_buffer(query); + delete query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -88,19 +92,18 @@ START_TEST(t_range_query_merge) auto 
shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rq::Parms<R> parms = {150, 500}; + rq::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); + auto query1 = rq::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rq::Query<Shard>::local_preproc(&shard2, &parms); - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); + std::vector<std::vector<rq::Query<Shard>::LocalResultType>> results(2); + results[0] = rq::Query<Shard>::local_query(&shard1, query1); + results[1] = rq::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; ck_assert_int_eq(results[0].size() + results[1].size(), result_size); @@ -113,8 +116,8 @@ START_TEST(t_range_query_merge) } } - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); + std::vector<rq::Query<Shard>::ResultType> result; + rq::Query<Shard>::combine(proc_results, nullptr, result); std::sort(result.begin(), result.end()); ck_assert_int_eq(result.size(), result_size); @@ -145,8 +148,6 @@ START_TEST(t_lower_bound) auto merged = Shard(shards); for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - auto idx = merged.get_lower_bound(i); assert(idx < merged.get_record_count()); diff --git a/tests/include/shard_standard.h b/tests/include/shard_standard.h index 2809d74..ece2a57 100644 --- a/tests/include/shard_standard.h +++ b/tests/include/shard_standard.h @@ -75,7 +75,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git a/tests/include/shard_string.h b/tests/include/shard_string.h index 881f41a..2ef4cec 100644 --- a/tests/include/shard_string.h +++ b/tests/include/shard_string.h @@ -73,7 +73,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git a/tests/include/testing.h b/tests/include/testing.h index d0bff2d..33cbb3f 100644 --- a/tests/include/testing.h +++ b/tests/include/testing.h @@ -34,7 +34,7 @@ static std::string summa_wordlist = "tests/data/summa-wordlist.txt"; static std::vector<std::unique_ptr<char[]>> string_data; -static std::vector<StringRec> read_string_data(std::string fname, size_t n) { +[[maybe_unused]] static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::vector<StringRec> vec; vec.reserve(n); string_data.reserve(n); @@ -50,14 +50,14 @@ static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::string field; std::getline(ls, field, '\t'); - auto val = atol(field.c_str()); + uint64_t val = atol(field.c_str()); std::getline(ls, field, '\n'); char *c = strdup(field.c_str()); string_data.push_back(std::unique_ptr<char[]>(c)); - StringRec r(string_data[string_data.size() -1].get(), val, field.size()); + StringRec r{string_data[string_data.size() -1].get(), val, field.size()}; vec.push_back(r); } 
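The query-side changes in these tests all follow the same calling convention, which replaces the old get_query_state()/query()/delete_query_state() triple: build a Parameters struct, run local_preproc() per shard, execute local_query(), and reduce the per-shard results with combine(). Sampling queries (irs, wss) additionally pass the preprocessed states through distribute_query() first. A condensed sketch using the range-query specialization from the tests above, with illustrative bounds and variable names:

    using Query = de::rq::Query<de::ISAMTree<R>>;

    Query::Parameters parms = {300, 500};                /* lower_bound, upper_bound */

    auto *local = Query::local_preproc(&shard, &parms);  /* per-shard preprocessing */
    auto partial = Query::local_query(&shard, local);    /* vector of LocalResultType */
    delete local;

    std::vector<std::vector<Query::LocalResultType>> partials = {partial};
    std::vector<Query::ResultType> result;
    Query::combine(partials, nullptr, result);           /* merge the per-shard results */
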
@@ -76,7 +76,7 @@ std::vector<R> strip_wrapping(std::vector<de::Wrapped<R>> vec) { return out; } -static bool initialize_test_file(std::string fname, size_t page_cnt) +[[maybe_unused]] static bool initialize_test_file(std::string fname, size_t page_cnt) { auto flags = O_RDWR | O_CREAT | O_TRUNC; mode_t mode = 0640; @@ -113,7 +113,7 @@ error: return 0; } -static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { +[[maybe_unused]] static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { return ((double) std::abs(n1 - n2) / (double) mag) < epsilon; } diff --git a/tests/include/wirs.h b/tests/include/wirs.h deleted file mode 100644 index 4c0630f..0000000 --- a/tests/include/wirs.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * tests/include/rangequery.h - * - * Standardized unit tests for range queries against supporting - * shard types - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * WARNING: This file must be included in the main unit test set - * after the definition of an appropriate Shard and R - * type. In particular, R needs to implement the key-value - * pair interface and Shard needs to support lower_bound. - * For other types of record and shard, you'll need to - * use a different set of unit tests. - */ -#pragma once - -/* - * Uncomment these lines temporarily to remove errors in this file - * temporarily for development purposes. They should be removed prior - * to building, to ensure no duplicate definitions. These includes/defines - * should be included in the source file that includes this one, above the - * include statement. - */ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - - -START_TEST(t_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - auto shard = Shard(buffer->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_buffer_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - { - auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - } - - delete buffer; -} -END_TEST - - -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - 
size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); - - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } - } - - delete buffer1; - delete buffer2; -} -END_TEST - - -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (size_t i=100; i<1000; i++) { - R r; - r.key = i; - r.value = i; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } - - delete buffer1; - delete buffer2; - delete shard1; - delete shard2; -} -END_TEST - -static void inject_rangequery_tests(Suite *suite) { - TCase *range_query = tcase_create("Range Query Testing"); - tcase_add_test(range_query, t_range_query); - tcase_add_test(range_query, t_buffer_range_query); - tcase_add_test(range_query, t_range_query_merge); - suite_add_tcase(suite, range_query); -} diff --git a/tests/include/wss.h b/tests/include/wss.h index f0ac74c..01327d2 100644 --- a/tests/include/wss.h +++ b/tests/include/wss.h @@ -1,10 +1,10 @@ /* - * tests/include/rangequery.h + * tests/include/wss.h * - * Standardized unit tests for range queries against supporting + * Standardized unit tests for weighted set sampling against supporting * shard types * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -17,6 +17,8 @@ */ #pragma once +#include "query/wss.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,28 +26,38 @@ * should be included in the source file that includes this one, above the * include statement. 
*/ -#include "shard/Alias.h" -#include "testing.h" -#include <check.h> -using namespace de; -typedef Alias<R> Shard; +// #include "framework/interface/Record.h" +// #include "shard/Alias.h" +// #include "testing.h" +// #include <check.h> -#include "query/wss.h" +// using namespace de; + +// typedef WeightedRecord<int64_t, int32_t, int32_t> R; +// typedef Alias<R> Shard; + +typedef wss::Query<Shard> Q; START_TEST(t_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); auto shard = Shard(buffer->get_buffer_view()); - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; - parms.sample_size = 20; + parms.sample_size = k; + + auto query = Q::local_preproc(&shard, &parms); + Q::distribute_query(&parms, {query}, nullptr); + + auto result = Q::local_query(&shard, query); + delete query; + + ck_assert_int_eq(result.size(), k); - auto state = wss::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = wss::Query<R, Shard>::query(&shard, state, &parms); - wss::Query<R, Shard>::delete_query_state(state); delete buffer; gsl_rng_free(rng); @@ -56,83 +68,28 @@ END_TEST START_TEST(t_buffer_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); - - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; + parms.sample_size = k; { auto view = buffer->get_buffer_view(); - auto state = wss::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = wss::Query<R, Shard>::buffer_query(state, &parms); - wss::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.sample_size); - for (size_t i=0; i<result.size(); i++) { - - } - } - - delete buffer; -} -END_TEST + auto query = Q::local_preproc_buffer(&view, &parms); + Q::distribute_query(&parms, {}, query); + auto result = Q::local_query_buffer(query); - -/* -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - wss::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = wss::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = wss::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = wss::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = wss::Query<R, Shard>::query(&shard2, state2, &parms); - - wss::Query<R, Shard>::delete_query_state(state1); - wss::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - auto result = wss::Query<R, Shard>::merge(proc_results, nullptr); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } + delete query; + ck_assert_int_le(result.size(), k); } - delete buffer1; - delete buffer2; + delete buffer; + 
gsl_rng_free(rng); } END_TEST -*/ static void inject_wss_tests(Suite *suite) { diff --git a/tests/internal_level_tests.cpp b/tests/internal_level_tests.cpp index 06b0bab..e11b7c7 100644 --- a/tests/internal_level_tests.cpp +++ b/tests/internal_level_tests.cpp @@ -22,7 +22,7 @@ using namespace de; -typedef InternalLevel<Rec, ISAMTree<Rec>, rq::Query<Rec, ISAMTree<Rec>>> ILevel; +typedef InternalLevel<ISAMTree<Rec>, rq::Query<ISAMTree<Rec>>> ILevel; START_TEST(t_memlevel_merge) { diff --git a/tests/irs_tests.cpp b/tests/irs_tests.cpp new file mode 100644 index 0000000..6ef03f4 --- /dev/null +++ b/tests/irs_tests.cpp @@ -0,0 +1,55 @@ +/* + * tests/irs_tests.cpp + * + * Unit tests for independent range sampling across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; + +#include "include/irs.h" + +Suite *unit_testing() +{ + Suite *unit = suite_create("Independent Range Sampling Query Testing"); + inject_irs_tests(unit); + + return unit; +} + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + gsl_rng_free(g_rng); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/mutable_buffer_tests.cpp b/tests/mutable_buffer_tests.cpp index 26057a2..c3e1b34 100644 --- a/tests/mutable_buffer_tests.cpp +++ b/tests/mutable_buffer_tests.cpp @@ -323,13 +323,11 @@ START_TEST(t_bview_delete) /* insert 75 records and get tail when LWM is exceeded */ size_t new_head = 0; Rec rec = {1, 1}; - size_t cnt = 0; for (size_t i=0; i<75; i++) { ck_assert_int_eq(buffer->append(rec), 1); rec.key++; rec.value++; - cnt++; if (buffer->is_at_low_watermark() && new_head == 0) { new_head = buffer->get_tail(); @@ -343,7 +341,6 @@ START_TEST(t_bview_delete) rec.key++; rec.value++; - cnt++; } Rec dr1 = {67, 67}; diff --git a/tests/vptree_tests.cpp b/tests/vptree_tests.cpp index ff99ba6..faa704f 100644 --- a/tests/vptree_tests.cpp +++ b/tests/vptree_tests.cpp @@ -20,6 +20,8 @@ using namespace de; typedef PRec R; typedef VPTree<R> Shard; +typedef knn::Query<Shard> Q; + START_TEST(t_mbuffer_init) { @@ -123,15 +125,15 @@ START_TEST(t_buffer_query) target.data[0] = 120; target.data[1] = 120; - knn::Parms<PRec> p; + Q::Parameters p; p.k = 10; p.point = target; { auto bv = buffer->get_buffer_view(); - auto state = knn::Query<PRec, Shard>::get_buffer_query_state(&bv, &p); - auto result = knn::Query<PRec, Shard>::buffer_query(state, &p); - knn::Query<PRec, Shard>::delete_buffer_query_state(state); + auto query = Q::local_preproc_buffer(&bv, &p); + auto result = Q::local_query_buffer(query); + delete query; std::sort(result.begin(), result.end()); size_t start = 120 - 5; @@ -150,15 +152,16 @@ START_TEST(t_knn_query) auto vptree = VPTree<PRec>(buffer->get_buffer_view()); - knn::Parms<PRec> p; + Q::Parameters p; + for (size_t i=0; i<100; i++) { p.k = rand() % 150; p.point.data[0] = rand() % (n-p.k); p.point.data[1] = p.point.data[0]; - auto state = knn::Query<PRec, Shard>::get_query_state(&vptree, &p); - auto results = 
knn::Query<PRec, Shard>::query(&vptree, state, &p); - knn::Query<PRec, Shard>::delete_query_state(state); + auto query = Q::local_preproc(&vptree, &p); + auto results = Q::local_query(&vptree, query); + delete query; ck_assert_int_eq(results.size(), p.k); diff --git a/tests/wss_tests.cpp b/tests/wss_tests.cpp new file mode 100644 index 0000000..39e9d6e --- /dev/null +++ b/tests/wss_tests.cpp @@ -0,0 +1,56 @@ +/* + * tests/wss_tests.cpp + * + * Unit tests for weighted set sampling across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "framework/interface/Record.h" +#include "shard/Alias.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef WeightedRecord<int64_t, int32_t, int32_t> R; +typedef Alias<R> Shard; + +#include "include/wss.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("Weighted Set Sampling Query Testing"); + inject_wss_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +}
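
The test hunks above all follow the same migration: the old per-query free functions (get_query_state, query, buffer_query, delete_query_state, merge) are replaced by the shard-parameterized interface (Parameters, local_preproc, local_query, local_preproc_buffer, local_query_buffer, distribute_query, combine). The sketch below, which is not part of the patch, shows that calling pattern in isolation for the range-query case; it assumes the Rec record type, ISAMTree shard, and create_sequential_mbuffer helper from include/testing.h and mirrors t_range_query_merge above, so treat it as illustrative rather than canonical.

#include <vector>

#include "shard/ISAMTree.h"
#include "query/rangequery.h"
#include "include/testing.h"

using namespace de;

typedef Rec R;
typedef ISAMTree<R> Shard;
typedef rq::Query<Shard> Q;

/* Illustrative sketch of the reworked query interface (assumed names from
 * the tests above; not part of the diff). */
static std::vector<Q::ResultType> example_range_query(size_t lower, size_t upper) {
    auto buffer = create_sequential_mbuffer<R>(100, 1000);
    auto shard = Shard(buffer->get_buffer_view());

    /* Parameters replaces the old rq::Parms<R> aggregate */
    Q::Parameters parms = {lower, upper};

    /* local_preproc + local_query replace get_query_state + query;
     * the caller still owns and deletes the local query object */
    auto query = Q::local_preproc(&shard, &parms);

    std::vector<std::vector<Q::LocalResultType>> local_results(1);
    local_results[0] = Q::local_query(&shard, query);
    delete query;

    /* combine replaces merge for assembling the final result set */
    std::vector<Q::ResultType> result;
    Q::combine(local_results, nullptr, result);

    delete buffer;
    return result;
}

The sampling tests additionally call Q::distribute_query(&parms, {query}, nullptr) between preprocessing and the local queries to apportion the sample size across shards, as the wss and irs hunks above show.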