| author | Douglas B. Rumbaugh <dbr4@psu.edu> | 2024-02-09 14:06:59 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-09 14:06:59 -0500 |
| commit | bc0f3cca3a5b495fcae1d3ad8d09e6d714da5d30 (patch) | |
| tree | 66333c55feb0ea8875a50e6dc07c8535d241bf1c | |
| parent | 076e104b8672924c3d80cd1da2fdb5ebee1766ac (diff) | |
| parent | 46885246313358a3b606eca139b20280e96db10e (diff) | |
| download | dynamic-extension-bc0f3cca3a5b495fcae1d3ad8d09e6d714da5d30.tar.gz | |
Merge pull request #1 from dbrumbaugh/new-buffer
Initial Concurrency Implementation
101 files changed, 8893 insertions(+), 6298 deletions(-)
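Editorial note: the `-mcx16` and `-latomic` flags that this merge threads through every target in the CMakeLists.txt diff below exist because the new epoch machinery in `DynamicExtension.h` (at the end of this diff) packs an epoch pointer and a reference count into a single 16-byte struct and updates it atomically. On x86-64, a lock-free 16-byte atomic needs the double-width `cmpxchg16b` instruction (enabled by `-mcx16`), and GCC may still route the operation through libatomic. A minimal sketch of the idea, not the framework's actual code:

```cpp
#include <atomic>
#include <cstddef>

struct Epoch;               /* opaque here; stands in for the framework's epoch type */

struct epoch_ptr {          /* pointer + refcount: 16 bytes on x86-64 */
    Epoch *epoch;
    size_t refcnt;
};

/* lock-free only when the target supports a double-width CAS (cmpxchg16b) */
std::atomic<epoch_ptr> current_epoch;

/* acquire a reference: retry the 16-byte CAS until the refcount bump lands */
Epoch *acquire(std::atomic<epoch_ptr> &p) {
    epoch_ptr old = p.load();
    while (!p.compare_exchange_weak(old, {old.epoch, old.refcnt + 1})) { }
    return old.epoch;
}
```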
diff --git a/.gitmodules b/.gitmodules
index 7616a12..de448fe 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -15,4 +15,7 @@
 	url = https://github.com/microsoft/ALEX
 [submodule "external/psudb-common"]
 	path = external/psudb-common
-	url = git@github.com:PSU-Database-Systems-Group/psudb-common
+	url = git@github.com:psu-db/psudb-common
+[submodule "external/ctpl"]
+	path = external/ctpl
+	url = git@github.com:vit-vit/CTPL.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f642714..81fdb63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,11 +8,15 @@ project("Practical Dynamic Extension" VERSION 0.1.0)
 set(debug false)
 set(tests True)
-set(bench false)
+set(bench true)
+set(old_bench False)
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin")
+set(CMAKE_CXX_FLAGS "-latomic -mcx16")
 
-add_compile_options(-Iinclude -Iexternal/PLEX/include)
+add_compile_options(-Iinclude -Iexternal/PLEX/include -Iexternal -mcx16 -fconcepts-diagnostics-depth=3)
 
 if (debug)
     add_compile_options(-g -O0)
@@ -29,64 +33,141 @@ if (tests)
     set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/tests")
     file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
 
-    add_executable(wirs_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/wirs_tests.cpp)
-    target_link_libraries(wirs_tests PUBLIC gsl check subunit pthread)
-    target_include_directories(wirs_tests PRIVATE include external/psudb-common/cpp/include)
+    add_executable(augbtree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/augbtree_tests.cpp)
+    target_link_libraries(augbtree_tests PUBLIC gsl check subunit pthread atomic)
+    target_link_options(augbtree_tests PUBLIC -mcx16)
+    target_include_directories(augbtree_tests PRIVATE include external/psudb-common/cpp/include external/ctpl)
 
     add_executable(internal_level_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/internal_level_tests.cpp)
-    target_link_libraries(internal_level_tests PUBLIC gsl check subunit pthread)
+    target_link_libraries(internal_level_tests PUBLIC gsl check subunit pthread atomic)
+    target_link_options(internal_level_tests PUBLIC -mcx16)
     target_include_directories(internal_level_tests PRIVATE include external/psudb-common/cpp/include)
 
     add_executable(mutable_buffer_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/mutable_buffer_tests.cpp)
-    target_link_libraries(mutable_buffer_tests PUBLIC gsl check subunit pthread)
+    target_link_libraries(mutable_buffer_tests PUBLIC gsl check subunit pthread atomic)
+    target_link_options(mutable_buffer_tests PUBLIC -mcx16)
     target_include_directories(mutable_buffer_tests PRIVATE include external/psudb-common/cpp/include)
 
-    add_executable(vptree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/vptree_tests.cpp)
-    target_link_libraries(vptree_tests PUBLIC gsl check subunit pthread)
-    target_include_directories(vptree_tests PRIVATE include external/vptree external/psudb-common/cpp/include)
+    add_executable(rangequery_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/rangequery_tests.cpp)
+    target_link_libraries(rangequery_tests PUBLIC gsl check subunit pthread atomic)
+    target_link_options(rangequery_tests PUBLIC -mcx16)
+    target_include_directories(rangequery_tests PRIVATE include external/psudb-common/cpp/include)
 
-    #add_executable(dynamic_extension_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/dynamic_extension_tests.cpp)
-    #target_link_libraries(dynamic_extension_tests PUBLIC gsl check subunit pthread)
-    #target_include_directories(dynamic_extension_tests PRIVATE include)
+    add_executable(rangecount_tests
${CMAKE_CURRENT_SOURCE_DIR}/tests/rangecount_tests.cpp) + target_link_libraries(rangecount_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(rangecount_tests PUBLIC -mcx16) + target_include_directories(rangecount_tests PRIVATE include external/psudb-common/cpp/include) + + + add_executable(vptree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/vptree_tests.cpp) + target_link_libraries(vptree_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(vptree_tests PUBLIC -mcx16) + target_include_directories(vptree_tests PRIVATE include external/vptree external/psudb-common/cpp/include) + add_executable(de_tier_tag ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_tier_tag.cpp) - target_link_libraries(de_tier_tag PUBLIC gsl check subunit pthread) - target_include_directories(de_tier_tag PRIVATE include external/psudb-common/cpp/include) + target_link_libraries(de_tier_tag PUBLIC gsl check subunit pthread atomic) + target_link_options(de_tier_tag PUBLIC -mcx16) + target_include_directories(de_tier_tag PRIVATE include external/psudb-common/cpp/include external) add_executable(de_tier_tomb ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_tier_tomb.cpp) - target_link_libraries(de_tier_tomb PUBLIC gsl check subunit pthread) - target_include_directories(de_tier_tomb PRIVATE include external/psudb-common/cpp/include) + target_link_libraries(de_tier_tomb PUBLIC gsl check subunit pthread atomic) + target_link_options(de_tier_tomb PUBLIC -mcx16) + target_include_directories(de_tier_tomb PRIVATE include external/PLEX/include external/psudb-common/cpp/include external) add_executable(de_level_tag ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_level_tag.cpp) - target_link_libraries(de_level_tag PUBLIC gsl check subunit pthread) - target_include_directories(de_level_tag PRIVATE include external/psudb-common/cpp/include) + target_link_libraries(de_level_tag PUBLIC gsl check subunit pthread atomic) + target_link_options(de_level_tag PUBLIC -mcx16) + target_include_directories(de_level_tag PRIVATE include external/psudb-common/cpp/include external) add_executable(de_level_tomb ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_level_tomb.cpp) - target_link_libraries(de_level_tomb PUBLIC gsl check subunit pthread) - target_include_directories(de_level_tomb PRIVATE include external/psudb-common/cpp/include) + target_link_libraries(de_level_tomb PUBLIC gsl check subunit pthread atomic) + target_link_options(de_level_tomb PUBLIC -mcx16) + target_include_directories(de_level_tomb PRIVATE include external/ctpl external/PLEX/include external/psudb-common/cpp/include external) + + add_executable(de_level_concurrent ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_level_concurrent.cpp) + target_link_libraries(de_level_concurrent PUBLIC gsl check subunit pthread atomic) + target_link_options(de_level_concurrent PUBLIC -mcx16) + target_include_directories(de_level_concurrent PRIVATE include external/ctpl external/PLEX/include external/psudb-common/cpp/include external) + + add_executable(de_tier_concurrent ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_tier_concurrent.cpp) + target_link_libraries(de_tier_concurrent PUBLIC gsl check subunit pthread atomic) + target_link_options(de_tier_concurrent PUBLIC -mcx16) + target_include_directories(de_tier_concurrent PRIVATE include external/ctpl external/PLEX/include external/psudb-common/cpp/include external) add_executable(memisam_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/memisam_tests.cpp) - target_link_libraries(memisam_tests PUBLIC gsl check subunit pthread) + target_link_libraries(memisam_tests PUBLIC gsl check subunit 
pthread atomic) + target_link_options(memisam_tests PUBLIC -mcx16) target_include_directories(memisam_tests PRIVATE include external/psudb-common/cpp/include) - add_executable(wss_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/wss_tests.cpp) - target_link_libraries(wss_tests PUBLIC gsl check subunit pthread) - target_include_directories(wss_tests PRIVATE include external/psudb-common/cpp/include) - add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp) - target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread) - target_include_directories(triespline_tests PRIVATE include external/PLEX/include external/psudb-common/cpp/include) + target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(triespline_tests PUBLIC -mcx16) + target_include_directories(triespline_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include) + + add_executable(alias_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/alias_tests.cpp) + target_link_libraries(alias_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(alias_tests PUBLIC -mcx16) + target_include_directories(alias_tests PRIVATE include external/psudb-common/cpp/include) add_executable(pgm_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/pgm_tests.cpp) - target_link_libraries(pgm_tests PUBLIC gsl check subunit pthread gomp) + target_link_libraries(pgm_tests PUBLIC gsl check subunit pthread gomp atomic) target_include_directories(pgm_tests PRIVATE include external/PGM-index/include external/psudb-common/cpp/include) + target_link_options(pgm_tests PUBLIC -mcx16) target_compile_options(pgm_tests PUBLIC -fopenmp) endif() -# Benchmark build instructions -if (bench) +if (bench) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/benchmarks") + add_executable(reconstruction_interference ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/reconstruction_interference.cpp) + target_link_libraries(reconstruction_interference PUBLIC gsl pthread gomp atomic) + target_link_options(reconstruction_interference PUBLIC -mcx16) + target_include_directories(reconstruction_interference PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + + add_executable(insertion_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insertion_tput.cpp) + target_link_libraries(insertion_tput PUBLIC gsl pthread gomp atomic) + target_include_directories(insertion_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(insertion_tput PUBLIC -mcx16) + + add_executable(query_workload_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/query_workload_bench.cpp) + target_link_libraries(query_workload_bench PUBLIC gsl pthread gomp atomic) + target_include_directories(query_workload_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(query_workload_bench PUBLIC -mcx16) + + add_executable(insert_query_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insert_query_tput.cpp) + target_link_libraries(insert_query_tput PUBLIC gsl pthread gomp atomic) + target_include_directories(insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(insert_query_tput PUBLIC -mcx16) + + + 
add_executable(btree_insert_query_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/btree_insert_query_tput.cpp) + target_link_libraries(btree_insert_query_tput PUBLIC gsl pthread gomp atomic) + target_include_directories(btree_insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(btree_insert_query_tput PUBLIC -mcx16) + + add_executable(watermark_testing ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/watermark_testing.cpp) + target_link_libraries(watermark_testing PUBLIC gsl pthread gomp atomic) + target_include_directories(watermark_testing PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(watermark_testing PUBLIC -mcx16) + + add_executable(irs_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/irs_bench.cpp) + target_link_libraries(irs_bench PUBLIC gsl pthread gomp atomic) + target_include_directories(irs_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(irs_bench PUBLIC -mcx16) + + #add_executable(static_dynamic_comp ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/static_dynamic_comp.cpp) + #target_link_libraries(static_dynamic_comp PUBLIC gsl pthread gomp atomic) + #target_include_directories(static_dynamic_comp PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + #target_link_options(static_dynamic_comp PUBLIC -mcx16) + + add_executable(insert_tail_latency ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insert_tail_latency.cpp) + target_link_libraries(insert_tail_latency PUBLIC gsl pthread gomp atomic) + target_include_directories(insert_tail_latency PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include) + target_link_options(insert_tail_latency PUBLIC -mcx16) +endif() + +if (old_bench) add_executable(alias_wss_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/alias_wss_bench.cpp) target_link_libraries(alias_wss_bench PUBLIC gsl pthread gomp) target_include_directories(alias_wss_bench PRIVATE include external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include) @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2023, Douglas Rumbaugh and Dong Xie +Copyright (c) 2023, Douglas B. 
Rumbaugh and Dong Xie Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/benchmarks/btree_insert_query_tput.cpp b/benchmarks/btree_insert_query_tput.cpp new file mode 100644 index 0000000..f838f80 --- /dev/null +++ b/benchmarks/btree_insert_query_tput.cpp @@ -0,0 +1,120 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "query/irs.h" +#include "include/data-proc.h" +#include "psu-ds/BTree.h" +#include <mutex> + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<int64_t, int64_t> Rec; +typedef de::irs::Parms<Rec> QP; + +std::atomic<bool> inserts_done = false; + +std::mutex g_btree_lock; + +void query_thread(BenchBTree *tree, std::vector<QP> *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + + std::vector<int64_t> result; + g_btree_lock.lock(); + tree->range_sample(q.lower_bound, q.upper_bound, 1000, result, rng); + g_btree_lock.unlock(); + + total += result.size(); + usleep(1); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(BenchBTree *tree, size_t start, std::vector<int64_t> *records) { + size_t reccnt = 0; + for (size_t i=start; i<records->size(); i++) { + btree_record r; + r.key = (*records)[i]; + r.value = i; + + g_btree_lock.lock(); + tree->insert(r); + g_btree_lock.unlock(); + + if (i % 100000 == 0) { + fprintf(stderr, "Inserted %ld records\n", i); + } + } + + inserts_done.store(true); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "btree_insert_query_tput reccnt query_threads datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t qthread_cnt = atol(argv[2]); + std::string d_fname = std::string(argv[3]); + std::string q_fname = std::string(argv[4]); + + auto tree = new BenchBTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file(d_fname, n); + auto queries = read_range_queries<QP>(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; i<warmup; i++) { + btree_record r; + r.key = data[i]; + r.value = i; + + tree->insert(r); + } + + TIMER_INIT(); + + std::vector<std::thread> qthreads(qthread_cnt); + + TIMER_START(); + std::thread i_thrd(insert_thread, tree, warmup, &data); + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i] = std::thread(query_thread, tree, &queries); + } + i_thrd.join(); + TIMER_STOP(); + + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i].join(); + } + + auto total_latency = TIMER_RESULT(); + size_t throughput = (size_t) ((double) (n - warmup) / (double) total_latency * 1e9); + fprintf(stdout, "T\t%ld\t%ld\n", total_latency, throughput); + + gsl_rng_free(rng); + delete tree; + fflush(stderr); +} + diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h new file mode 100644 index 0000000..571c073 --- /dev/null +++ b/benchmarks/include/btree-util.h @@ -0,0 +1,27 @@ +#pragma once + +#include <cstdlib> +#include "psu-ds/BTree.h" + +struct btree_record { + int64_t key; + int64_t value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct 
btree_key_extract { + static const int64_t &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree; + + diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h new file mode 100644 index 0000000..444cb94 --- /dev/null +++ b/benchmarks/include/data-proc.h @@ -0,0 +1,258 @@ +#include <cstdlib> +#include <cstdio> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> +#include <gsl/gsl_rng.h> +#include <cstring> +#include <vector> + +#include "psu-ds/BTree.h" + +#pragma once + +typedef int64_t key_type; +typedef int64_t value_type; +typedef uint64_t weight_type; + +static gsl_rng *g_rng; +static bool g_osm_data; + +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree; + +static key_type g_min_key = UINT64_MAX; +static key_type g_max_key = 0; + +static size_t g_max_record_cnt = 0; +static size_t g_reccnt = 0; + +static constexpr unsigned int DEFAULT_SEED = 0; + +static unsigned int get_random_seed() +{ + unsigned int seed = 0; + std::fstream urandom; + urandom.open("/dev/urandom", std::ios::in|std::ios::binary); + urandom.read((char *) &seed, sizeof(seed)); + urandom.close(); + + return seed; +} + +static key_type osm_to_key(const char *key_field) { + double tmp_key = (atof(key_field) + 180) * 10e6; + return (key_type) tmp_key; +} + +static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) +{ + g_rng = gsl_rng_alloc(type); + gsl_rng_set(g_rng, seed); +} + +static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) +{ + unsigned int seed = (random_seed) ? 
get_random_seed() : DEFAULT_SEED; + init_bench_rng(seed, gsl_rng_mt19937); + g_osm_data = osm_correction; + g_max_record_cnt = max_reccnt; + g_reccnt = 0; +} + +static void delete_bench_env() +{ + gsl_rng_free(g_rng); +} + + +template <typename QP> +static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) { + std::vector<QP> queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template <typename QP> +static std::vector<QP> read_range_queries(std::string &fname, double selectivity) { + std::vector<QP> queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template <typename QP> +static std::vector<QP> read_knn_queries(std::string fname, size_t k) { + std::vector<QP> queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +/* + * NOTE: The QP type must have lower_bound and upper_bound attributes, which + * this function will initialize. Any other query parameter attributes must + * be manually initialized after the call. + */ +template <typename R> +static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + +template <typename R> +static bool next_record(std::fstream &file, R &record, bool binary=false) +{ + static value_type value = 1; + if (g_reccnt >= g_max_record_cnt) return false; + + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + std::string key_field; + std::string value_field; + std::string weight_field; + + std::getline(line_stream, value_field, '\t'); + std::getline(line_stream, key_field, '\t'); + std::getline(line_stream, weight_field, '\t'); + + record.key = (g_osm_data) ? 
osm_to_key(key_field.c_str()) : atol(key_field.c_str());
+        record.value = atol(value_field.c_str());
+
+        if (record.key < g_min_key) g_min_key = record.key;
+        if (record.key > g_max_key) g_max_key = record.key;
+
+        g_reccnt++;
+
+        return true;
+    }
+
+    return false;
+}
+
+template <typename R>
+static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
+    vec.clear();
+
+    size_t cnt = 0;
+    while (cnt < n) {
+        if (to_delete.size() == 0) {
+            return false;
+        }
+
+        auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
+        vec.emplace_back(to_delete[i]);
+        to_delete.erase(to_delete.begin() + i);
+        cnt++;
+    }
+
+    return true;
+}
+
+static std::vector<int64_t> read_sosd_file(std::string &fname, size_t n) {
+    std::fstream file;
+    file.open(fname, std::ios::in | std::ios::binary);
+
+    std::vector<int64_t> records(n);
+    for (size_t i=0; i<n; i++) {
+        file.read((char*) &(records[i]), sizeof(int64_t));
+    }
+
+    return records;
+}
diff --git a/benchmarks/insert_query_tput.cpp b/benchmarks/insert_query_tput.cpp
new file mode 100644
index 0000000..ce05264
--- /dev/null
+++ b/benchmarks/insert_query_tput.cpp
@@ -0,0 +1,121 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "include/data-proc.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::irs::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
+typedef de::irs::Parms<Rec> QP;
+
+std::atomic<bool> inserts_done = false;
+
+void query_thread(Ext *extension, std::vector<QP> *queries) {
+    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+    size_t total = 0;
+
+    while (!inserts_done.load()) {
+        auto q_idx = gsl_rng_uniform_int(rng, queries->size());
+
+        auto q = (*queries)[q_idx];
+        q.rng = rng;
+        q.sample_size = 1000;
+
+        auto res = extension->query(&q);
+        auto r = res.get();
+        total += r.size();
+        usleep(1);
+    }
+
+    fprintf(stderr, "%ld\n", total);
+
+    gsl_rng_free(rng);
+}
+
+void insert_thread(Ext *extension, size_t start, std::vector<int64_t> *records) {
+    size_t reccnt = 0;
+    Rec r;
+    for (size_t i=start; i<records->size(); i++) {
+        r.key = (*records)[i];
+        r.value = i;
+
+        while (!extension->insert(r)) {
+            usleep(1);
+        }
+    }
+
+    inserts_done.store(true);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 5) {
+        fprintf(stderr, "insert_query_tput reccnt query_threads datafile queryfile\n");
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    size_t qthread_cnt = atol(argv[2]);
+    std::string d_fname = std::string(argv[3]);
+    std::string q_fname = std::string(argv[4]);
+
+    auto extension = new Ext(1000, 12000, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file(d_fname, n);
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    Rec r;
+    for (size_t i=0; i<warmup; i++) {
+        r.key = data[i];
+        r.value = gsl_rng_uniform_int(rng, n);
+
+        while (!extension->insert(r)) {
+            usleep(1);
+        }
+    }
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    std::vector<std::thread> qthreads(qthread_cnt);
+
+    TIMER_START();
+    std::thread i_thrd(insert_thread, extension, warmup, &data);
+    for (size_t i=0; i<qthread_cnt; i++) {
+        qthreads[i] = std::thread(query_thread, extension, &queries);
+    }
+    i_thrd.join();
+    TIMER_STOP();
+
+    for (size_t i=0; i<qthread_cnt; i++) {
+        qthreads[i].join();
+    }
+
+    auto total_latency = TIMER_RESULT();
+    size_t throughput = (size_t) ((double) (n - warmup) / (double) total_latency * 1e9);
+    fprintf(stdout, "T\t%ld\t%ld\n", total_latency, throughput);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp
new file mode 100644
index 0000000..bdc4536
--- /dev/null
+++ b/benchmarks/insert_tail_latency.cpp
@@ -0,0 +1,81 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include <unistd.h>
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::rc::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+
+std::atomic<size_t> total_latency = 0;
+
+void insert_thread(Ext *extension, size_t n, size_t k, size_t rate) {
+    int64_t delay = (1.0 / (double) rate) * 1e6; // delay in us
+    TIMER_INIT();
+    for (int64_t i=0; i<n; i+=k) {
+        TIMER_START();
+        for (int64_t j=0; j<k; j++) {
+            Rec r = {i+j, i+j};
+            while (!extension->insert(r)) {
+                _mm_pause();
+            }
+
+            //usleep(delay);
+            /*
+            for (size_t i=0; i<10000; i++) {
+                __asm__ __volatile__ ("":::"memory");
+            }
+            */
+        }
+        TIMER_STOP();
+
+        auto insert_lat = TIMER_RESULT();
+
+        total_latency.fetch_add(insert_lat);
+        fprintf(stdout, "I\t%ld\t%ld\t%ld\n", i+k, insert_lat, k);
+    }
+}
+
+int main(int argc, char **argv) {
+
+    /* the closeout routine takes _forever_ ... so we'll just leak the memory */
+    auto extension = new Ext(12000, 12001, 3);
+    size_t n = 10000000;
+    size_t per_trial = 1000;
+    double selectivity = .001;
+    size_t rate = 1000000;
+
+    total_latency.store(0);
+
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    std::thread i_thrd1(insert_thread, extension, n, per_trial, rate);
+    //std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate);
+
+
+    i_thrd1.join();
+    //i_thrd2.join();
+
+    auto avg_latency = total_latency.load() / n;
+    auto throughput = (int64_t) ((double) n / (double) total_latency * 1e9);
+
+    fprintf(stdout, "AVG LAT: %ld\nThroughput: %ld\n", avg_latency, throughput);
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+}
diff --git a/benchmarks/insertion_tput.cpp b/benchmarks/insertion_tput.cpp
new file mode 100644
index 0000000..b4428f6
--- /dev/null
+++ b/benchmarks/insertion_tput.cpp
@@ -0,0 +1,72 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::rq::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
+
+
+void insert_thread(int64_t start, int64_t end, Ext *extension) {
+    for (int64_t i=start; i<end; i++) {
+        Rec r = {i, i};
+        while (!extension->insert(r)) {
+            _mm_pause();
+        }
+    }
+}
+
+
+int main(int argc, char **argv) {
+
+
+    size_t n = 1000000000;
+
+    std::vector<int> counts = {1, 2, 4, 8}; //, 16, 32, 64}
+
+
+    for (auto thread_count : counts) {
+
+        auto extension = new Ext(1000, 12000, 8);
+
+        size_t per_thread = n / thread_count;
+
+        std::thread threads[thread_count];
+
+        TIMER_INIT();
+        TIMER_START();
+        for (size_t i=0; i<thread_count; i++) {
+            threads[i] = std::thread(insert_thread, i*per_thread,
+                                     i*per_thread+per_thread, extension);
+        }
+
+        for (size_t i=0; i<thread_count; i++) {
+            threads[i].join();
+        }
+
+        TIMER_STOP();
+
+        auto total_time = TIMER_RESULT();
+
+        double tput = (double) n / (double) total_time * 1e9;
+
+        fprintf(stdout, "%ld\t%d\t%lf\n", extension->get_record_count(),
+                thread_count, tput);
+
+        delete extension;
+    }
+
+    fflush(stderr);
+}
diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp
new file mode 100644
index 0000000..ddb4220
--- /dev/null
+++ b/benchmarks/irs_bench.cpp
@@ -0,0 +1,125 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "include/data-proc.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::irs::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::irs::Parms<Rec> QP;
+
+void run_queries(Ext *extension, std::vector<QP> &queries, gsl_rng *rng) {
+    size_t total = 0;
+    for (size_t i=0; i<queries.size(); i++) {
+        auto q = &queries[i];
+        q->rng = rng;
+        q->sample_size = 1000;
+
+        auto res = extension->query(q);
+        auto r = res.get();
+        total += r.size();
+    }
+
+    fprintf(stderr, "%ld\n", total);
+}
+
+size_t g_deleted_records = 0;
+double delete_proportion = 0.05;
+
+void insert_records(Ext *extension, size_t start,
+                    size_t stop,
+                    std::vector<int64_t> &records,
+                    std::vector<size_t> &to_delete,
+                    size_t &delete_idx,
+                    bool delete_records,
+                    gsl_rng *rng) {
+    size_t reccnt = 0;
+    Rec r;
+    for (size_t i=start; i<stop; i++) {
+        r.key = records[i];
+        r.value = i;
+
+        while (!extension->insert(r)) {
+            usleep(1);
+        }
+
+        if (delete_records && delete_idx < to_delete.size()
+              && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
+            r.key = records[to_delete[delete_idx]];
+            r.value = (int64_t) (to_delete[delete_idx]);
+            while (!extension->erase(r)) {
+                usleep(1);
+            }
+            delete_idx++;
+            g_deleted_records++;
+        }
+    }
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        fprintf(stderr, "irs_bench reccnt datafile queryfile\n");
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 30% of records */
+    size_t warmup = .3 * n;
+    size_t delete_idx = 0;
+    insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries(extension, queries, rng);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    fprintf(stdout, "T\t%ld\t%ld\t%ld\n",
insert_throughput, query_latency, g_deleted_records); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/alex_rq_bench.cpp b/benchmarks/old-bench/alex_rq_bench.cpp index f75afa6..f75afa6 100644 --- a/benchmarks/alex_rq_bench.cpp +++ b/benchmarks/old-bench/alex_rq_bench.cpp diff --git a/benchmarks/alias_wss_bench.cpp b/benchmarks/old-bench/alias_wss_bench.cpp index a3a43f2..a3a43f2 100644 --- a/benchmarks/alias_wss_bench.cpp +++ b/benchmarks/old-bench/alias_wss_bench.cpp diff --git a/benchmarks/btree_irs_bench.cpp b/benchmarks/old-bench/btree_irs_bench.cpp index 862fc6b..862fc6b 100644 --- a/benchmarks/btree_irs_bench.cpp +++ b/benchmarks/old-bench/btree_irs_bench.cpp diff --git a/benchmarks/btree_rq_bench.cpp b/benchmarks/old-bench/btree_rq_bench.cpp index d92b45d..d92b45d 100644 --- a/benchmarks/btree_rq_bench.cpp +++ b/benchmarks/old-bench/btree_rq_bench.cpp diff --git a/benchmarks/include/bench.h b/benchmarks/old-bench/include/bench.h index 586ff12..586ff12 100644 --- a/benchmarks/include/bench.h +++ b/benchmarks/old-bench/include/bench.h diff --git a/benchmarks/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h index e33b93d..e33b93d 100644 --- a/benchmarks/include/bench_utility.h +++ b/benchmarks/old-bench/include/bench_utility.h diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/old-bench/include/standalone_utility.h index 9876e84..727daa5 100644 --- a/benchmarks/include/standalone_utility.h +++ b/benchmarks/old-bench/include/standalone_utility.h @@ -1,18 +1,12 @@ #include <cstdlib> #include <cstdio> -#include <chrono> -#include <algorithm> -#include <numeric> -#include <memory> #include <iostream> #include <fstream> #include <sstream> -#include <unordered_set> -#include <set> #include <string> -#include <random> #include <gsl/gsl_rng.h> #include <cstring> +#include <vector> typedef uint64_t key_type; typedef uint64_t value_type; @@ -244,19 +238,3 @@ static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, siz td: return true; } - -/* - * helper routines for displaying progress bars to stderr - */ -static const char *g_prog_bar = "======================================================================"; -static const size_t g_prog_width = 50; - -static void progress_update(double percentage, std::string prompt) { - int val = (int) (percentage * 100); - int lpad = (int) (percentage * g_prog_width); - int rpad = (int) (g_prog_width - lpad); - fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, ""); - fflush(stderr); - - if (percentage >= 1) fprintf(stderr, "\n"); -} diff --git a/benchmarks/isam_irs_bench.cpp b/benchmarks/old-bench/isam_irs_bench.cpp index 96525f0..96525f0 100644 --- a/benchmarks/isam_irs_bench.cpp +++ b/benchmarks/old-bench/isam_irs_bench.cpp diff --git a/benchmarks/isam_rq_bench.cpp b/benchmarks/old-bench/isam_rq_bench.cpp index bb5626e..bb5626e 100644 --- a/benchmarks/isam_rq_bench.cpp +++ b/benchmarks/old-bench/isam_rq_bench.cpp diff --git a/benchmarks/mtree_knn_bench.cpp b/benchmarks/old-bench/mtree_knn_bench.cpp index 9d4cc57..9d4cc57 100644 --- a/benchmarks/mtree_knn_bench.cpp +++ b/benchmarks/old-bench/mtree_knn_bench.cpp diff --git a/benchmarks/pgm_pl_bench.cpp b/benchmarks/old-bench/pgm_pl_bench.cpp index f798861..f798861 100644 --- a/benchmarks/pgm_pl_bench.cpp +++ b/benchmarks/old-bench/pgm_pl_bench.cpp diff --git a/benchmarks/pgm_rq_bench.cpp b/benchmarks/old-bench/pgm_rq_bench.cpp index e25d29f..e25d29f 100644 --- 
a/benchmarks/pgm_rq_bench.cpp +++ b/benchmarks/old-bench/pgm_rq_bench.cpp diff --git a/benchmarks/test.cpp b/benchmarks/old-bench/test.cpp index 75bffe3..75bffe3 100644 --- a/benchmarks/test.cpp +++ b/benchmarks/old-bench/test.cpp diff --git a/benchmarks/triespline_rq_bench.cpp b/benchmarks/old-bench/triespline_rq_bench.cpp index 967c3b0..967c3b0 100644 --- a/benchmarks/triespline_rq_bench.cpp +++ b/benchmarks/old-bench/triespline_rq_bench.cpp diff --git a/benchmarks/upgm_pl_bench.cpp b/benchmarks/old-bench/upgm_pl_bench.cpp index e0445b2..e0445b2 100644 --- a/benchmarks/upgm_pl_bench.cpp +++ b/benchmarks/old-bench/upgm_pl_bench.cpp diff --git a/benchmarks/upgm_rq_bench.cpp b/benchmarks/old-bench/upgm_rq_bench.cpp index 940a9e6..940a9e6 100644 --- a/benchmarks/upgm_rq_bench.cpp +++ b/benchmarks/old-bench/upgm_rq_bench.cpp diff --git a/benchmarks/vptree_knn_bench.cpp b/benchmarks/old-bench/vptree_knn_bench.cpp index d8247e4..d8247e4 100644 --- a/benchmarks/vptree_knn_bench.cpp +++ b/benchmarks/old-bench/vptree_knn_bench.cpp diff --git a/benchmarks/query_workload_bench.cpp b/benchmarks/query_workload_bench.cpp new file mode 100644 index 0000000..d79daf2 --- /dev/null +++ b/benchmarks/query_workload_bench.cpp @@ -0,0 +1,170 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<int64_t, int64_t> Rec; +typedef de::ISAMTree<Rec> ISAM; +typedef de::rc::Query<Rec, ISAM> Q; +typedef de::DynamicExtension<Rec, ISAM, Q> Ext; + +size_t g_insert_size = 50000; +size_t g_insert_frequency = 1000; +size_t g_query_count = 5000; + +void query_thread(Ext *extension, gsl_rng *rng, size_t n, bool parallel=true) { + TIMER_INIT(); + double selectivity = .001; + size_t k = 100; + size_t range = n * selectivity; + + size_t total_result = 0; + + auto q = new de::rc::Parms<Rec>(); + + std::vector<std::future<std::vector<Rec>>> results(k); + + TIMER_START(); + for (int64_t i=0; i<k; i++) { + size_t start = gsl_rng_uniform_int(rng, n - range); + + q->lower_bound = start; + q->upper_bound = start + range; + results[i] = extension->query(q); + if (!parallel) { + auto x = results[i].get(); + total_result += x[0].key; + } + } + + if (parallel) { + for (size_t i=0; i<k; i++) { + auto x = results[i].get(); + total_result += x[0].key; + } + } + + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", extension->get_record_count(), query_lat, k); + fprintf(stderr, "Q Total: %ld\n", total_result); + delete q; +} + +void insert_thread(Ext *extension, size_t n) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + TIMER_INIT(); + size_t k=1000; + + Rec r; + for (size_t i=0; i<g_insert_size; i+=k) { + TIMER_START(); + for (size_t j=0; j<k; j++) { + r.key = gsl_rng_uniform_int(rng, n); + r.value = gsl_rng_uniform_int(rng, n); + + while (!extension->insert(r)) { + _mm_pause(); + } + } + TIMER_STOP(); + + auto insert_lat = TIMER_RESULT(); + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", extension->get_record_count(), insert_lat, k); + } + + gsl_rng_free(rng); +} + +void parallel_bench(Ext *extension, gsl_rng *rng, size_t n) { + TIMER_INIT(); + + TIMER_START(); + for (size_t i=0; i < g_query_count; i+=100) { + query_thread(extension, rng, n); + if (i % g_insert_frequency == 0) { + auto x = std::thread(insert_thread, extension, n); + x.detach(); + } + } + TIMER_STOP(); + + auto 
workload_duration = TIMER_RESULT(); + fprintf(stdout, "W\t%ld\n", workload_duration); +} + + +void serial_bench(Ext *extension, gsl_rng *rng, size_t n) { + TIMER_INIT(); + TIMER_START(); + for (size_t i=0; i < g_query_count; i+=100) { + query_thread(extension, rng, n, false); + if (i % g_insert_frequency == 0) { + auto x = std::thread(insert_thread, extension, n); + x.join(); + } + } + TIMER_STOP(); + + auto workload_duration = TIMER_RESULT(); + fprintf(stdout, "W\t%ld\n", workload_duration); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "query_workload_bench reccnt lwm hwm parallel\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t lwm = atol(argv[2]); + size_t hwm = atol(argv[3]); + bool parallel = atoi(argv[4]); + + size_t scale_factor = 8; + + auto extension = new Ext(lwm, hwm, scale_factor); + size_t per_trial = 1000; + double selectivity = .001; + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + /* build initial structure */ + size_t reccnt = 0; + Rec r; + for (size_t i=0; i<n; i++) { + r.key = gsl_rng_uniform_int(rng, n); + r.value = gsl_rng_uniform_int(rng, n); + + while (!extension->insert(r)) { + _mm_pause(); + } + } + + if (parallel) { + parallel_bench(extension, rng, n); + } else { + serial_bench(extension, rng, n); + } + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/reconstruction_interference.cpp b/benchmarks/reconstruction_interference.cpp new file mode 100644 index 0000000..57eb923 --- /dev/null +++ b/benchmarks/reconstruction_interference.cpp @@ -0,0 +1,124 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + + +typedef de::Record<int64_t, int64_t> Rec; +typedef de::ISAMTree<Rec> ISAM; +typedef de::rc::Query<Rec, ISAM> Q; +typedef de::DynamicExtension<Rec, ISAM, Q> Ext; + +volatile std::atomic<bool> queries_done; + +void query_thread(Ext *extension, double selectivity, size_t k) { + TIMER_INIT(); + + size_t reccnt = extension->get_record_count(); + size_t range = reccnt * selectivity; + + auto q = new de::rc::Parms<Rec>(); + + TIMER_START(); + for (int64_t i=0; i<k; i++) { + size_t start = rand() % (reccnt - range); + q->lower_bound = start; + q->upper_bound = start + range; + auto res = extension->query(q); + auto r = res.get(); + } + TIMER_STOP(); + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\n", reccnt, query_lat, k); + delete q; +} + +Ext *build_structure(size_t n) { + auto extension = new Ext(1000, 10000, 2); + + size_t i=0; + Rec r; + do { + r.key = rand() % n; + r.value = i; + if (extension->insert(r)) { + i++; + } else { + _mm_pause(); + } + } while (i < n); + + extension->await_next_epoch(); + return extension; +} + +void query_benchmark(double selectivity, size_t k, Ext *extension, size_t query_thrd_cnt) { + TIMER_INIT(); + + std::vector<std::thread> thrds(query_thrd_cnt); + + TIMER_START(); + for (size_t i=0; i<query_thrd_cnt; i++) { + thrds[i] = std::thread(query_thread, extension, selectivity, k); + } + + for (size_t i=0; i<query_thrd_cnt; i++) { + thrds[i].join(); + } + TIMER_STOP(); + + auto query_lat = TIMER_RESULT(); + fprintf(stdout, "Q\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(), query_lat, k, query_thrd_cnt); + + queries_done.store(true); +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ 
... so we'll just leak the memory */
+    size_t n = 10000000;
+
+    size_t per_trial = 1000;
+    double selectivity = .001;
+
+    /* build initial structure */
+    auto extension = build_structure(n);
+
+    std::vector<size_t> thread_counts = {8, 16, 32, 64, 128};
+
+    for (auto &threads : thread_counts) {
+        /* benchmark queries w/o any interference from reconstructions */
+        query_benchmark(selectivity, per_trial, extension, threads);
+
+        fprintf(stderr, "Running interference test...\n");
+
+        queries_done.store(false);
+        /* trigger a worst-case reconstruction and benchmark the queries */
+
+        std::thread q_thrd(query_benchmark, selectivity, per_trial, extension, threads);
+
+        while (!queries_done.load()) {
+            auto s = extension->create_static_structure();
+            delete s;
+        }
+
+        fprintf(stderr, "Construction complete\n");
+        q_thrd.join();
+    }
+
+    extension->print_scheduler_statistics();
+    delete extension;
+
+    fflush(stderr);
+}
diff --git a/benchmarks/static_dynamic_comp.cpp b/benchmarks/static_dynamic_comp.cpp
new file mode 100644
index 0000000..5a89d88
--- /dev/null
+++ b/benchmarks/static_dynamic_comp.cpp
@@ -0,0 +1,117 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "query/rangecount.h"
+#include "shard/TrieSpline.h"
+#include "shard/ISAMTree.h"
+
+
+#include "framework/interface/Record.h"
+#include "framework/interface/Query.h"
+#include "include/data-proc.h"
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<key_type, value_type> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::TrieSpline<Rec> TS;
+
+typedef de::rc::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
+
+typedef de::MutableBuffer<Rec> Buffer;
+
+typedef de::rc::Parms<Rec> query;
+
+Buffer *file_to_mbuffer(std::string &fname, size_t n) {
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    auto buff = new Buffer(n, n+1);
+
+    Rec rec;
+    while (next_record(file, rec) && buff->get_record_count() < n) {
+        buff->append(rec);
+    }
+
+    return buff;
+}
+
+BenchBTree *file_to_btree(std::string &fname, size_t n) {
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    auto btree = new BenchBTree();
+    Rec rec;
+    while (next_record(file, rec) && btree->size() < n) {
+        btree->insert({rec.key, rec.value});
+    }
+
+    return btree;
+}
+
+template<de::ShardInterface S>
+void benchmark_shard(S *shard, std::vector<query> &queries) {
+    TIMER_INIT();
+
+    TIMER_START();
+    for (auto &q : queries) {
+        auto state = de::rc::Query<Rec, S>::get_query_state(shard, &q);
+        auto res = de::rc::Query<Rec, S>::query(shard, state, &q);
+    }
+    TIMER_STOP();
+
+    auto latency = TIMER_RESULT() / queries.size();
+    fprintf(stdout, "%ld %ld\n", latency, shard->get_memory_usage() - shard->get_record_count() * sizeof(de::Wrapped<Rec>));
+}
+
+void benchmark_btree(BenchBTree *btree, std::vector<query> &queries) {
+    TIMER_INIT();
+
+    TIMER_START();
+    for (auto &q : queries) {
+        size_t c = 0;
+        auto ptr = btree->find(q.lower_bound);
+        while (ptr != btree->end() && ptr->key <= q.upper_bound) {
+            c++;
+            ++ptr;
+        }
+    }
+    TIMER_STOP();
+
+    auto latency = TIMER_RESULT() / queries.size();
+    auto mem = btree->get_stats().inner_nodes * psudb::btree_default_traits<key_type, btree_record>::inner_slots * (sizeof(key_type) + sizeof(void*));
+    fprintf(stdout, "%ld %ld\n", latency, mem);
+}
+
+int main(int argc, char **argv) {
+    if (argc < 4) {
+        fprintf(stderr, "Usage: static_dynamic_comp <filename> <record_count> <query_file>\n");
+        exit(EXIT_FAILURE);
+    }
+
+    std::string d_fname = std::string(argv[1]);
+    size_t reccnt = atol(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    init_bench_env(reccnt, true, false);
+    auto queries = read_range_queries<query>(q_fname, .001);
+
+    auto buff = file_to_mbuffer(d_fname, reccnt);
+
+    TS *ts = new TS(buff->get_buffer_view());
+    benchmark_shard<TS>(ts, queries);
+    delete ts;
+
+    ISAM *isam = new ISAM(buff->get_buffer_view());
+    benchmark_shard<ISAM>(isam, queries);
+    delete isam;
+
+    auto btree = file_to_btree(d_fname, reccnt);
+    benchmark_btree(btree, queries);
+}
diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp
new file mode 100644
index 0000000..caba8ff
--- /dev/null
+++ b/benchmarks/watermark_testing.cpp
@@ -0,0 +1,55 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::rq::Query<Rec, ISAM> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
+
+
+
+int main(int argc, char **argv) {
+    std::vector hwms = {5000l, 10000l, 20000l, 50000l};
+    std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9};
+
+    size_t n = 1000000;
+
+    TIMER_INIT();
+
+    for (auto &hwm : hwms) {
+        for (size_t i=0; i<lwms.size(); i++) {
+            size_t lwm = hwm * lwms[i];
+
+            auto extension = new Ext(lwm, hwm, 8);
+            TIMER_START();
+            for (int64_t i=0; i<n; i++) {
+                Rec r = {i, i};
+                while (!extension->insert(r)) {
+                    _mm_pause();
+                }
+            }
+            TIMER_STOP();
+
+            auto insert_time = TIMER_RESULT();
+            double insert_throughput = (double) n / (double) insert_time * 1e9;
+
+            fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput);
+
+            extension->print_scheduler_statistics();
+
+            delete extension;
+        }
+    }
+}
diff --git a/external/ctpl b/external/ctpl
new file mode 160000
+Subproject 437e135dbd94eb65b45533d9ce8ee28b5bd37b6
diff --git a/external/psudb-common b/external/psudb-common
-Subproject e5a10e888d248638e48bf82da70fa356eef47ba
+Subproject fc23de575c11f3881358c8997b9949bb096d2ad
diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h
index 524024b..7ea5370 100644
--- a/include/framework/DynamicExtension.h
+++ b/include/framework/DynamicExtension.h
@@ -1,302 +1,639 @@
 /*
  * include/framework/DynamicExtension.h
  *
- * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
  *                    Dong Xie <dongx@psu.edu>
  *
- * All rights reserved. Published under the Modified BSD License.
+ * Distributed under the Modified BSD License.
 *
 */

#pragma once

 #include <atomic>
-#include <numeric>
 #include <cstdio>
 #include <vector>

-#include "framework/MutableBuffer.h"
-#include "framework/InternalLevel.h"
-#include "framework/ShardInterface.h"
-#include "framework/QueryInterface.h"
-#include "framework/RecordInterface.h"
+#include "framework/interface/Scheduler.h"
+#include "framework/scheduling/FIFOScheduler.h"
+#include "framework/scheduling/SerialScheduler.h"

-#include "shard/WIRS.h"
-#include "psu-util/timer.h"
-#include "psu-ds/Alias.h"
+#include "framework/structure/MutableBuffer.h"
+#include "framework/interface/Record.h"
+#include "framework/structure/ExtensionStructure.h"

-namespace de {
-
-thread_local size_t sampling_attempts = 0;
-thread_local size_t sampling_rejections = 0;
-thread_local size_t deletion_rejections = 0;
-thread_local size_t bounds_rejections = 0;
-thread_local size_t tombstone_rejections = 0;
-thread_local size_t buffer_rejections = 0;
-
-/*
- * thread_local size_t various_sampling_times go here.
- */
-thread_local size_t sample_range_time = 0;
-thread_local size_t alias_time = 0;
-thread_local size_t alias_query_time = 0;
-thread_local size_t rejection_check_time = 0;
-thread_local size_t buffer_sample_time = 0;
-thread_local size_t memlevel_sample_time = 0;
-thread_local size_t disklevel_sample_time = 0;
-thread_local size_t sampling_bailouts = 0;
-
-
-enum class LayoutPolicy {
-    LEVELING,
-    TEIRING
-};
-
-enum class DeletePolicy {
-    TOMBSTONE,
-    TAGGING
-};
+#include "framework/util/Configuration.h"
+#include "framework/scheduling/Epoch.h"

-typedef ssize_t level_index;
+namespace de {

-template <RecordInterface R, ShardInterface S, QueryInterface Q, LayoutPolicy L=LayoutPolicy::TEIRING, DeletePolicy D=DeletePolicy::TAGGING>
+template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING,
+          DeletePolicy D=DeletePolicy::TAGGING, SchedulerInterface SCHED=FIFOScheduler>
 class DynamicExtension {
-    //typedef typename S<R> Shard;
     typedef S Shard;
     typedef MutableBuffer<R> Buffer;
+    typedef ExtensionStructure<R, S, Q, L> Structure;
+    typedef Epoch<R, S, Q, L> _Epoch;
+    typedef BufferView<R> BufView;
+
+    static constexpr size_t QUERY = 1;
+    static constexpr size_t RECONSTRUCTION = 2;
+
+    struct epoch_ptr {
+        _Epoch *epoch;
+        size_t refcnt;
+    };

 public:
-    DynamicExtension(size_t buffer_cap, size_t scale_factor, double max_delete_prop)
-        : m_scale_factor(scale_factor), m_max_delete_prop(max_delete_prop),
-          m_buffer(new Buffer(buffer_cap, buffer_cap * max_delete_prop))
-    { }
+    DynamicExtension(size_t buffer_lwm, size_t buffer_hwm, size_t scale_factor, size_t memory_budget=0,
+                     size_t thread_cnt=16)
+        : m_scale_factor(scale_factor)
+        , m_max_delete_prop(1)
+        , m_sched(memory_budget, thread_cnt)
+        , m_buffer(new Buffer(buffer_lwm, buffer_hwm))
+        , m_core_cnt(thread_cnt)
+        , m_next_core(0)
+        , m_epoch_cnt(0)
+    {
+        auto vers = new Structure(buffer_hwm, m_scale_factor, m_max_delete_prop);
+        m_current_epoch.store({new _Epoch(0, vers, m_buffer, 0), 0});
+        m_previous_epoch.store({nullptr, 0});
+        m_next_epoch.store({nullptr, 0});
+    }

     ~DynamicExtension() {
-        delete m_buffer;
-
-        for (size_t i=0; i<m_levels.size(); i++) {
-            delete m_levels[i];
-        }
+        /* let any in-flight epoch transition finish */
+        await_next_epoch();
+
+        /* shutdown the scheduler */
+        m_sched.shutdown();
+
+        /* delete all held resources */
+        delete m_next_epoch.load().epoch;
+        delete m_current_epoch.load().epoch;
+        delete m_previous_epoch.load().epoch;
+
+        delete m_buffer;
     }

+    /*
+     * Insert the record `rec` into the index. If the buffer is full and
+     * the framework is blocking on an epoch transition, this call may fail
+     * and return 0. In this case, retry the call again later. If
+     * successful, 1 will be returned. The record will be immediately
+     * visible in the buffer upon the successful return of this function.
+     */
     int insert(const R &rec) {
         return internal_append(rec, false);
     }
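Editorial note: the retry contract above is caller-driven, and every new benchmark in this merge handles a failed `insert()` the same way. A caller-side sketch, assuming the `Ext`/`Rec` typedefs from the benchmarks (backoff via `usleep(1)` as in `insert_query_tput.cpp`; `_mm_pause()` is the other variant used):

```cpp
#include <unistd.h>

/* spin until the framework accepts the record; a failed insert means the
 * buffer is full while an epoch transition is pending */
void insert_with_retry(Ext *extension, const Rec &r) {
    while (!extension->insert(r)) {
        usleep(1);
    }
}
```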
+    /*
+     * Erase the record `rec` from the index. It is assumed that `rec`
+     * currently exists--no special checks are made for correctness here.
+     * The behavior of this function will differ depending on if tombstone
+     * or tagged deletes are used.
+     *
+     * Tombstone deletes - inserts a tombstone record for `rec`. This *may*
+     * return 0 and fail if the buffer is full and the framework is
+     * blocking on an epoch transition. In this case, repeat the call
+     * later. 1 will be returned when the tombstone is successfully
+     * inserted.
+     *
+     * Tagging deletes - Does a point lookup for the record across the
+     * entire structure, and sets its delete bit when found. Returns 1 if
+     * the record is found and marked, and 0 if it was not (i.e., if it
+     * isn't present in the index).
+     */
     int erase(const R &rec) {
-        Buffer *buffer;
-
+        // FIXME: delete tagging will require a lot of extra work to get
+        //        operating "correctly" in a concurrent environment.
+
+        /*
+         * Get a view on the buffer *first*. This will ensure a stronger
+         * ordering than simply accessing the buffer directly, but is
+         * not *strictly* necessary.
+         */
         if constexpr (D == DeletePolicy::TAGGING) {
-            auto buffer = get_buffer();
+            static_assert(std::same_as<SCHED, SerialScheduler>, "Tagging is only supported in single-threaded operation");

-            // Check the levels first. This assumes there aren't
-            // any undeleted duplicate records.
-            for (auto level : m_levels) {
-                if (level && level->delete_record(rec)) {
-                    return 1;
-                }
+            auto view = m_buffer->get_buffer_view();
+
+            auto epoch = get_active_epoch();
+            if (epoch->get_structure()->tagged_delete(rec)) {
+                end_job(epoch);
+                return 1;
             }

-            // the buffer will take the longest amount of time, and
-            // probably has the lowest probability of having the record,
-            // so we'll check it last.
-            return buffer->delete_record(rec);
+            end_job(epoch);
+
+            /*
+             * the buffer will take the longest amount of time, and
+             * probably has the lowest probability of having the record,
+             * so we'll check it last.
+             */
+            return view.delete_record(rec);
         }

+        /*
+         * If tagging isn't used, then delete using a tombstone
+         */
         return internal_append(rec, true);
     }

-    std::vector<R> query(void *parms) {
-        auto buffer = get_buffer();
+    /*
+     * Execute the query with parameters `parms` and return a future. This
+     * future can be used to access a vector containing the results of the
+     * query.
+     *
+     * The behavior of this function is undefined if `parms` is not a
+     * pointer to a valid query parameter object for the query type used as
+     * a template parameter to construct the framework.
+     */
+    std::future<std::vector<R>> query(void *parms) {
+        return schedule_query(parms);
+    }
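Editorial note: a future-returning `query()` means callers pay for the result only at `get()`. A sketch mirroring the irs usage in `insert_query_tput.cpp` above (the bounds here are illustrative; `Rec` and `extension` are the benchmarks' typedefs, and `rng` is a caller-owned `gsl_rng*`):

```cpp
de::irs::Parms<Rec> q;
q.lower_bound = 500;                 /* illustrative sample range */
q.upper_bound = 1500;
q.sample_size = 1000;
q.rng = rng;

std::future<std::vector<Rec>> res = extension->query(&q);
std::vector<Rec> sample = res.get(); /* blocks until the query completes */
```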
+
+    /*
+     * Returns the number of tombstone records currently within the
+     * framework. This function can be called when tagged deletes are used,
+     * but will always return 0 in that case.
+     */
+    size_t get_tombstone_count() {
+        auto epoch = get_active_epoch();
+        auto t = epoch->get_buffer().get_tombstone_count() + epoch->get_structure()->get_tombstone_count();
+        end_job(epoch);
 
-        // Get the buffer query state
-        auto buffer_state = Q::get_buffer_query_state(buffer, parms);
+        return t;
+    }
 
-        // Get the shard query states
-        std::vector<std::pair<ShardID, Shard*>> shards;
-        std::vector<void*> states;
+    /*
+     * Get the number of levels within the framework. This count will
+     * include any empty levels, but will not include the buffer. Note that
+     * this is *not* the same as the number of shards when tiering is used,
+     * as each level can contain multiple shards in that case.
+     */
+    size_t get_height() {
+        auto epoch = get_active_epoch();
+        auto t = epoch->get_structure()->get_height();
+        end_job(epoch);
 
-        for (auto &level : m_levels) {
-            level->get_query_states(shards, states, parms);
-        }
+        return t;
+    }
 
-        Q::process_query_states(parms, states, buffer_state);
+    /*
+     * Get the number of bytes of memory allocated across the framework for
+     * storing records and associated index information (i.e., internal
+     * ISAM tree nodes). This includes memory that is allocated but
+     * currently unused in the buffer, or in shards themselves
+     * (overallocation due to delete cancellation, etc.).
+     */
+    size_t get_memory_usage() {
+        auto epoch = get_active_epoch();
+        auto t = epoch->get_buffer().get_memory_usage() + epoch->get_structure()->get_memory_usage();
+        end_job(epoch);
 
-        std::vector<std::vector<Wrapped<R>>> query_results(shards.size() + 1);
+        return t;
+    }
 
-        // Execute the query for the buffer
-        auto buffer_results = Q::buffer_query(buffer, buffer_state, parms);
-        query_results[0] = std::move(filter_deletes(buffer_results, {-1, -1}, buffer));
-        if constexpr (Q::EARLY_ABORT) {
-            if (query_results[0].size() > 0) {
-                auto result = Q::merge(query_results, parms);
-                for (size_t i=0; i<states.size(); i++) {
-                    Q::delete_query_state(states[i]);
-                }
+    /*
+     * Get the number of bytes of memory allocated across the framework for
+     * auxiliary structures. This can include bloom filters, aux
+     * hashtables, etc.
+     */
+    size_t get_aux_memory_usage() {
+        auto epoch = get_active_epoch();
+        auto t = epoch->get_buffer().get_aux_memory_usage() + epoch->get_structure()->get_aux_memory_usage();
+        end_job(epoch);
 
-                Q::delete_buffer_query_state(buffer_state);
-                return result;
-            }
+        return t;
+    }
+
+    /*
+     * Returns the maximum physical capacity of the buffer, measured in
+     * records.
+     */
+    size_t get_buffer_capacity() {
+        return m_buffer->get_capacity();
+    }
+
+    /*
+     * Create a new single Shard object containing all of the records
+     * within the framework (buffer and shards). The optional parameter can
+     * be used to specify whether the Shard should be constructed with the
+     * currently active state of the framework (false), or if shard
+     * construction should wait until any ongoing reconstructions have
+     * finished and use that new version (true).
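+     *
+     * For example (an editor's sketch; ownership semantics inferred from
+     * the implementation below, which allocates the flattened shard with
+     * new and hands it back):
+     *
+     *     auto *flat = ext.create_static_structure(true); // wait out any transition
+     *     // ... point lookups, scans, etc., against the static shard ...
+     *     delete flat;                                    // caller owns the result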
+ */ + Shard *create_static_structure(bool await_reconstruction_completion=false) { + if (await_reconstruction_completion) { + await_next_epoch(); } - // Execute the query for each shard - for (size_t i=0; i<shards.size(); i++) { - auto shard_results = Q::query(shards[i].second, states[i], parms); - query_results[i+1] = std::move(filter_deletes(shard_results, shards[i].first, buffer)); - if constexpr (Q::EARLY_ABORT) { - if (query_results[i].size() > 0) { - auto result = Q::merge(query_results, parms); - for (size_t i=0; i<states.size(); i++) { - Q::delete_query_state(states[i]); - } + auto epoch = get_active_epoch(); + auto vers = epoch->get_structure(); + std::vector<Shard *> shards; - Q::delete_buffer_query_state(buffer_state); - return result; + if (vers->get_levels().size() > 0) { + for (int i=vers->get_levels().size() - 1; i>= 0; i--) { + if (vers->get_levels()[i] && vers->get_levels()[i]->get_record_count() > 0) { + shards.emplace_back(vers->get_levels()[i]->get_combined_shard()); } } } - - // Merge the results together - auto result = Q::merge(query_results, parms); - for (size_t i=0; i<states.size(); i++) { - Q::delete_query_state(states[i]); + /* + * construct a shard from the buffer view. We'll hold the view + * for as short a time as possible: once the records are exfiltrated + * from the buffer, there's no reason to retain a hold on the view's + * head pointer any longer + */ + { + auto bv = epoch->get_buffer(); + if (bv.get_record_count() > 0) { + shards.emplace_back(new S(std::move(bv))); + } } - Q::delete_buffer_query_state(buffer_state); + Shard *flattened = new S(shards); - return result; - } + for (auto shard : shards) { + delete shard; + } - size_t get_record_count() { - size_t cnt = get_buffer()->get_record_count(); + end_job(epoch); + return flattened; + } - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_record_count(); + /* + * If the current epoch is *not* the newest one, then wait for + * the newest one to become available. Otherwise, returns immediately. + */ + void await_next_epoch() { + while (m_next_epoch.load().epoch != nullptr) { + std::unique_lock<std::mutex> lk(m_epoch_cv_lk); + m_epoch_cv.wait(lk); } + } - return cnt; + /* + * Mostly exposed for unit-testing purposes. Verifies that the current + * active version of the ExtensionStructure doesn't violate the maximum + * tombstone proportion invariant. 
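+     *
+     * Informally, the invariant being checked is the one enforced by the
+     * old single-threaded code path: for each level i,
+     *
+     *     tombstone_count(i) / capacity(i) <= max_delete_prop
+     *
+     * where capacity(i) = buffer_capacity * scale_factor^(i+1).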
+     */
+    bool validate_tombstone_proportion() {
+        auto epoch = get_active_epoch();
+        auto t = epoch->get_structure()->validate_tombstone_proportion();
+        end_job(epoch);
+        return t;
     }
 
-    size_t get_tombstone_cnt() {
-        size_t cnt = get_buffer()->get_tombstone_count();
-        for (size_t i=0; i<m_levels.size(); i++) {
-            if (m_levels[i]) cnt += m_levels[i]->get_tombstone_count();
-        }
+    void print_scheduler_statistics() {
+        m_sched.print_statistics();
+    }
+
+private:
+    SCHED m_sched;
 
-        return cnt;
+    Buffer *m_buffer;
+
+    //std::mutex m_struct_lock;
+    //std::set<Structure *> m_versions;
+
+    alignas(64) std::atomic<bool> m_reconstruction_scheduled;
+
+    std::atomic<epoch_ptr> m_next_epoch;
+    std::atomic<epoch_ptr> m_current_epoch;
+    std::atomic<epoch_ptr> m_previous_epoch;
+
+    std::condition_variable m_epoch_cv;
+    std::mutex m_epoch_cv_lk;
+
+    std::atomic<size_t> m_epoch_cnt;
+
+    size_t m_scale_factor;
+    double m_max_delete_prop;
+
+    std::atomic<int> m_next_core;
+    size_t m_core_cnt;
+
+    void enforce_delete_invariant(_Epoch *epoch) {
+        auto structure = epoch->get_structure();
+        auto compactions = structure->get_compaction_tasks();
+
+        while (compactions.size() > 0) {
+
+            /* schedule a compaction */
+            ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>();
+            args->epoch = epoch;
+            args->merges = compactions;
+            args->extension = this;
+            args->compaction = true;
+            /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed here */
+
+            auto wait = args->result.get_future();
+
+            m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION);
+
+            /* wait for compaction completion */
+            wait.get();
+
+            /* get a new batch of compactions to perform, if needed */
+            compactions = structure->get_compaction_tasks();
+        }
     }
 
-    size_t get_height() {
-        return m_levels.size();
+    _Epoch *get_active_epoch() {
+        epoch_ptr old, new_ptr;
+
+        do {
+            /*
+             * during an epoch transition, a nullptr will be installed in
+             * the current_epoch. At this moment, the "new" current epoch
+             * will soon be installed, but the "current" current epoch has
+             * been moved back to m_previous_epoch.
+             */
+            if (m_current_epoch.load().epoch == nullptr) {
+                old = m_previous_epoch;
+                new_ptr = {old.epoch, old.refcnt+1};
+                if (old.epoch != nullptr && m_previous_epoch.compare_exchange_strong(old, new_ptr)) {
+                    break;
+                }
+            } else {
+                old = m_current_epoch;
+                new_ptr = {old.epoch, old.refcnt+1};
+                if (old.epoch != nullptr && m_current_epoch.compare_exchange_strong(old, new_ptr)) {
+                    break;
+                }
+            }
+        } while (true);
+
+        assert(new_ptr.refcnt > 0);
+
+        return new_ptr.epoch;
     }
 
-    size_t get_memory_usage() {
-        size_t cnt = m_buffer->get_memory_usage();
+    void advance_epoch(size_t buffer_head) {
+
+        retire_epoch(m_previous_epoch.load().epoch);
+
+        epoch_ptr tmp = {nullptr, 0};
+        epoch_ptr cur;
+        do {
+            cur = m_current_epoch;
+        } while(!m_current_epoch.compare_exchange_strong(cur, tmp));
+
+        m_previous_epoch.store(cur);
 
-        for (size_t i=0; i<m_levels.size(); i++) {
-            if (m_levels[i]) cnt += m_levels[i]->get_memory_usage();
+        // FIXME: this may currently block because there isn't any
+        // query preemption yet.
At this point, we'd need to either + // 1) wait for all queries on the old_head to finish + // 2) kill all queries on the old_head + // 3) somehow migrate all queries on the old_head to the new + // version + while (!m_next_epoch.load().epoch->advance_buffer_head(buffer_head)) { + _mm_pause(); } - return cnt; + + m_current_epoch.store(m_next_epoch); + m_next_epoch.store({nullptr, 0}); + + + /* notify any blocking threads that the new epoch is available */ + m_epoch_cv_lk.lock(); + m_epoch_cv.notify_all(); + m_epoch_cv_lk.unlock(); } - size_t get_aux_memory_usage() { - size_t cnt = m_buffer->get_aux_memory_usage(); + /* + * Creates a new epoch by copying the currently active one. The new epoch's + * structure will be a shallow copy of the old one's. + */ + _Epoch *create_new_epoch() { + /* + * This epoch access is _not_ protected under the assumption that + * only one reconstruction will be able to trigger at a time. If that condition + * is violated, it is possible that this code will clone a retired + * epoch. + */ + assert(m_next_epoch.load().epoch == nullptr); + auto current_epoch = get_active_epoch(); + + m_epoch_cnt.fetch_add(1); + m_next_epoch.store({current_epoch->clone(m_epoch_cnt.load()), 0}); + + end_job(current_epoch); + + return m_next_epoch.load().epoch; + } - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) { - cnt += m_levels[i]->get_aux_memory_usage(); - } + void retire_epoch(_Epoch *epoch) { + /* + * Epochs with currently active jobs cannot + * be retired. By the time retire_epoch is called, + * it is assumed that a new epoch is active, meaning + * that the epoch to be retired should no longer + * accumulate new active jobs. Eventually, this + * number will hit zero and the function will + * proceed. + */ + + if (epoch == nullptr) { + return; } - return cnt; - } + epoch_ptr old, new_ptr; + new_ptr = {nullptr, 0}; + do { + old = m_previous_epoch.load(); + + /* + * If running in single threaded mode, the failure to retire + * an Epoch will result in the thread of execution blocking + * indefinitely. + */ + if constexpr (std::same_as<SCHED, SerialScheduler>) { + if (old.epoch == epoch) assert(old.refcnt == 0); + } - bool validate_tombstone_proportion() { - long double ts_prop; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) { - ts_prop = (long double) m_levels[i]->get_tombstone_count() / (long double) calc_level_record_capacity(i); - if (ts_prop > (long double) m_max_delete_prop) { - return false; - } + if (old.epoch == epoch && old.refcnt == 0 && + m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; } - } + usleep(1); + + } while(true); - return true; + delete epoch; } - size_t get_buffer_capacity() { - return m_buffer->get_capacity(); + static void reconstruction(void *arguments) { + auto args = (ReconstructionArgs<R, S, Q, L> *) arguments; + + ((DynamicExtension *) args->extension)->SetThreadAffinity(); + Structure *vers = args->epoch->get_structure(); + + for (ssize_t i=0; i<args->merges.size(); i++) { + vers->reconstruction(args->merges[i].second, args->merges[i].first); + } + + /* + * we'll grab the buffer AFTER doing the internal reconstruction, so we + * can flush as many records as possible in one go. The reconstruction + * was done so as to make room for the full buffer anyway, so there's + * no real benefit to doing this first. 
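+     *
+     * As a concrete illustration (editor's note, numbers invented): with
+     * head = 96 and tail = 160, the view covers records [96, 160); all 64
+     * are flushed into L0 and new_head becomes 160, so the next epoch's
+     * buffer view picks up exactly where this flush left off.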
+ */ + auto buffer_view = args->epoch->get_buffer(); + size_t new_head = buffer_view.get_tail(); + + /* + * if performing a compaction, don't flush the buffer, as + * there is no guarantee that any necessary reconstructions + * will free sufficient space in L0 to support a flush + */ + if (!args->compaction) { + vers->flush_buffer(std::move(buffer_view)); + } + + args->result.set_value(true); + + /* + * Compactions occur on an epoch _before_ it becomes active, + * and as a result the active epoch should _not_ be advanced as + * part of a compaction + */ + if (!args->compaction) { + ((DynamicExtension *) args->extension)->advance_epoch(new_head); + } + + ((DynamicExtension *) args->extension)->m_reconstruction_scheduled.store(false); + + delete args; } - Shard *create_static_structure() { - std::vector<Shard *> shards; + static void async_query(void *arguments) { + QueryArgs<R, S, Q, L> *args = (QueryArgs<R, S, Q, L> *) arguments; - if (m_levels.size() > 0) { - for (int i=m_levels.size() - 1; i>= 0; i--) { - if (m_levels[i]) { - shards.emplace_back(m_levels[i]->get_merged_shard()); - } - } - } + auto epoch = ((DynamicExtension *) args->extension)->get_active_epoch(); + + auto ptr1 = ((DynamicExtension *) args->extension)->m_previous_epoch.load().epoch; + auto ptr2 = ((DynamicExtension *) args->extension)->m_current_epoch.load().epoch; + auto ptr3 = ((DynamicExtension *) args->extension)->m_next_epoch.load().epoch; + + + auto buffer = epoch->get_buffer(); + auto vers = epoch->get_structure(); + void *parms = args->query_parms; + + /* Get the buffer query states */ + void *buffer_state = Q::get_buffer_query_state(&buffer, parms); + + /* Get the shard query states */ + std::vector<std::pair<ShardID, Shard*>> shards; + std::vector<void *> states = vers->get_query_states(shards, parms); + + Q::process_query_states(parms, states, buffer_state); - shards.emplace_back(new S(get_buffer())); + std::vector<std::vector<Wrapped<R>>> query_results(shards.size() + 1); + for (size_t i=0; i<query_results.size(); i++) { + std::vector<Wrapped<R>> local_results; + ShardID shid; + + if (i == 0) { /* process the buffer first */ + local_results = Q::buffer_query(buffer_state, parms); + shid = INVALID_SHID; + } else { + local_results = Q::query(shards[i - 1].second, states[i - 1], parms); + shid = shards[i - 1].first; + } - Shard *shards_array[shards.size()]; + query_results[i] = std::move(filter_deletes(local_results, shid, vers, &buffer)); - size_t j = 0; - for (size_t i=0; i<shards.size(); i++) { - if (shards[i]) { - shards_array[j++] = shards[i]; + if constexpr (Q::EARLY_ABORT) { + if (query_results[i].size() > 0) break; } } - Shard *flattened = new S(shards_array, j); + auto result = Q::merge(query_results, parms); + args->result_set.set_value(std::move(result)); - for (auto shard : shards) { - delete shard; + ((DynamicExtension *) args->extension)->end_job(epoch); + + Q::delete_buffer_query_state(buffer_state); + for (size_t i=0; i<states.size(); i++) { + Q::delete_query_state(states[i]); } - return flattened; + delete args; } -private: - Buffer *m_buffer; + void schedule_reconstruction() { + auto epoch = create_new_epoch(); + /* + * the reconstruction process calls end_job(), + * so we must start one before calling it + */ + + ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>(); + args->epoch = epoch; + args->merges = epoch->get_structure()->get_reconstruction_tasks(m_buffer->get_high_watermark()); + args->extension = this; + args->compaction = false; + /* NOTE: args is deleted by 
the reconstruction job, so shouldn't be freed here */
+
+        m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION);
+    }
 
-    size_t m_scale_factor;
-    double m_max_delete_prop;
+    std::future<std::vector<R>> schedule_query(void *query_parms) {
+        QueryArgs<R, S, Q, L> *args = new QueryArgs<R, S, Q, L>();
+        args->extension = this;
+        args->query_parms = query_parms;
+        auto result = args->result_set.get_future();
 
-    std::vector<InternalLevel<R, S, Q> *> m_levels;
+        m_sched.schedule_job(async_query, 0, args, QUERY);
 
-    Buffer *get_buffer() {
-        return m_buffer;
+        return result;
     }
 
     int internal_append(const R &rec, bool ts) {
-        Buffer *buffer;
-        while (!(buffer = get_buffer()))
-            ;
-
-        if (buffer->is_full()) {
-            merge_buffer();
+        if (m_buffer->is_at_low_watermark()) {
+            auto old = false;
+
+            if (m_reconstruction_scheduled.compare_exchange_strong(old, true)) {
+                schedule_reconstruction();
+            }
         }
 
-        return buffer->append(rec, ts);
+        /* this will fail if the HWM is reached and return 0 */
+        return m_buffer->append(rec, ts);
     }
 
-    std::vector<Wrapped<R>> filter_deletes(std::vector<Wrapped<R>> &records, ShardID shid, Buffer *buffer) {
-        if constexpr (!Q::SKIP_DELETE_FILTER) {
+    static std::vector<Wrapped<R>> filter_deletes(std::vector<Wrapped<R>> &records, ShardID shid, Structure *vers, BufView *bview) {
+        if constexpr (Q::SKIP_DELETE_FILTER) {
             return records;
         }
 
         std::vector<Wrapped<R>> processed_records;
         processed_records.reserve(records.size());
 
-        // For delete tagging, we just need to check the delete bit on each
-        // record.
+        /*
+         * For delete tagging, we just need to check the delete bit
+         * on each record.
+         */
         if constexpr (D == DeletePolicy::TAGGING) {
             for (auto &rec : records) {
                 if (rec.is_deleted()) {
@@ -309,25 +646,35 @@ private:
             return processed_records;
         }
 
-        // For tombstone deletes, we need to search for the corresponding
-        // tombstone for each record.
+        /*
+         * For tombstone deletes, we need to search for the corresponding
+         * tombstone for each record.
+         */
        for (auto &rec : records) {
            if (rec.is_tombstone()) {
                continue;
            }
 
-            if (buffer->check_tombstone(rec.rec)) {
-                continue;
+            // FIXME: need to figure out how best to re-enable the buffer tombstone
+            //        check in the correct manner.
+            //if (buffview.check_tombstone(rec.rec)) {
+            //    continue;
+            //}
+
+            /* linear scan of the buffer view for a matching tombstone */
+            bool buffer_ts = false;
+            for (size_t i=0; i<bview->get_record_count(); i++) {
+                if (bview->get(i)->is_tombstone() && bview->get(i)->rec == rec.rec) {
+                    buffer_ts = true;
+                    break;
+                }
+            }
+
+            if (buffer_ts) {
+                continue;
            }
 
            if (shid != INVALID_SHID) {
                for (size_t lvl=0; lvl<=shid.level_idx; lvl++) {
-                    if (m_levels[lvl]->check_tombstone(0, rec.rec)) {
+                    if (vers->get_levels()[lvl]->check_tombstone(0, rec.rec)) {
                        continue;
                    }
                }
 
-                if (m_levels[shid.level_idx]->check_tombstone(shid.shard_idx + 1, rec.rec)) {
+                if (vers->get_levels()[shid.level_idx]->check_tombstone(shid.shard_idx + 1, rec.rec)) {
                    continue;
                }
            }
@@ -338,197 +685,70 @@ private:
        return processed_records;
    }
 
-    /*
-     * Add a new level to the LSM Tree and return that level's index. Will
-     * automatically determine whether the level should be on memory or on disk,
-     * and act appropriately.
-     */
-    inline level_index grow() {
-        level_index new_idx;
-
-        size_t new_shard_cnt = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor;
-        new_idx = m_levels.size();
-        if (new_idx > 0) {
-            assert(m_levels[new_idx - 1]->get_shard(0)->get_tombstone_count() == 0);
-        }
-        m_levels.emplace_back(new InternalLevel<R, Shard, Q>(new_idx, new_shard_cnt));
-
-        return new_idx;
-    }
-
-
-    // Merge the memory table down into the tree, completing any required other
-    // merges to make room for it.
- inline void merge_buffer() { - auto buffer = get_buffer(); - - if (!can_merge_with(0, buffer->get_record_count())) { - merge_down(0); + void SetThreadAffinity() { + int core = m_next_core.fetch_add(1) % m_core_cnt; + cpu_set_t mask; + CPU_ZERO(&mask); + + switch (core % 2) { + case 0: + // 0 |-> 0 + // 2 |-> 2 + // 4 |-> 4 + core = core; + break; + case 1: + // 1 |-> 28 + // 3 |-> 30 + // 5 |-> 32 + core = (core - 1) + m_core_cnt; + break; } - - merge_buffer_into_l0(buffer); - enforce_delete_maximum(0); - - buffer->truncate(); - return; + CPU_SET(core, &mask); + ::sched_setaffinity(0, sizeof(mask), &mask); } - /* - * Merge the specified level down into the tree. The level index must be - * non-negative (i.e., this function cannot be used to merge the buffer). This - * routine will recursively perform any necessary merges to make room for the - * specified level. - */ - inline void merge_down(level_index idx) { - level_index merge_base_level = find_mergable_level(idx); - if (merge_base_level == -1) { - merge_base_level = grow(); - } - for (level_index i=merge_base_level; i>idx; i--) { - merge_levels(i, i-1); - enforce_delete_maximum(i); - } + void end_job(_Epoch *epoch) { + epoch_ptr old, new_ptr; - return; - } + do { + if (m_previous_epoch.load().epoch == epoch) { + old = m_previous_epoch; + /* + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry + */ + if (old.epoch == nullptr) { + continue; + } - /* - * Find the first level below the level indicated by idx that - * is capable of sustaining a merge operation and return its - * level index. If no such level exists, returns -1. Also - * returns -1 if idx==0, and no such level exists, to simplify - * the logic of the first merge. - */ - inline level_index find_mergable_level(level_index idx, Buffer *buffer=nullptr) { + assert(old.refcnt > 0); - if (idx == 0 && m_levels.size() == 0) return -1; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + } else { + old = m_current_epoch; + /* + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry + */ + if (old.epoch == nullptr) { + continue; + } - bool level_found = false; - bool disk_level; - level_index merge_level_idx; + assert(old.refcnt > 0); - size_t incoming_rec_cnt = get_level_record_count(idx, buffer); - for (level_index i=idx+1; i<m_levels.size(); i++) { - if (can_merge_with(i, incoming_rec_cnt)) { - return i; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_current_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } } - - incoming_rec_cnt = get_level_record_count(i); - } - - return -1; - } - - /* - * Merge the level specified by incoming level into the level specified - * by base level. The two levels should be sequential--i.e. no levels - * are skipped in the merge process--otherwise the tombstone ordering - * invariant may be violated by the merge operation. 
- */ - inline void merge_levels(level_index base_level, level_index incoming_level) { - // merging two memory levels - if constexpr (L == LayoutPolicy::LEVELING) { - auto tmp = m_levels[base_level]; - m_levels[base_level] = InternalLevel<R, Shard, Q>::merge_levels(m_levels[base_level], m_levels[incoming_level]); - mark_as_unused(tmp); - } else { - m_levels[base_level]->append_merged_shards(m_levels[incoming_level]); - } - - mark_as_unused(m_levels[incoming_level]); - m_levels[incoming_level] = new InternalLevel<R, Shard, Q>(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor); - } - - - inline void merge_buffer_into_l0(Buffer *buffer) { - assert(m_levels[0]); - if constexpr (L == LayoutPolicy::LEVELING) { - // FIXME: Kludgey implementation due to interface constraints. - auto old_level = m_levels[0]; - auto temp_level = new InternalLevel<R, Shard, Q>(0, 1); - temp_level->append_buffer(buffer); - auto new_level = InternalLevel<R, Shard, Q>::merge_levels(old_level, temp_level); - - m_levels[0] = new_level; - delete temp_level; - mark_as_unused(old_level); - } else { - m_levels[0]->append_buffer(buffer); - } - } - - /* - * Mark a given memory level as no-longer in use by the tree. For now this - * will just free the level. In future, this will be more complex as the - * level may not be able to immediately be deleted, depending upon who - * else is using it. - */ - inline void mark_as_unused(InternalLevel<R, Shard, Q> *level) { - delete level; - } - - /* - * Check the tombstone proportion for the specified level and - * if the limit is exceeded, forcibly merge levels until all - * levels below idx are below the limit. - */ - inline void enforce_delete_maximum(level_index idx) { - long double ts_prop = (long double) m_levels[idx]->get_tombstone_count() / (long double) calc_level_record_capacity(idx); - - if (ts_prop > (long double) m_max_delete_prop) { - merge_down(idx); - } - - return; - } - - /* - * Assume that level "0" should be larger than the buffer. The buffer - * itself is index -1, which should return simply the buffer capacity. - */ - inline size_t calc_level_record_capacity(level_index idx) { - return get_buffer()->get_capacity() * pow(m_scale_factor, idx+1); + } while (true); } - /* - * Returns the actual number of records present on a specified level. An - * index value of -1 indicates the memory table. Can optionally pass in - * a pointer to the memory table to use, if desired. Otherwise, there are - * no guarantees about which buffer will be accessed if level_index is -1. - */ - inline size_t get_level_record_count(level_index idx, Buffer *buffer=nullptr) { - - assert(idx >= -1); - if (idx == -1) { - return (buffer) ? buffer->get_record_count() : get_buffer()->get_record_count(); - } - - return (m_levels[idx]) ? m_levels[idx]->get_record_count() : 0; - } - - /* - * Determines if the specific level can merge with another record containing - * incoming_rec_cnt number of records. The provided level index should be - * non-negative (i.e., not refer to the buffer) and will be automatically - * translated into the appropriate index into either the disk or memory level - * vector. 
- */ - inline bool can_merge_with(level_index idx, size_t incoming_rec_cnt) { - if (idx>= m_levels.size() || !m_levels[idx]) { - return false; - } - - if (L == LayoutPolicy::LEVELING) { - return m_levels[idx]->get_record_count() + incoming_rec_cnt <= calc_level_record_capacity(idx); - } else { - return m_levels[idx]->get_shard_count() < m_scale_factor; - } - - // unreachable - assert(true); - } }; - } diff --git a/include/framework/InternalLevel.h b/include/framework/InternalLevel.h deleted file mode 100644 index ec8ffc4..0000000 --- a/include/framework/InternalLevel.h +++ /dev/null @@ -1,213 +0,0 @@ -/* - * include/framework/InternalLevel.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <vector> -#include <memory> - -#include "util/types.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" -#include "framework/RecordInterface.h" -#include "framework/MutableBuffer.h" - -namespace de { - -template <RecordInterface R, ShardInterface S, QueryInterface Q> -class InternalLevel { - typedef S Shard; - typedef MutableBuffer<R> Buffer; -public: - InternalLevel(ssize_t level_no, size_t shard_cap) - : m_level_no(level_no) - , m_shard_cnt(0) - , m_shards(shard_cap, nullptr) - , m_owns(shard_cap, true) - {} - - // Create a new memory level sharing the shards and repurposing it as previous level_no + 1 - // WARNING: for leveling only. - InternalLevel(InternalLevel* level) - : m_level_no(level->m_level_no + 1) - , m_shard_cnt(level->m_shard_cnt) - , m_shards(level->m_shards.size(), nullptr) - , m_owns(level->m_owns.size(), true) { - assert(m_shard_cnt == 1 && m_shards.size() == 1); - - for (size_t i=0; i<m_shards.size(); i++) { - level->m_owns[i] = false; - m_shards[i] = level->m_shards[i]; - } - } - - ~InternalLevel() { - for (size_t i=0; i<m_shards.size(); i++) { - if (m_owns[i]) delete m_shards[i]; - } - } - - // WARNING: for leveling only. - // assuming the base level is the level new level is merging into. (base_level is larger.) - static InternalLevel* merge_levels(InternalLevel* base_level, InternalLevel* new_level) { - assert(base_level->m_level_no > new_level->m_level_no || (base_level->m_level_no == 0 && new_level->m_level_no == 0)); - auto res = new InternalLevel(base_level->m_level_no, 1); - res->m_shard_cnt = 1; - Shard* shards[2]; - shards[0] = base_level->m_shards[0]; - shards[1] = new_level->m_shards[0]; - - res->m_shards[0] = new S(shards, 2); - return res; - } - - void append_buffer(Buffer* buffer) { - assert(m_shard_cnt < m_shards.size()); - m_shards[m_shard_cnt] = new S(buffer); - m_owns[m_shard_cnt] = true; - ++m_shard_cnt; - } - - void append_merged_shards(InternalLevel* level) { - assert(m_shard_cnt < m_shards.size()); - m_shards[m_shard_cnt] = new S(level->m_shards.data(), level->m_shard_cnt); - m_owns[m_shard_cnt] = true; - - ++m_shard_cnt; - } - - Shard *get_merged_shard() { - Shard *shards[m_shard_cnt]; - - for (size_t i=0; i<m_shard_cnt; i++) { - shards[i] = m_shards[i]; - } - - return new S(shards, m_shard_cnt); - } - - // Append the sample range in-order..... 
- void get_query_states(std::vector<std::pair<ShardID, Shard *>> &shards, std::vector<void*>& shard_states, void *query_parms) { - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - auto shard_state = Q::get_query_state(m_shards[i], query_parms); - shards.push_back({{m_level_no, (ssize_t) i}, m_shards[i]}); - shard_states.emplace_back(shard_state); - } - } - } - - bool check_tombstone(size_t shard_stop, const R& rec) { - if (m_shard_cnt == 0) return false; - - for (int i = m_shard_cnt - 1; i >= (ssize_t) shard_stop; i--) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec, true); - if (res && res->is_tombstone()) { - return true; - } - } - } - return false; - } - - bool delete_record(const R &rec) { - if (m_shard_cnt == 0) return false; - - for (size_t i = 0; i < m_shards.size(); ++i) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec); - if (res) { - res->set_delete(); - return true; - } - } - } - - return false; - } - - Shard* get_shard(size_t idx) { - return m_shards[idx]; - } - - size_t get_shard_count() { - return m_shard_cnt; - } - - size_t get_record_count() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - cnt += m_shards[i]->get_record_count(); - } - - return cnt; - } - - size_t get_tombstone_count() { - size_t res = 0; - for (size_t i = 0; i < m_shard_cnt; ++i) { - res += m_shards[i]->get_tombstone_count(); - } - return res; - } - - size_t get_aux_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - cnt += m_shards[i]->get_aux_memory_usage(); - } - - return cnt; - } - - size_t get_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - cnt += m_shards[i]->get_memory_usage(); - } - } - - return cnt; - } - - double get_tombstone_prop() { - size_t tscnt = 0; - size_t reccnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - tscnt += m_shards[i]->get_tombstone_count(); - reccnt += (*m_shards[i])->get_record_count(); - } - } - - return (double) tscnt / (double) (tscnt + reccnt); - } - -private: - ssize_t m_level_no; - - size_t m_shard_cnt; - size_t m_shard_size_cap; - - std::vector<Shard*> m_shards; - std::vector<bool> m_owns; - - InternalLevel *clone() { - auto new_level = new InternalLevel(m_level_no, m_shards.size()); - for (size_t i=0; i<m_shard_cnt; i++) { - new_level->m_shards[i] = m_shards[i]; - new_level->m_owns[i] = true; - m_owns[i] = false; - } - } -}; - -} diff --git a/include/framework/MutableBuffer.h b/include/framework/MutableBuffer.h deleted file mode 100644 index b79fc02..0000000 --- a/include/framework/MutableBuffer.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * include/framework/MutableBuffer.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. 
- * - */ -#pragma once - -#include <cstdlib> -#include <atomic> -#include <cassert> -#include <numeric> -#include <algorithm> -#include <type_traits> - -#include "psu-util/alignment.h" -#include "util/bf_config.h" -#include "psu-ds/BloomFilter.h" -#include "psu-ds/Alias.h" -#include "psu-util/timer.h" -#include "framework/RecordInterface.h" - -using psudb::CACHELINE_SIZE; - -namespace de { - -template <RecordInterface R> -class MutableBuffer { -public: - MutableBuffer(size_t capacity, size_t max_tombstone_cap) - : m_cap(capacity), m_tombstone_cap(max_tombstone_cap), m_reccnt(0) - , m_tombstonecnt(0), m_weight(0), m_max_weight(0) { - auto len = capacity * sizeof(Wrapped<R>); - size_t aligned_buffersize = len + (CACHELINE_SIZE - (len % CACHELINE_SIZE)); - m_data = (Wrapped<R>*) std::aligned_alloc(CACHELINE_SIZE, aligned_buffersize); - m_tombstone_filter = nullptr; - if (max_tombstone_cap > 0) { - m_tombstone_filter = new psudb::BloomFilter<R>(BF_FPR, max_tombstone_cap, BF_HASH_FUNCS); - } - } - - ~MutableBuffer() { - if (m_data) free(m_data); - if (m_tombstone_filter) delete m_tombstone_filter; - } - - template <typename R_ = R> - int append(const R &rec, bool tombstone=false) { - if (tombstone && m_tombstonecnt + 1 > m_tombstone_cap) return 0; - - int32_t pos = 0; - if ((pos = try_advance_tail()) == -1) return 0; - - Wrapped<R> wrec; - wrec.rec = rec; - wrec.header = 0; - if (tombstone) wrec.set_tombstone(); - - m_data[pos] = wrec; - m_data[pos].header |= (pos << 2); - - if (tombstone) { - m_tombstonecnt.fetch_add(1); - if (m_tombstone_filter) m_tombstone_filter->insert(rec); - } - - if constexpr (WeightedRecordInterface<R_>) { - m_weight.fetch_add(rec.weight); - double old = m_max_weight.load(); - while (old < rec.weight) { - m_max_weight.compare_exchange_strong(old, rec.weight); - old = m_max_weight.load(); - } - } else { - m_weight.fetch_add(1); - } - - return 1; - } - - bool truncate() { - m_tombstonecnt.store(0); - m_reccnt.store(0); - m_weight.store(0); - m_max_weight.store(0); - if (m_tombstone_filter) m_tombstone_filter->clear(); - - return true; - } - - size_t get_record_count() { - return m_reccnt; - } - - size_t get_capacity() { - return m_cap; - } - - bool is_full() { - return m_reccnt == m_cap; - } - - size_t get_tombstone_count() { - return m_tombstonecnt.load(); - } - - bool delete_record(const R& rec) { - auto offset = 0; - while (offset < m_reccnt.load()) { - if (m_data[offset].rec == rec) { - m_data[offset].set_delete(); - return true; - } - offset++; - } - - return false; - } - - bool check_tombstone(const R& rec) { - if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) return false; - - auto offset = 0; - while (offset < m_reccnt.load()) { - if (m_data[offset].rec == rec && m_data[offset].is_tombstone()) { - return true; - } - offset++;; - } - return false; - } - - size_t get_memory_usage() { - return m_cap * sizeof(R); - } - - size_t get_aux_memory_usage() { - return m_tombstone_filter->get_memory_usage(); - } - - size_t get_tombstone_capacity() { - return m_tombstone_cap; - } - - double get_total_weight() { - return m_weight.load(); - } - - Wrapped<R> *get_data() { - return m_data; - } - - double get_max_weight() { - return m_max_weight; - } - -private: - int32_t try_advance_tail() { - size_t new_tail = m_reccnt.fetch_add(1); - - if (new_tail < m_cap) return new_tail; - else return -1; - } - - size_t m_cap; - size_t m_tombstone_cap; - - Wrapped<R>* m_data; - psudb::BloomFilter<R>* m_tombstone_filter; - - alignas(64) std::atomic<size_t> m_tombstonecnt; - 
alignas(64) std::atomic<uint32_t> m_reccnt; - alignas(64) std::atomic<double> m_weight; - alignas(64) std::atomic<double> m_max_weight; -}; - -} diff --git a/include/framework/QueryInterface.h b/include/framework/QueryInterface.h deleted file mode 100644 index 46a1ce1..0000000 --- a/include/framework/QueryInterface.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * include/framework/QueryInterface.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <vector> -#include <concepts> -#include "util/types.h" - -template <typename Q> -concept QueryInterface = requires(Q q, void *p, std::vector<void*> &s) { - -/* - {q.get_query_state(p, p)} -> std::convertible_to<void*>; - {q.get_buffer_query_state(p, p)}; - {q.query(p, p)}; - {q.buffer_query(p, p)}; - {q.merge()}; - {q.delete_query_state(p)}; -*/ - {Q::EARLY_ABORT} -> std::convertible_to<bool>; - {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>; - //{Q::get_query_state(p, p)} -> std::convertible_to<void*>; - //{Q::get_buffer_query_state(p, p)} -> std::convertible_to<void*>; - {Q::process_query_states(p, s, p)}; - - {Q::delete_query_state(std::declval<void*>())} -> std::same_as<void>; - {Q::delete_buffer_query_state(p)}; - -}; diff --git a/include/framework/QueryRequirements.h b/include/framework/QueryRequirements.h new file mode 100644 index 0000000..dcba67e --- /dev/null +++ b/include/framework/QueryRequirements.h @@ -0,0 +1,17 @@ +/* + * include/framework/QueryRequirements.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A header file containing the necessary includes for Query + * development. + * + */ +#pragma once + +#include "framework/structure/BufferView.h" +#include "framework/interface/Record.h" +#include "framework/interface/Shard.h" +#include "framework/interface/Query.h" diff --git a/include/framework/ShardInterface.h b/include/framework/ShardInterface.h deleted file mode 100644 index 3aa62df..0000000 --- a/include/framework/ShardInterface.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * include/framework/ShardInterface.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <concepts> - -#include "util/types.h" -#include "framework/RecordInterface.h" - -namespace de { - -//template <template<typename> typename S, typename R> -template <typename S> -concept ShardInterface = requires(S s, void *p, bool b) { - //{s.point_lookup(r, b) } -> std::same_as<R*>; - {s.get_record_count()} -> std::convertible_to<size_t>; - {s.get_memory_usage()} -> std::convertible_to<size_t>; -}; - -} diff --git a/include/framework/ShardRequirements.h b/include/framework/ShardRequirements.h new file mode 100644 index 0000000..d054030 --- /dev/null +++ b/include/framework/ShardRequirements.h @@ -0,0 +1,17 @@ +/* + * include/framework/ShardRequirements.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A header file containing the necessary includes for Shard + * development. 
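+ *
+ * As a sketch, a conforming shard has roughly this shape (an editor's
+ * illustration against the ShardInterface concept defined in this patch;
+ * `MyShard` is hypothetical and the data layout is elided):
+ *
+ *     template <de::RecordInterface R>
+ *     class MyShard {
+ *     public:
+ *         MyShard(de::BufferView<R> buffer);       // build from a buffer view
+ *         MyShard(std::vector<MyShard*> &shards);  // build by merging shards
+ *
+ *         de::Wrapped<R> *point_lookup(const R &rec, bool filter_ts=false);
+ *         de::Wrapped<R> *get_data();
+ *
+ *         size_t get_record_count();
+ *         size_t get_tombstone_count();
+ *         size_t get_memory_usage();
+ *         size_t get_aux_memory_usage();
+ *     };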
+ * + */ +#pragma once + +#include "framework/structure/BufferView.h" +#include "framework/interface/Record.h" +#include "framework/interface/Shard.h" +#include "framework/interface/Query.h" diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h new file mode 100644 index 0000000..3d487f0 --- /dev/null +++ b/include/framework/interface/Query.h @@ -0,0 +1,30 @@ +/* + * include/framework/interface/Query.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include "framework/QueryRequirements.h" + +namespace de{ + +template <typename Q, typename R, typename S> +concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv) { + {Q::get_query_state(sh, p)} -> std::convertible_to<void*>; + {Q::get_buffer_query_state(bv, p)} -> std::convertible_to<void *>; + {Q::process_query_states(p, s, p)}; + {Q::query(sh, p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; + {Q::buffer_query(p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; + {Q::merge(rv, p)} -> std::convertible_to<std::vector<R>>; + + {Q::delete_query_state(p)} -> std::same_as<void>; + {Q::delete_buffer_query_state(p)} -> std::same_as<void>; + + {Q::EARLY_ABORT} -> std::convertible_to<bool>; + {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>; +}; +} diff --git a/include/framework/RecordInterface.h b/include/framework/interface/Record.h index f78918c..5b9f307 100644 --- a/include/framework/RecordInterface.h +++ b/include/framework/interface/Record.h @@ -1,11 +1,12 @@ /* - * include/framework/RecordInterface.h + * include/framework/interface/Record.h * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * FIXME: the record implementations could probably be broken out into + * different files, leaving only the interface here */ #pragma once @@ -137,7 +138,7 @@ struct CosinePoint{ return true; } - // lexicographic order + /* lexicographic order */ inline bool operator<(const CosinePoint& other) const { for (size_t i=0; i<D; i++) { if (data[i] < other.data[i]) { @@ -181,7 +182,7 @@ struct EuclidPoint{ return true; } - // lexicographic order + /* lexicographic order */ inline bool operator<(const EuclidPoint& other) const { for (size_t i=0; i<D; i++) { if (data[i] < other.data[i]) { @@ -207,8 +208,24 @@ struct EuclidPoint{ template<RecordInterface R> struct RecordHash { size_t operator()(R const &rec) const { - return psudb::hash_bytes((char *) &rec, sizeof(R)); + return psudb::hash_bytes((std::byte *) &rec, sizeof(R)); } }; +template <typename R> +class DistCmpMax { +public: + DistCmpMax(R *baseline) : P(baseline) {} + + inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { + return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); + } + + inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ + return a->calc_distance(*P) > b->calc_distance(*P); + } + +private: + R *P; +}; } diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h new file mode 100644 index 0000000..451ddd2 --- /dev/null +++ b/include/framework/interface/Scheduler.h @@ -0,0 +1,19 @@ +/* + * include/framework/interface/Scheduler.h + * + * Copyright (C) 2023 Douglas B. 
Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include "framework/scheduling/Task.h" + +template <typename S> +concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) { + {S(i, i)}; + {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; + {s.shutdown()}; + {s.print_statistics()}; +}; diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h new file mode 100644 index 0000000..c4a9180 --- /dev/null +++ b/include/framework/interface/Shard.h @@ -0,0 +1,36 @@ +/* + * include/framework/interface/Shard.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include "framework/ShardRequirements.h" + +namespace de { + +template <typename S, typename R> +concept ShardInterface = RecordInterface<R> && requires(S s, std::vector<S*> spp, void *p, bool b, size_t i, BufferView<R> bv, R r) { + {S(spp)}; + {S(std::move(bv))}; + + {s.point_lookup(r, b) } -> std::same_as<Wrapped<R>*>; + {s.get_data()} -> std::same_as<Wrapped<R>*>; + + {s.get_record_count()} -> std::convertible_to<size_t>; + {s.get_tombstone_count()} -> std::convertible_to<size_t>; + {s.get_memory_usage()} -> std::convertible_to<size_t>; + {s.get_aux_memory_usage()} -> std::convertible_to<size_t>; +}; + +template <typename S, typename R> +concept SortedShardInterface = ShardInterface<S, R> && requires(S s, R r, R *rp, size_t i) { + {s.lower_bound(r)} -> std::convertible_to<size_t>; + {s.upper_bound(r)} -> std::convertible_to<size_t>; + {s.get_record_at(i)} -> std::same_as<Wrapped<R>*>; +}; + +} diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h new file mode 100644 index 0000000..9377fb0 --- /dev/null +++ b/include/framework/scheduling/Epoch.h @@ -0,0 +1,143 @@ +/* + * include/framework/scheduling/Epoch.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include <condition_variable> +#include <mutex> + +#include "framework/structure/MutableBuffer.h" +#include "framework/structure/ExtensionStructure.h" +#include "framework/structure/BufferView.h" + +namespace de { + + +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +class Epoch { +private: + typedef MutableBuffer<R> Buffer; + typedef ExtensionStructure<R, S, Q, L> Structure; + typedef BufferView<R> BufView; +public: + Epoch(size_t number=0) + : m_buffer(nullptr) + , m_structure(nullptr) + , m_active_merge(false) + , m_epoch_number(number) + , m_buffer_head(0) + {} + + Epoch(size_t number, Structure *structure, Buffer *buff, size_t head) + : m_buffer(buff) + , m_structure(structure) + , m_active_merge(false) + , m_epoch_number(number) + , m_buffer_head(head) + { + structure->take_reference(); + } + + ~Epoch() { + if (m_structure) { + m_structure->release_reference(); + } + + if (m_structure->get_reference_count() == 0) { + delete m_structure; + } + + } + + /* + * Epochs are *not* copyable or movable. 
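+     * For reference, the acquire/release pattern used by DynamicExtension
+     * looks like this (an editor's illustration of the code in this patch):
+     *
+     *     auto epoch = get_active_epoch();  // CAS-increments the epoch_ptr refcnt
+     *     auto vers  = epoch->get_structure();
+     *     // ... read-only work against vers and the buffer view ...
+     *     end_job(epoch);                   // CAS-decrements the refcnt
+     *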
+     * Only one can exist, and all users of it work with pointers.
+     */
+    Epoch(const Epoch&) = delete;
+    Epoch(Epoch&&) = delete;
+    Epoch &operator=(const Epoch&) = delete;
+    Epoch &operator=(Epoch&&) = delete;
+
+    size_t get_epoch_number() {
+        return m_epoch_number;
+    }
+
+    Structure *get_structure() {
+        return m_structure;
+    }
+
+    BufView get_buffer() {
+        return m_buffer->get_buffer_view(m_buffer_head);
+    }
+
+    /*
+     * Returns a new Epoch object that is a copy of this one. The new object
+     * will also contain a copy of the m_structure, rather than a reference to
+     * the same one. The epoch number of the new epoch will be set to the
+     * provided argument.
+     */
+    Epoch *clone(size_t number) {
+        std::unique_lock<std::mutex> lock(m_buffer_lock);
+        auto epoch = new Epoch(number);
+        epoch->m_buffer = m_buffer;
+        epoch->m_buffer_head = m_buffer_head;
+
+        if (m_structure) {
+            epoch->m_structure = m_structure->copy();
+            /* the copy routine returns a structure with 0 references */
+            epoch->m_structure->take_reference();
+        }
+
+        return epoch;
+    }
+
+    /*
+     * Check if a merge can be started from this Epoch. At present, without
+     * concurrent merging, this simply checks if there is currently a scheduled
+     * merge based on this Epoch. If there is, returns false. If there isn't,
+     * return true and set a flag indicating that there is an active merge.
+     */
+    bool prepare_reconstruction() {
+        auto old = m_active_merge.load();
+        if (old) {
+            return false;
+        }
+
+        // FIXME: this needs to be cleaned up
+        while (!m_active_merge.compare_exchange_strong(old, true)) {
+            old = m_active_merge.load();
+            if (old) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool advance_buffer_head(size_t head) {
+        m_buffer_head = head;
+        return m_buffer->advance_head(m_buffer_head);
+    }
+
+private:
+    Structure *m_structure;
+    Buffer *m_buffer;
+
+    std::mutex m_buffer_lock;
+    std::atomic<bool> m_active_merge;
+
+    /*
+     * The number of currently active jobs (queries/merges) operating on
+     * this epoch is tracked externally, in the DynamicExtension's
+     * epoch_ptr reference count; an epoch can only be retired once that
+     * count reaches 0.
+     */
+    size_t m_epoch_number;
+    size_t m_buffer_head;
+};
+}

diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h
new file mode 100644
index 0000000..3ed4f49
--- /dev/null
+++ b/include/framework/scheduling/FIFOScheduler.h
@@ -0,0 +1,129 @@
+/*
+ * include/framework/scheduling/FIFOScheduler.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * This scheduler runs jobs concurrently, using a standard FIFO queue to
+ * determine which jobs to run next. If more jobs are scheduled than there
+ * are available threads, the excess will stall until a thread becomes
+ * available and then run in the order they were received by the scheduler.
+ *
+ * TODO: We need to set up a custom thread pool based on jthreads to support
+ * thread preemption for a later phase of this project. That will allow us
+ * to avoid blocking epoch transitions on long-running queries, or to pause
+ * reconstructions on demand.
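+ *
+ * A minimal sketch of driving the scheduler directly (an editor's
+ * illustration; `my_job` and `MyArgs` are hypothetical):
+ *
+ *     static void my_job(void *arg) {
+ *         auto *a = (MyArgs *) arg;
+ *         // ... do the work ...
+ *         delete a;
+ *     }
+ *
+ *     FIFOScheduler sched(0, 4);   // 0 => default memory budget, 4 threads
+ *     sched.schedule_job(my_job, 0, new MyArgs(), 1);
+ *     sched.shutdown();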
+ */ +#pragma once + +#include <thread> +#include <condition_variable> +#include <chrono> +#include "framework/scheduling/Task.h" +#include "framework/scheduling/statistics.h" + +#include "ctpl/ctpl.h" +#include "psu-ds/LockedPriorityQueue.h" + +namespace de { + +using namespace std::literals::chrono_literals; + + +class FIFOScheduler { +private: + static const size_t DEFAULT_MAX_THREADS = 8; + +public: + FIFOScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) + , m_thrd_cnt((thread_cnt) ? thread_cnt: DEFAULT_MAX_THREADS) + , m_used_memory(0) + , m_used_thrds(0) + , m_shutdown(false) + { + m_sched_thrd = std::thread(&FIFOScheduler::run, this); + m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this); + m_thrd_pool.resize(m_thrd_cnt); + } + + ~FIFOScheduler() { + if (!m_shutdown.load()) { + shutdown(); + } + + m_sched_thrd.join(); + m_sched_wakeup_thrd.join(); + } + + void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { + std::unique_lock<std::mutex> lk(m_cv_lock); + size_t ts = m_counter.fetch_add(1); + + m_stats.job_queued(ts, type, size); + m_task_queue.push(Task(size, ts, job, args, type, &m_stats)); + + m_cv.notify_all(); + } + + void shutdown() { + m_shutdown.store(true); + m_thrd_pool.stop(true); + m_cv.notify_all(); + } + + void print_statistics() { + m_stats.print_statistics(); + } + +private: + psudb::LockedPriorityQueue<Task> m_task_queue; + + size_t m_memory_budget; + size_t m_thrd_cnt; + + std::atomic<bool> m_shutdown; + + std::atomic<size_t> m_counter; + std::mutex m_cv_lock; + std::condition_variable m_cv; + + std::thread m_sched_thrd; + std::thread m_sched_wakeup_thrd; + ctpl::thread_pool m_thrd_pool; + + std::atomic<size_t> m_used_thrds; + std::atomic<size_t> m_used_memory; + + SchedulerStatistics m_stats; + + void periodic_wakeup() { + do { + std::this_thread::sleep_for(10us); + m_cv.notify_all(); + } while (!m_shutdown.load()); + } + + void schedule_next() { + assert(m_task_queue.size() > 0); + auto t = m_task_queue.pop(); + m_stats.job_scheduled(t.m_timestamp); + + m_thrd_pool.push(t); + } + + void run() { + do { + std::unique_lock<std::mutex> cv_lock(m_cv_lock); + m_cv.wait(cv_lock); + + while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) { + schedule_next(); + } + } while(!m_shutdown.load()); + } + +}; + +} diff --git a/include/framework/scheduling/SerialScheduler.h b/include/framework/scheduling/SerialScheduler.h new file mode 100644 index 0000000..ac59301 --- /dev/null +++ b/include/framework/scheduling/SerialScheduler.h @@ -0,0 +1,62 @@ +/* + * include/framework/scheduling/SerialScheduler.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * IMPORTANT: This "scheduler" is a shim implementation for allowing + * strictly serial, single-threaded operation of the framework. It should + * never be used in multi-threaded contexts. A call to the schedule_job + * function will immediately run the job and block on its completion before + * returning. + * + */ +#pragma once + +#include "framework/scheduling/Task.h" +#include "framework/scheduling/statistics.h" + +namespace de { + +class SerialScheduler { +public: + SerialScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) + , m_thrd_cnt((thread_cnt) ? 
thread_cnt: UINT64_MAX) + , m_used_memory(0) + , m_used_thrds(0) + , m_counter(0) + {} + + ~SerialScheduler() = default; + + void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { + size_t ts = m_counter++; + m_stats.job_queued(ts, type, size); + m_stats.job_scheduled(ts); + auto t = Task(size, ts, job, args, type, &m_stats); + t(0); + } + + void shutdown() { + /* intentionally left blank */ + } + + void print_statistics() { + m_stats.print_statistics(); + } + +private: + size_t m_memory_budget; + size_t m_thrd_cnt; + + size_t m_used_thrds; + size_t m_used_memory; + + size_t m_counter; + + SchedulerStatistics m_stats; +}; + +} diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h new file mode 100644 index 0000000..d5d4266 --- /dev/null +++ b/include/framework/scheduling/Task.h @@ -0,0 +1,89 @@ +/* + * include/framework/scheduling/Task.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * An abstraction to represent a job to be scheduled. Currently the + * supported task types are queries and merges. Based on the current plan, + * simple buffer inserts will likely also be made into a task at some + * point. + * + */ +#pragma once + +#include <future> +#include <functional> +#include <chrono> + +#include "framework/util/Configuration.h" +#include "framework/scheduling/Epoch.h" +#include "framework/scheduling/statistics.h" + +namespace de { + +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +struct ReconstructionArgs { + Epoch<R, S, Q, L> *epoch; + std::vector<ReconstructionTask> merges; + std::promise<bool> result; + bool compaction; + void *extension; +}; + +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +struct QueryArgs { + std::promise<std::vector<R>> result_set; + void *query_parms; + void *extension; +}; + +typedef std::function<void(void*)> Job; + +struct Task { + Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr) + : m_job(job) + , m_size(size) + , m_timestamp(ts) + , m_args(args) + , m_type(type) + , m_stats(stats) + {} + + Job m_job; + size_t m_size; + size_t m_timestamp; + void *m_args; + size_t m_type; + SchedulerStatistics *m_stats; + + friend bool operator<(const Task &self, const Task &other) { + return self.m_timestamp < other.m_timestamp; + } + + friend bool operator>(const Task &self, const Task &other) { + return self.m_timestamp > other.m_timestamp; + } + + void operator()(size_t thrd_id) { + auto start = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_begin(m_timestamp); + } + + m_job(m_args); + + if (m_stats) { + m_stats->job_complete(m_timestamp); + } + auto stop = std::chrono::high_resolution_clock::now(); + + if (m_stats) { + auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); + m_stats->log_time_data(time, m_type); + } + } +}; + +} diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h new file mode 100644 index 0000000..6c479cd --- /dev/null +++ b/include/framework/scheduling/statistics.h @@ -0,0 +1,118 @@ +/* + * include/framework/scheduling/statistics.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * This is a stub for a statistics tracker to be used in scheduling. 
It + * currently only tracks simple aggregated statistics, but should be + * updated in the future for more fine-grained statistics. These will be + * used for making scheduling decisions and predicting the runtime of a + * given job. + */ +#pragma once + +#include <cstdlib> +#include <cassert> +#include <unordered_map> +#include <vector> +#include <mutex> +#include <chrono> +#include <atomic> + +namespace de { + +class SchedulerStatistics { +private: + enum class EventType { + QUEUED, + SCHEDULED, + STARTED, + FINISHED + }; + + struct Event { + size_t id; + EventType type; + }; + + struct JobInfo { + size_t id; + size_t size; + size_t type; + }; + + +public: + SchedulerStatistics() = default; + ~SchedulerStatistics() = default; + + void job_queued(size_t id, size_t type, size_t size) { + auto time = std::chrono::high_resolution_clock::now(); + } + + void job_scheduled(size_t id) { + std::unique_lock<std::mutex> lk(m_mutex); + + } + + void job_begin(size_t id) { + + } + + void job_complete(size_t id) { + + } + + /* FIXME: This is just a temporary approach */ + void log_time_data(size_t length, size_t type) { + assert(type == 1 || type == 2); + + if (type == 1) { + m_type_1_cnt.fetch_add(1); + m_type_1_total_time.fetch_add(length); + + if (length > m_type_1_largest_time) { + m_type_1_largest_time.store(length); + } + } else { + m_type_2_cnt.fetch_add(1); + m_type_2_total_time.fetch_add(length); + + if (length > m_type_2_largest_time) { + m_type_2_largest_time.store(length); + } + } + } + + void print_statistics() { + if (m_type_1_cnt > 0) { + fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", + m_type_1_cnt.load(), + m_type_1_total_time.load() / m_type_1_cnt.load(), + m_type_1_largest_time.load()); + } + if (m_type_2_cnt > 0) { + fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\tMax Recon. Latency:%ld\n", + m_type_2_cnt.load(), + m_type_2_total_time.load() / m_type_2_cnt.load(), + m_type_2_largest_time.load()); + } + } + +private: + std::mutex m_mutex; + std::unordered_map<size_t, JobInfo> m_jobs; + std::vector<Event> m_event_log; + + std::atomic<size_t> m_type_1_cnt; + std::atomic<size_t> m_type_1_total_time; + + std::atomic<size_t> m_type_2_cnt; + std::atomic<size_t> m_type_2_total_time; + + std::atomic<size_t> m_type_1_largest_time; + std::atomic<size_t> m_type_2_largest_time; +}; +} diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h new file mode 100644 index 0000000..9e0872b --- /dev/null +++ b/include/framework/structure/BufferView.h @@ -0,0 +1,170 @@ +/* + * include/framework/structure/BufferView.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * TODO: This file is very poorly commented. + */ +#pragma once + +#include <cstdlib> +#include <cassert> +#include <functional> +#include <utility> + +#include "psu-util/alignment.h" +#include "psu-ds/BloomFilter.h" +#include "framework/interface/Record.h" + +namespace de { + +typedef std::_Bind<void (*(void*, long unsigned int))(void*, long unsigned int)> ReleaseFunction; + +template <RecordInterface R> +class BufferView { +public: + BufferView() = default; + + /* + * the BufferView's lifetime is tightly linked to buffer versioning, and so + * copying and assignment are disabled. 
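+ * Only moves are permitted, e.g. (editor's sketch):
+ *
+ *     auto view = buffer->get_buffer_view();  // reference-counted view
+ *     // BufferView<R> copy = view;           // ill-formed: copying is deleted
+ *     BufferView<R> owner = std::move(view);  // release duty moves with it
+ *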
+ */ + BufferView(const BufferView&) = delete; + BufferView &operator=(BufferView &) = delete; + + BufferView(BufferView &&other) + : m_data(std::exchange(other.m_data, nullptr)) + , m_release(std::move(other.m_release)) + , m_head(std::exchange(other.m_head, 0)) + , m_tail(std::exchange(other.m_tail, 0)) + , m_start(std::exchange(other.m_start, 0)) + , m_stop(std::exchange(other.m_stop, 0)) + , m_cap(std::exchange(other.m_cap, 0)) + , m_approx_ts_cnt(std::exchange(other.m_approx_ts_cnt, 0)) + , m_tombstone_filter(std::exchange(other.m_tombstone_filter, nullptr)) + , m_active(std::exchange(other.m_active, false)) {} + + BufferView &operator=(BufferView &&other) = delete; + + + BufferView(Wrapped<R> *buffer, size_t cap, size_t head, size_t tail, size_t tombstone_cnt, psudb::BloomFilter<R> *filter, + ReleaseFunction release) + : m_data(buffer) + , m_release(release) + , m_head(head) + , m_tail(tail) + , m_start(m_head % cap) + , m_stop(m_tail % cap) + , m_cap(cap) + , m_approx_ts_cnt(tombstone_cnt) + , m_tombstone_filter(filter) + , m_active(true) {} + + ~BufferView() { + if (m_active) { + m_release(); + } + } + + bool check_tombstone(const R& rec) { + if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) return false; + + for (size_t i=0; i<get_record_count(); i++) { + if (m_data[to_idx(i)].rec == rec && m_data[to_idx(i)].is_tombstone()) { + return true; + } + } + + return false; + } + + bool delete_record(const R& rec) { + if (m_start < m_stop) { + for (size_t i=m_start; i<m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; + } + } + } else { + for (size_t i=m_start; i<m_cap; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; + } + } + + for (size_t i=0; i<m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; + } + + } + + } + + return false; + } + + size_t get_record_count() { + return m_tail - m_head; + } + + /* + * NOTE: This function returns an upper bound on the number + * of tombstones within the view. There may be less than + * this, due to synchronization issues during view creation. + */ + size_t get_tombstone_count() { + return m_approx_ts_cnt; + } + + Wrapped<R> *get(size_t i) { + assert(i < get_record_count()); + return m_data + to_idx(i); + } + + void copy_to_buffer(psudb::byte *buffer) { + /* check if the region to be copied circles back to start. If so, do it in two steps */ + if (m_start > m_stop) { + size_t split_idx = m_cap - m_start; + + memcpy(buffer, (std::byte*) (m_data + m_start), split_idx* sizeof(Wrapped<R>)); + memcpy(buffer + (split_idx * sizeof(Wrapped<R>)), (std::byte*) m_data, m_stop * sizeof(Wrapped<R>)); + } else { + memcpy(buffer, (std::byte*) (m_data + m_start), get_record_count() * sizeof(Wrapped<R>)); + } + } + + size_t get_tail() { + return m_tail; + } + + size_t get_head() { + return m_head; + } + +private: + Wrapped<R>* m_data; + ReleaseFunction m_release; + size_t m_head; + size_t m_tail; + size_t m_start; + size_t m_stop; + size_t m_cap; + size_t m_approx_ts_cnt; + psudb::BloomFilter<R> *m_tombstone_filter; + bool m_active; + + size_t to_idx(size_t i) { + size_t idx = (m_start + i >= m_cap) ? 
i - (m_cap - m_start) + : m_start + i; + assert(idx < m_cap); + return idx; + } +}; + +} diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h new file mode 100644 index 0000000..4802bc1 --- /dev/null +++ b/include/framework/structure/ExtensionStructure.h @@ -0,0 +1,495 @@ +/* + * include/framework/structure/ExtensionStructure.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include <atomic> +#include <cstdio> +#include <vector> + +#include "framework/structure/BufferView.h" +#include "framework/structure/InternalLevel.h" + +#include "framework/util/Configuration.h" + +#include "psu-util/timer.h" + +namespace de { + +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING> +class ExtensionStructure { + typedef S Shard; + typedef BufferView<R> BuffView; + +public: + ExtensionStructure(size_t buffer_size, size_t scale_factor, double max_delete_prop) + : m_scale_factor(scale_factor) + , m_max_delete_prop(max_delete_prop) + , m_buffer_size(buffer_size) + {} + + ~ExtensionStructure() = default; + + /* + * Create a shallow copy of this extension structure. The copy will share + * references to the same levels/shards as the original, but will have its + * own lists. As all of the shards are immutable (with the exception of + * deletes), the copy can be restructured with reconstructions and flushes + * without affecting the original. The copied structure will be returned + * with a reference count of 0; generally you will want to immediately call + * take_reference() on it. + * + * NOTE: When using tagged deletes, a delete of a record in the original + * structure will affect the copy, so long as the copy retains a reference + * to the same shard as the original. This could cause synchronization + * problems under tagging with concurrency. Any deletes in this context will + * need to be forwarded to the appropriate structures manually. + */ + ExtensionStructure<R, S, Q, L> *copy() { + auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, + m_max_delete_prop); + for (size_t i=0; i<m_levels.size(); i++) { + new_struct->m_levels.push_back(m_levels[i]->clone()); + } + + new_struct->m_refcnt = 0; + + return new_struct; + } + + /* + * Search for a record matching the argument and mark it deleted by + * setting the delete bit in its wrapped header. Returns 1 if a matching + * record was found and deleted, and 0 if a matching record was not found. + * + * This function will stop after finding the first matching record. It is + * assumed that no duplicate records exist. In the case of duplicates, this + * function will still "work", but in the sense of "delete first match". + */ + int tagged_delete(const R &rec) { + for (auto level : m_levels) { + if (level && level->delete_record(rec)) { + return 1; + } + } + + /* + * If the record to be erased wasn't found, return 0. The + * DynamicExtension itself will then search the active + * Buffers. + */ + return 0; + } + + /* + * Flush a buffer into the extension structure, performing any necessary + * reconstructions to free up room in L0. + * + * FIXME: arguably, this should be a method attached to the buffer that + * takes a structure as input.
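+ * + * A minimal calling sketch, assuming a MutableBuffer<R> *buffer that is + * ready to flush (names illustrative): + * + * structure->flush_buffer(buffer->get_buffer_view()); + * + * The view is taken by value and consumed by the flush.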
+ */ + inline bool flush_buffer(BuffView buffer) { + assert(can_reconstruct_with(0, buffer.get_record_count())); + + flush_buffer_into_l0(std::move(buffer)); + + return true; + } + + /* + * Return the total number of records (including tombstones) within all + * of the levels of the structure. + */ + size_t get_record_count() { + size_t cnt = 0; + + for (size_t i=0; i<m_levels.size(); i++) { + if (m_levels[i]) cnt += m_levels[i]->get_record_count(); + } + + return cnt; + } + + /* + * Return the total number of tombstones contained within all of the + * levels of the structure. + */ + size_t get_tombstone_count() { + size_t cnt = 0; + + for (size_t i=0; i<m_levels.size(); i++) { + if (m_levels[i]) cnt += m_levels[i]->get_tombstone_count(); + } + + return cnt; + } + + /* + * Return the number of levels within the structure. Note that not + * all of these levels are necessarily populated. + */ + size_t get_height() { + return m_levels.size(); + } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing the primary data structure and raw data. + */ + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i=0; i<m_levels.size(); i++) { + if (m_levels[i]) cnt += m_levels[i]->get_memory_usage(); + } + + return cnt; + } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing auxiliary data structures. This total does not + * include memory used for the main data structure, or raw data. + */ + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i=0; i<m_levels.size(); i++) { + if (m_levels[i]) { + cnt += m_levels[i]->get_aux_memory_usage(); + } + } + + return cnt; + } + + /* + * Validate that no level in the structure exceeds its maximum tombstone + * capacity. This is used to trigger preemptive compactions at the end of + * the reconstruction process. + */ + bool validate_tombstone_proportion() { + long double ts_prop; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + ts_prop = (long double)m_levels[i]->get_tombstone_count() / + (long double)calc_level_record_capacity(i); + if (ts_prop > (long double)m_max_delete_prop) { + return false; + } + } + } + + return true; + } + + bool validate_tombstone_proportion(level_index level) { + long double ts_prop = (long double) m_levels[level]->get_tombstone_count() / (long double) calc_level_record_capacity(level); + return ts_prop <= (long double) m_max_delete_prop; + } + + /* + * Return a reference to the underlying vector of levels within the + * structure. 
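+ * + * NOTE: the shards referenced through these levels may be shared with + * shallow copies produced by copy(), and so should be treated as + * immutable (tagged deletes excepted).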
+ */ + std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> &get_levels() { + return m_levels; + } + + std::vector<ReconstructionTask> get_compaction_tasks() { + std::vector<ReconstructionTask> tasks; + + /* if the tombstone/delete invariant is satisfied, no need for compactions */ + if (validate_tombstone_proportion()) { + return tasks; + } + + /* locate the first level to violate the invariant */ + level_index violation_idx = -1; + for (level_index i=0; i<m_levels.size(); i++) { + if (!validate_tombstone_proportion(i)) { + violation_idx = i; + break; + } + } + + assert(violation_idx != -1); + + level_index base_level = find_reconstruction_target(violation_idx); + if (base_level == -1) { + base_level = grow(); + } + + for (level_index i=base_level; i>0; i--) { + ReconstructionTask task = {i-1, i}; + + /* + * The amount of storage required for the reconstruction accounts + * for the cost of storing the new records, along with the + * cost of retaining the old records during the process + * (hence the 2x multiplier). + * + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records + * themselves. + */ + size_t reccnt = m_levels[i - 1]->get_record_count(); + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, reccnt)) { + reccnt += m_levels[i]->get_record_count(); + } + } + //task.m_size = 2* reccnt * sizeof(R); + + tasks.push_back(task); + } + + return tasks; + } + + /* + * + */ + std::vector<ReconstructionTask> get_reconstruction_tasks(size_t buffer_reccnt) { + std::vector<ReconstructionTask> reconstructions; + + /* + * The buffer flush is not included so if that can be done without any + * other change, just return an empty list. + */ + if (can_reconstruct_with(0, buffer_reccnt)) { + return std::move(reconstructions); + } + + level_index base_level = find_reconstruction_target(0); + if (base_level == -1) { + base_level = grow(); + } + + for (level_index i=base_level; i>0; i--) { + ReconstructionTask task = {i-1, i}; + + /* + * The amount of storage required for the reconstruction accounts + * for the cost of storing the new records, along with the + * cost of retaining the old records during the process + * (hence the 2x multiplier). + * + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records + * themselves. + */ + size_t reccnt = m_levels[i-1]->get_record_count(); + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, reccnt)) { + reccnt += m_levels[i]->get_record_count(); + } + } + //task.m_size = 2* reccnt * sizeof(R); + + reconstructions.push_back(task); + } + + return std::move(reconstructions); + } + + + /* + * + */ + std::vector<ReconstructionTask> get_reconstruction_tasks_from_level(level_index source_level) { + std::vector<ReconstructionTask> reconstructions; + + level_index base_level = find_reconstruction_target(source_level); + if (base_level == -1) { + base_level = grow(); + } + + for (level_index i=base_level; i>source_level; i--) { + ReconstructionTask task = {i - 1, i}; + /* + * The amount of storage required for the reconstruction accounts + * for the cost of storing the new records, along with the + * cost of retaining the old records during the process + * (hence the 2x multiplier). + * + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records + * themselves. 
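+ * + * As a concrete example of the capacity math (see + * calc_level_record_capacity() below): with buffer_size = 1000 and + * scale_factor = 8, level 0 holds at most 1000 * 8 = 8,000 records and + * level 1 at most 1000 * 8^2 = 64,000, which bounds the size of any + * single reconstruction between the two.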
+ */ + size_t reccnt = m_levels[i-1]->get_record_count(); + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, reccnt)) { + reccnt += m_levels[i]->get_record_count(); + } + } +// task.m_size = 2* reccnt * sizeof(R); + + reconstructions.push_back(task); + } + + return reconstructions; + } + + /* + * Combine incoming_level with base_level and reconstruct the shard, + * placing it in base_level. The two levels should be sequential--i.e. no + * levels are skipped in the reconstruction process--otherwise the + * tombstone ordering invariant may be violated. + */ + inline void reconstruction(level_index base_level, level_index incoming_level) { + if constexpr (L == LayoutPolicy::LEVELING) { + /* if the base level has a shard, merge the base and incoming together to make a new one */ + if (m_levels[base_level]->get_shard_count() > 0) { + m_levels[base_level] = InternalLevel<R, Shard, Q>::reconstruction(m_levels[base_level].get(), m_levels[incoming_level].get()); + /* otherwise, we can just move the incoming to the base */ + } else { + m_levels[base_level] = m_levels[incoming_level]; + } + } else { + m_levels[base_level]->append_level(m_levels[incoming_level].get()); + m_levels[base_level]->finalize(); + } + + /* place a new, empty level where the incoming level used to be */ + m_levels[incoming_level] = std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); + } + + bool take_reference() { + m_refcnt.fetch_add(1); + return true; + } + + bool release_reference() { + assert(m_refcnt.load() > 0); + m_refcnt.fetch_add(-1); + return true; + } + + size_t get_reference_count() { + return m_refcnt.load(); + } + + std::vector<void *> get_query_states(std::vector<std::pair<ShardID, Shard*>> &shards, void *parms) { + std::vector<void*> states; + + for (auto &level : m_levels) { + level->get_query_states(shards, states, parms); + } + + return states; + } + +private: + size_t m_scale_factor; + double m_max_delete_prop; + size_t m_buffer_size; + + std::atomic<size_t> m_refcnt; + + std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> m_levels; + + /* + * Add a new level to the structure and return its index. + */ + inline level_index grow() { + level_index new_idx = m_levels.size(); + size_t new_shard_cnt = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + m_levels.emplace_back(std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(new_idx, new_shard_cnt))); + return new_idx; + } + + /* + * Find the first level below the level indicated by idx that + * is capable of sustaining a reconstruction and return its + * level index. If no such level exists, returns -1. Also + * returns -1 if idx==0, and no such level exists, to simplify + * the logic of the first buffer flush. + */ + inline level_index find_reconstruction_target(level_index idx) { + + if (idx == 0 && m_levels.size() == 0) return -1; + + size_t incoming_rec_cnt = get_level_record_count(idx); + for (level_index i=idx+1; i<m_levels.size(); i++) { + if (can_reconstruct_with(i, incoming_rec_cnt)) { + return i; + } + + incoming_rec_cnt = get_level_record_count(i); + } + + return -1; + } + + inline void flush_buffer_into_l0(BuffView buffer) { + assert(m_levels[0]); + if constexpr (L == LayoutPolicy::LEVELING) { + // FIXME: Kludgey implementation due to interface constraints. 
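+ /* + * Under leveling, L0 holds a single shard, so the buffer is first + * materialized into a temporary single-shard level and then merged + * with any existing L0 shard via reconstruction(). + */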
+ auto old_level = m_levels[0].get(); + auto temp_level = new InternalLevel<R, Shard, Q>(0, 1); + temp_level->append_buffer(std::move(buffer)); + + if (old_level->get_shard_count() > 0) { + m_levels[0] = InternalLevel<R, Shard, Q>::reconstruction(old_level, temp_level); + delete temp_level; + } else { + m_levels[0] = std::shared_ptr<InternalLevel<R, Shard, Q>>(temp_level); + } + } else { + m_levels[0]->append_buffer(std::move(buffer)); + } + } + + /* + * Mark a given memory level as no-longer in use by the tree. For now this + * will just free the level. In future, this will be more complex as the + * level may not be able to immediately be deleted, depending upon who + * else is using it. + */ + inline void mark_as_unused(std::shared_ptr<InternalLevel<R, Shard, Q>> level) { + level.reset(); + } + + /* + * Assume that level "0" should be larger than the buffer. The buffer + * itself is index -1, which should return simply the buffer capacity. + */ + inline size_t calc_level_record_capacity(level_index idx) { + return m_buffer_size * pow(m_scale_factor, idx+1); + } + + /* + * Returns the number of records present on a specified level. + */ + inline size_t get_level_record_count(level_index idx) { + return (m_levels[idx]) ? m_levels[idx]->get_record_count() : 0; + } + + /* + * Determines if a level can sustain a reconstruction with incoming_rec_cnt + * additional records without exceeding its capacity. + */ + inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt) { + if (idx >= m_levels.size() || !m_levels[idx]) { + return false; + } + + if (L == LayoutPolicy::LEVELING) { + return m_levels[idx]->get_record_count() + incoming_rec_cnt <= calc_level_record_capacity(idx); + } else { + return m_levels[idx]->get_shard_count() < m_scale_factor; + } + + /* unreachable */ + assert(true); + } +}; + +} + diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h new file mode 100644 index 0000000..db38946 --- /dev/null +++ b/include/framework/structure/InternalLevel.h @@ -0,0 +1,271 @@ +/* + * include/framework/structure/InternalLevel.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + * The word `Internal` in this class's name refers to memory. The current + * model, inherited from the framework in Practical Dynamic Extension for + * Sampling Indexes, would use a different ExternalLevel for shards stored + * on external storage. This is a distinction that can probably be avoided + * with some more thought being put into interface design. + * + */ +#pragma once + +#include <vector> +#include <memory> + +#include "util/types.h" +#include "framework/interface/Shard.h" +#include "framework/interface/Query.h" +#include "framework/interface/Record.h" +#include "framework/structure/BufferView.h" + +namespace de { +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +class InternalLevel; + + + +template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +class InternalLevel { + typedef S Shard; + typedef BufferView<R> BuffView; +public: + InternalLevel(ssize_t level_no, size_t shard_cap) + : m_level_no(level_no) + , m_shard_cnt(0) + , m_shards(shard_cap, nullptr) + , m_pending_shard(nullptr) + {} + + ~InternalLevel() { + delete m_pending_shard; + } + + /* + * Create a new shard combining the records from base_level and new_level, + * and return a shared_ptr to a new level containing this shard. 
This is used + * for reconstructions under the leveling layout policy. + * + * No changes are made to the levels provided as arguments. + */ + static std::shared_ptr<InternalLevel> reconstruction(InternalLevel* base_level, InternalLevel* new_level) { + assert(base_level->m_level_no > new_level->m_level_no || (base_level->m_level_no == 0 && new_level->m_level_no == 0)); + auto res = new InternalLevel(base_level->m_level_no, 1); + res->m_shard_cnt = 1; + std::vector<Shard *> shards = {base_level->m_shards[0].get(), + new_level->m_shards[0].get()}; + + res->m_shards[0] = std::make_shared<S>(shards); + return std::shared_ptr<InternalLevel>(res); + } + + /* + * Create a new shard combining the records from all of + * the shards in level, and append this new shard into + * this level. This is used for reconstructions under + * the tiering layout policy. + * + * No changes are made to the level provided as an argument. + */ + void append_level(InternalLevel* level) { + // FIXME: that this is happening probably means that + // something is going terribly wrong earlier in the + // reconstruction logic. + if (level->get_shard_count() == 0) { + return; + } + + std::vector<S*> shards; + for (auto shard : level->m_shards) { + if (shard) shards.emplace_back(shard.get()); + } + + if (m_shard_cnt == m_shards.size()) { + m_pending_shard = new S(shards); + return; + } + + auto tmp = new S(shards); + m_shards[m_shard_cnt] = std::shared_ptr<S>(tmp); + + ++m_shard_cnt; + } + + /* + * Create a new shard using the records in the + * provided buffer, and append this new shard + * into this level. This is used for buffer + * flushes under the tiering layout policy. + */ + void append_buffer(BuffView buffer) { + if (m_shard_cnt == m_shards.size()) { + assert(m_pending_shard == nullptr); + m_pending_shard = new S(std::move(buffer)); + return; + } + + m_shards[m_shard_cnt] = std::make_shared<S>(std::move(buffer)); + ++m_shard_cnt; + } + + void finalize() { + if (m_pending_shard) { + for (size_t i=0; i<m_shards.size(); i++) { + m_shards[i] = nullptr; + } + + m_shards[0] = std::shared_ptr<S>(m_pending_shard); + m_pending_shard = nullptr; + m_shard_cnt = 1; + } + } + + /* + * Create a new shard containing the combined records + * from all shards on this level and return it. + * + * No changes are made to this level. 
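+ * + * The returned shard is heap-allocated; ownership passes to the caller, + * which is responsible for deleting it.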
+ */ + Shard *get_combined_shard() { + if (m_shard_cnt == 0) { + return nullptr; + } + + std::vector<Shard *> shards; + for (auto shard : m_shards) { + if (shard) shards.emplace_back(shard.get()); + } + + return new S(shards); + } + + void get_query_states(std::vector<std::pair<ShardID, Shard *>> &shards, std::vector<void*>& shard_states, void *query_parms) { + for (size_t i=0; i<m_shard_cnt; i++) { + if (m_shards[i]) { + auto shard_state = Q::get_query_state(m_shards[i].get(), query_parms); + shards.push_back({{m_level_no, (ssize_t) i}, m_shards[i].get()}); + shard_states.emplace_back(shard_state); + } + } + } + + bool check_tombstone(size_t shard_stop, const R& rec) { + if (m_shard_cnt == 0) return false; + + for (int i = m_shard_cnt - 1; i >= (ssize_t) shard_stop; i--) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec, true); + if (res && res->is_tombstone()) { + return true; + } + } + } + return false; + } + + bool delete_record(const R &rec) { + if (m_shard_cnt == 0) return false; + + for (size_t i = 0; i < m_shards.size(); ++i) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec); + if (res) { + res->set_delete(); + return true; + } + } + } + + return false; + } + + Shard* get_shard(size_t idx) { + return m_shards[idx].get(); + } + + size_t get_shard_count() { + return m_shard_cnt; + } + + size_t get_record_count() { + size_t cnt = 0; + for (size_t i=0; i<m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_record_count(); + } + } + + return cnt; + } + + size_t get_tombstone_count() { + size_t res = 0; + for (size_t i = 0; i < m_shard_cnt; ++i) { + if (m_shards[i]) { + res += m_shards[i]->get_tombstone_count(); + } + } + return res; + } + + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i=0; i<m_shard_cnt; i++) { + if (m_shards[i]){ + cnt += m_shards[i]->get_aux_memory_usage(); + } + } + + return cnt; + } + + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i=0; i<m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_memory_usage(); + } + } + + return cnt; + } + + double get_tombstone_prop() { + size_t tscnt = 0; + size_t reccnt = 0; + for (size_t i=0; i<m_shard_cnt; i++) { + if (m_shards[i]) { + tscnt += m_shards[i]->get_tombstone_count(); + reccnt += m_shards[i]->get_record_count(); + } + } + + return (double) tscnt / (double) (tscnt + reccnt); + } + + std::shared_ptr<InternalLevel> clone() { + auto new_level = std::make_shared<InternalLevel>(m_level_no, m_shards.size()); + for (size_t i=0; i<m_shard_cnt; i++) { + new_level->m_shards[i] = m_shards[i]; + } + new_level->m_shard_cnt = m_shard_cnt; + + return new_level; + } + +private: + ssize_t m_level_no; + + size_t m_shard_cnt; + size_t m_shard_size_cap; + + std::vector<std::shared_ptr<Shard>> m_shards; + Shard *m_pending_shard; +}; + +} diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h new file mode 100644 index 0000000..415c95a --- /dev/null +++ b/include/framework/structure/MutableBuffer.h @@ -0,0 +1,313 @@ +/* + * include/framework/structure/MutableBuffer.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + * NOTE: Concerning the tombstone count. One possible approach + * would be to track the number of tombstones below and above the + * low water mark--this would be straightforward to do. 
Then, if we + * *require* that the head only advance up to the LWM, we can get a + * correct view on the number of tombstones in the active buffer at + * any point in time, and the BufferView will have a pretty good + * approximation as well (potentially with a few extra if new inserts + * happen between when the tail pointer and tombstone count are fetched) + * + */ +#pragma once + +#include <cstdlib> +#include <atomic> +#include <cassert> +#include <immintrin.h> + +#include "psu-util/alignment.h" +#include "util/bf_config.h" +#include "psu-ds/BloomFilter.h" +#include "framework/interface/Record.h" +#include "framework/structure/BufferView.h" + +using psudb::CACHELINE_SIZE; + +namespace de { + +template <RecordInterface R> +class MutableBuffer { + friend class BufferView<R>; + + struct buffer_head { + size_t head_idx; + size_t refcnt; + }; + +public: + MutableBuffer(size_t low_watermark, size_t high_watermark, size_t capacity=0) + : m_lwm(low_watermark) + , m_hwm(high_watermark) + , m_cap((capacity == 0) ? 2 * high_watermark : capacity) + , m_tail(0) + , m_head({0, 0}) + , m_old_head({high_watermark, 0}) + , m_data((Wrapped<R> *) psudb::sf_aligned_alloc(CACHELINE_SIZE, m_cap * sizeof(Wrapped<R>))) + , m_tombstone_filter(new psudb::BloomFilter<R>(BF_FPR, m_hwm, BF_HASH_FUNCS)) + , m_tscnt(0) + , m_old_tscnt(0) + , m_active_head_advance(false) + { + assert(m_cap > m_hwm); + assert(m_hwm > m_lwm); + } + + ~MutableBuffer() { + free(m_data); + delete m_tombstone_filter; + } + + int append(const R &rec, bool tombstone=false) { + int32_t tail = 0; + if ((tail = try_advance_tail()) == -1) { + return 0; + } + + Wrapped<R> wrec; + wrec.rec = rec; + wrec.header = 0; + if (tombstone) wrec.set_tombstone(); + + size_t pos = tail % m_cap; + + m_data[pos] = wrec; + m_data[pos].header |= (pos << 2); + + if (tombstone) { + m_tscnt.fetch_add(1); + if (m_tombstone_filter) m_tombstone_filter->insert(rec); + } + + return 1; + } + + bool truncate() { + m_tscnt.store(0); + m_tail.store(0); + if (m_tombstone_filter) m_tombstone_filter->clear(); + + return true; + } + + size_t get_record_count() { + return m_tail.load() - m_head.load().head_idx; + } + + size_t get_capacity() { + return m_cap; + } + + bool is_full() { + return get_record_count() >= m_hwm; + } + + bool is_at_low_watermark() { + return get_record_count() >= m_lwm; + } + + size_t get_tombstone_count() { + return m_tscnt.load(); + } + + bool delete_record(const R& rec) { + return get_buffer_view().delete_record(rec); + } + + bool check_tombstone(const R& rec) { + return get_buffer_view().check_tombstone(rec); + } + + size_t get_memory_usage() { + return m_cap * sizeof(Wrapped<R>); + } + + size_t get_aux_memory_usage() { + return m_tombstone_filter->get_memory_usage(); + } + + BufferView<R> get_buffer_view(size_t target_head) { + size_t head = get_head(target_head); + auto f = std::bind(release_head_reference, (void *) this, head); + + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); + } + + BufferView<R> get_buffer_view() { + size_t head = get_head(m_head.load().head_idx); + auto f = std::bind(release_head_reference, (void *) this, head); + + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); + } + + /* + * Advance the buffer following a reconstruction. Move current + * head and head_refcnt into old_head and old_head_refcnt, then + * assign new_head to old_head. 
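+ * + * Returns false, without advancing, while the previous old head still + * holds references. A caller retry sketch (illustrative only): + * + * while (!buffer->advance_head(new_head)) _mm_pause();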
+ */ + bool advance_head(size_t new_head) { + assert(new_head > m_head.load().head_idx); + assert(new_head <= m_tail.load()); + + /* refuse to advance head while the old head still has outstanding references */ + if (m_old_head.load().refcnt > 0) { + //fprintf(stderr, "[W]: Refusing to advance head due to remaining reference counts\n"); + return false; + } + + m_active_head_advance.store(true); + + buffer_head new_hd = {new_head, 0}; + buffer_head cur_hd; + + /* replace current head with new head */ + do { + cur_hd = m_head.load(); + } while(!m_head.compare_exchange_strong(cur_hd, new_hd)); + + /* move the current head into the old head */ + m_old_head.store(cur_hd); + + m_active_head_advance.store(false); + return true; + } + + /* + * FIXME: If target_head does not match *either* the old_head or the + * current_head, this routine will loop infinitely. + */ + size_t get_head(size_t target_head) { + buffer_head cur_hd, new_hd; + bool head_acquired = false; + + do { + if (m_old_head.load().head_idx == target_head) { + cur_hd = m_old_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_old_head.compare_exchange_strong(cur_hd, new_hd); + } else if (m_head.load().head_idx == target_head){ + cur_hd = m_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_head.compare_exchange_strong(cur_hd, new_hd); + } + } while(!head_acquired); + + return new_hd.head_idx; + } + + void set_low_watermark(size_t lwm) { + assert(lwm < m_hwm); + m_lwm = lwm; + } + + size_t get_low_watermark() { + return m_lwm; + } + + void set_high_watermark(size_t hwm) { + assert(hwm > m_lwm); + assert(hwm < m_cap); + m_hwm = hwm; + } + + size_t get_high_watermark() { + return m_hwm; + } + + size_t get_tail() { + return m_tail.load(); + } + + /* + * Note: this returns the available physical storage capacity, + * *not* how many more records can be inserted before the + * HWM is reached. It considers the old_head to be "free" + * when it has no remaining references. This should be true, + * but a buggy framework implementation may violate the + * assumption.
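+ * + * For example, with the default physical capacity of 2 * hwm, a buffer + * with head = 0, tail = hwm, and no old-head references reports hwm free + * slots, even though no further inserts are admitted until the head + * advances.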
+ */ + size_t get_available_capacity() { + if (m_old_head.load().refcnt == 0) { + return m_cap - (m_tail.load() - m_head.load().head_idx); + } + + return m_cap - (m_tail.load() - m_old_head.load().head_idx); + } + +private: + int64_t try_advance_tail() { + size_t old_value = m_tail.load(); + + /* if full, fail to advance the tail */ + if (old_value - m_head.load().head_idx >= m_hwm) { + return -1; + } + + while (!m_tail.compare_exchange_strong(old_value, old_value+1)) { + /* if full, stop trying and fail to advance the tail */ + if (m_tail.load() >= m_hwm) { + return -1; + } + + _mm_pause(); + } + + return old_value; + } + + size_t to_idx(size_t i, size_t head) { + return (head + i) % m_cap; + } + + static void release_head_reference(void *buff, size_t head) { + MutableBuffer<R> *buffer = (MutableBuffer<R> *) buff; + + buffer_head cur_hd, new_hd; + do { + if (buffer->m_old_head.load().head_idx == head) { + cur_hd = buffer->m_old_head; + if (cur_hd.refcnt == 0) continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; + if (buffer->m_old_head.compare_exchange_strong(cur_hd, new_hd)) { + break; + } + } else { + cur_hd = buffer->m_head; + if (cur_hd.refcnt == 0) continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; + + if (buffer->m_head.compare_exchange_strong(cur_hd, new_hd)) { + break; + } + } + _mm_pause(); + } while(true); + } + + size_t m_lwm; + size_t m_hwm; + size_t m_cap; + + alignas(64) std::atomic<size_t> m_tail; + + alignas(64) std::atomic<buffer_head> m_head; + alignas(64) std::atomic<buffer_head> m_old_head; + + Wrapped<R>* m_data; + psudb::BloomFilter<R>* m_tombstone_filter; + alignas(64) std::atomic<size_t> m_tscnt; + size_t m_old_tscnt; + + alignas(64) std::atomic<bool> m_active_head_advance; +}; + +} diff --git a/include/framework/util/Configuration.h b/include/framework/util/Configuration.h new file mode 100644 index 0000000..65ca181 --- /dev/null +++ b/include/framework/util/Configuration.h @@ -0,0 +1,49 @@ +/* + * include/framework/util/Configuration.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#pragma once + +#include <cstdlib> +#include <utility> + +namespace de { + +static thread_local size_t sampling_attempts = 0; +static thread_local size_t sampling_rejections = 0; +static thread_local size_t deletion_rejections = 0; +static thread_local size_t bounds_rejections = 0; +static thread_local size_t tombstone_rejections = 0; +static thread_local size_t buffer_rejections = 0; + +/* + * thread_local size_t various_sampling_times go here. + */ +static thread_local size_t sample_range_time = 0; +static thread_local size_t alias_time = 0; +static thread_local size_t alias_query_time = 0; +static thread_local size_t rejection_check_time = 0; +static thread_local size_t buffer_sample_time = 0; +static thread_local size_t memlevel_sample_time = 0; +static thread_local size_t disklevel_sample_time = 0; +static thread_local size_t sampling_bailouts = 0; + + +enum class LayoutPolicy { + LEVELING, + TEIRING +}; + +enum class DeletePolicy { + TOMBSTONE, + TAGGING +}; + +typedef ssize_t level_index; +typedef std::pair<level_index, level_index> ReconstructionTask; + +} diff --git a/include/query/irs.h b/include/query/irs.h new file mode 100644 index 0000000..e2d9325 --- /dev/null +++ b/include/query/irs.h @@ -0,0 +1,223 @@ +/* + * include/query/irs.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. 
+ * + * A query class for independent range sampling. This query requires + * that the shard support get_lower_bound(key), get_upper_bound(key), + * and get_record_at(index). + */ +#pragma once + +#include "framework/QueryRequirements.h" +#include "psu-ds/Alias.h" + +namespace de { namespace irs { + +template <RecordInterface R> +struct Parms { + decltype(R::key) lower_bound; + decltype(R::key) upper_bound; + size_t sample_size; + gsl_rng *rng; +}; + + +template <RecordInterface R> +struct State { + size_t lower_bound; + size_t upper_bound; + size_t sample_size; + size_t total_weight; +}; + +template <RecordInterface R> +struct BufferState { + size_t cutoff; + std::vector<Wrapped<R>> records; + size_t sample_size; + BufferView<R> *buffer; + + BufferState(BufferView<R> *buffer) : buffer(buffer) {} +}; + +template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=false; + + static void *get_query_state(S *shard, void *parms) { + auto res = new State<R>(); + decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; + decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; + + res->lower_bound = shard->get_lower_bound(lower_key); + res->upper_bound = shard->get_upper_bound(upper_key); + + if (res->lower_bound == shard->get_record_count()) { + res->total_weight = 0; + } else { + res->total_weight = res->upper_bound - res->lower_bound; + } + + res->sample_size = 0; + return res; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + auto res = new BufferState<R>(buffer); + + res->cutoff = res->buffer->get_record_count(); + res->sample_size = 0; + + if constexpr (Rejection) { + return res; + } + + auto lower_key = ((Parms<R> *) parms)->lower_bound; + auto upper_key = ((Parms<R> *) parms)->upper_bound; + + for (size_t i=0; i<res->cutoff; i++) { + if ((res->buffer->get(i)->rec.key >= lower_key) && (buffer->get(i)->rec.key <= upper_key)) { + res->records.emplace_back(*(res->buffer->get(i))); + } + } + + return res; + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buffer_state) { + auto p = (Parms<R> *) query_parms; + auto bs = (buffer_state) ? (BufferState<R> *) buffer_state : nullptr; + + std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); + size_t buffer_sz = 0; + + std::vector<size_t> weights; + if constexpr (Rejection) { + weights.push_back((bs) ? bs->cutoff : 0); + } else { + weights.push_back((bs) ? bs->records.size() : 0); + } + + size_t total_weight = 0; + for (auto &s : shard_states) { + auto state = (State<R> *) s; + total_weight += state->total_weight; + weights.push_back(state->total_weight); + } + + // if no valid records fall within the query range, just + // set all of the sample sizes to 0 and bail out. 
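+ // (When in-range records do exist, the p->sample_size draws are + // apportioned below using an Alias structure built over each source's + // share of the total in-range weight.)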
+ if (total_weight == 0) { + for (size_t i=0; i<shard_states.size(); i++) { + auto state = (State<R> *) shard_states[i]; + state->sample_size = 0; + } + + return; + } + + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double) w / (double) total_weight); + } + + auto shard_alias = psudb::Alias(normalized_weights); + for (size_t i=0; i<p->sample_size; i++) { + auto idx = shard_alias.get(p->rng); + if (idx == 0) { + buffer_sz++; + } else { + shard_sample_sizes[idx - 1]++; + } + } + + if (bs) { + bs->sample_size = buffer_sz; + } + for (size_t i=0; i<shard_states.size(); i++) { + auto state = (State<R> *) shard_states[i]; + state->sample_size = shard_sample_sizes[i+1]; + } + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + auto lower_key = ((Parms<R> *) parms)->lower_bound; + auto upper_key = ((Parms<R> *) parms)->upper_bound; + auto rng = ((Parms<R> *) parms)->rng; + + auto state = (State<R> *) q_state; + auto sample_sz = state->sample_size; + + std::vector<Wrapped<R>> result_set; + + if (sample_sz == 0 || state->lower_bound == shard->get_record_count()) { + return result_set; + } + + size_t attempts = 0; + size_t range_length = state->upper_bound - state->lower_bound; + do { + attempts++; + size_t idx = (range_length > 0) ? gsl_rng_uniform_int(rng, range_length) : 0; + result_set.emplace_back(*shard->get_record_at(state->lower_bound + idx)); + } while (attempts < sample_sz); + + return result_set; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + auto st = (BufferState<R> *) state; + auto p = (Parms<R> *) parms; + + std::vector<Wrapped<R>> result; + result.reserve(st->sample_size); + + if constexpr (Rejection) { + for (size_t i=0; i<st->sample_size; i++) { + auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); + auto rec = st->buffer->get(idx); + + if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { + result.emplace_back(*rec); + } + } + + return result; + } + + for (size_t i=0; i<st->sample_size; i++) { + auto idx = gsl_rng_uniform_int(p->rng, st->records.size()); + result.emplace_back(st->records[idx]); + } + + return result; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + std::vector<R> output; + + for (size_t i=0; i<results.size(); i++) { + for (size_t j=0; j<results[i].size(); j++) { + output.emplace_back(results[i][j].rec); + } + } + + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; +}} diff --git a/include/query/knn.h b/include/query/knn.h new file mode 100644 index 0000000..19dcf5c --- /dev/null +++ b/include/query/knn.h @@ -0,0 +1,159 @@ +/* + * include/query/knn.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A query class for k-NN queries, designed for use with the VPTree + * shard. + * + * FIXME: no support for tombstone deletes just yet. This would require a + * query resumption mechanism, most likely. 
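+ * + * A minimal parameter sketch (values illustrative): + * + * knn::Parms<R> parms = { query_point, 10 }; // 10 nearest neighbors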
+ */ +#pragma once + +#include "framework/QueryRequirements.h" +#include "psu-ds/PriorityQueue.h" + +namespace de { namespace knn { + +using psudb::PriorityQueue; + +template <NDRecordInterface R> +struct Parms { + R point; + size_t k; +}; + +template <NDRecordInterface R> +struct State { + size_t k; +}; + +template <NDRecordInterface R> +struct BufferState { + BufferView<R> *buffer; + + BufferState(BufferView<R> *buffer) + : buffer(buffer) {} +}; + +template <NDRecordInterface R, ShardInterface<R> S> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=true; + + static void *get_query_state(S *shard, void *parms) { + return nullptr; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + return new BufferState<R>(buffer); + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { + return; + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + std::vector<Wrapped<R>> results; + Parms<R> *p = (Parms<R> *) parms; + Wrapped<R> wrec; + wrec.rec = p->point; + wrec.header = 0; + + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(p->k, &wrec); + + shard->search(p->point, p->k, pq); + + while (pq.size() > 0) { + results.emplace_back(*pq.peek().data); + pq.pop(); + } + + return results; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + Parms<R> *p = (Parms<R> *) parms; + BufferState<R> *s = (BufferState<R> *) state; + Wrapped<R> wrec; + wrec.rec = p->point; + wrec.header = 0; + + size_t k = p->k; + + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(k, &wrec); + for (size_t i=0; i<s->buffer->get_record_count(); i++) { + // Skip over deleted records (under tagging) + if (s->buffer->get(i)->is_deleted()) { + continue; + } + + if (pq.size() < k) { + pq.push(s->buffer->get(i)); + } else { + double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); + double cur_dist = (s->buffer->get(i))->rec.calc_distance(wrec.rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(s->buffer->get(i)); + } + } + } + + std::vector<Wrapped<R>> results; + while (pq.size() > 0) { + results.emplace_back(*(pq.peek().data)); + pq.pop(); + } + + return results; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + Parms<R> *p = (Parms<R> *) parms; + R rec = p->point; + size_t k = p->k; + + PriorityQueue<R, DistCmpMax<R>> pq(k, &rec); + for (size_t i=0; i<results.size(); i++) { + for (size_t j=0; j<results[i].size(); j++) { + if (pq.size() < k) { + pq.push(&results[i][j].rec); + } else { + double head_dist = pq.peek().data->calc_distance(rec); + double cur_dist = results[i][j].rec.calc_distance(rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&results[i][j].rec); + } + } + } + } + + std::vector<R> output; + while (pq.size() > 0) { + output.emplace_back(*pq.peek().data); + pq.pop(); + } + + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; + +}} diff --git a/include/query/rangecount.h b/include/query/rangecount.h new file mode 100644 index 0000000..6c57809 --- /dev/null +++ b/include/query/rangecount.h @@ -0,0 +1,165 @@ +/* + * include/query/rangecount.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. 
+ * + * A query class for single dimensional range count queries. This query + * requires that the shard support get_lower_bound(key) and + * get_record_at(index). + */ +#pragma once + +#include "framework/QueryRequirements.h" + +namespace de { namespace rc { + +template <RecordInterface R> +struct Parms { + decltype(R::key) lower_bound; + decltype(R::key) upper_bound; +}; + +template <RecordInterface R> +struct State { + size_t start_idx; + size_t stop_idx; +}; + +template <RecordInterface R> +struct BufferState { + BufferView<R> *buffer; + + BufferState(BufferView<R> *buffer) + : buffer(buffer) {} +}; + +template <KVPInterface R, ShardInterface<R> S> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=true; + + static void *get_query_state(S *shard, void *parms) { + auto res = new State<R>(); + auto p = (Parms<R> *) parms; + + res->start_idx = shard->get_lower_bound(p->lower_bound); + res->stop_idx = shard->get_record_count(); + + return res; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + auto res = new BufferState<R>(buffer); + + return res; + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { + return; + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + std::vector<Wrapped<R>> records; + auto p = (Parms<R> *) parms; + auto s = (State<R> *) q_state; + + size_t reccnt = 0; + size_t tscnt = 0; + + Wrapped<R> res; + res.rec.key= 0; // records + res.rec.value = 0; // tombstones + records.emplace_back(res); + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. + */ + if (s->start_idx == shard->get_record_count()) { + return records; + } + + auto ptr = shard->get_record_at(s->start_idx); + + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. 
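+ * This is necessary because get_lower_bound() may return only an + * approximate starting position for some shards (e.g., the PGM noted + * above).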
+ */ + while(ptr < shard->get_data() + s->stop_idx && ptr->rec.key < p->lower_bound) { + ptr++; + } + + while (ptr < shard->get_data() + s->stop_idx && ptr->rec.key <= p->upper_bound) { + if (!ptr->is_deleted()) { + if (ptr->is_tombstone()) { + records[0].rec.value++; + } else { + records[0].rec.key++; + } + } + + ptr++; + } + + return records; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + auto p = (Parms<R> *) parms; + auto s = (BufferState<R> *) state; + + std::vector<Wrapped<R>> records; + + Wrapped<R> res; + res.rec.key= 0; // records + res.rec.value = 0; // tombstones + records.emplace_back(res); + + for (size_t i=0; i<s->buffer->get_record_count(); i++) { + auto rec = s->buffer->get(i); + if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound + && !rec->is_deleted()) { + if (rec->is_tombstone()) { + records[0].rec.value++; + } else { + records[0].rec.key++; + } + } + } + + return records; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + + R res; + res.key = 0; + res.value = 0; + std::vector<R> output; + output.emplace_back(res); + + for (size_t i=0; i<results.size(); i++) { + output[0].key += results[i][0].rec.key; // records + output[0].value += results[i][0].rec.value; // tombstones + } + + output[0].key -= output[0].value; + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; + +}} diff --git a/include/query/rangequery.h b/include/query/rangequery.h new file mode 100644 index 0000000..24b38ec --- /dev/null +++ b/include/query/rangequery.h @@ -0,0 +1,174 @@ +/* + * include/query/rangequery.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A query class for single dimensional range queries. This query requires + * that the shard support get_lower_bound(key) and get_record_at(index). 
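+ * + * A minimal parameter sketch (bounds illustrative): + * + * rq::Parms<R> parms = { 10, 500 }; // records with keys in [10, 500]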
+ */ +#pragma once + +#include "framework/QueryRequirements.h" +#include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" + +namespace de { namespace rq { + +template <RecordInterface R> +struct Parms { + decltype(R::key) lower_bound; + decltype(R::key) upper_bound; +}; + +template <RecordInterface R> +struct State { + size_t start_idx; + size_t stop_idx; +}; + +template <RecordInterface R> +struct BufferState { + BufferView<R> *buffer; + + BufferState(BufferView<R> *buffer) + : buffer(buffer) {} +}; + +template <RecordInterface R, ShardInterface<R> S> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=true; + + static void *get_query_state(S *shard, void *parms) { + auto res = new State<R>(); + auto p = (Parms<R> *) parms; + + res->start_idx = shard->get_lower_bound(p->lower_bound); + res->stop_idx = shard->get_record_count(); + + return res; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + auto res = new BufferState<R>(buffer); + + return res; + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { + return; + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + std::vector<Wrapped<R>> records; + auto p = (Parms<R> *) parms; + auto s = (State<R> *) q_state; + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. + */ + if (s->start_idx == shard->get_record_count()) { + return records; + } + + auto ptr = shard->get_record_at(s->start_idx); + + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. + */ + while(ptr < shard->get_data() + s->stop_idx && ptr->rec.key < p->lower_bound) { + ptr++; + } + + while (ptr < shard->get_data() + s->stop_idx && ptr->rec.key <= p->upper_bound) { + records.emplace_back(*ptr); + ptr++; + } + + return records; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + auto p = (Parms<R> *) parms; + auto s = (BufferState<R> *) state; + + std::vector<Wrapped<R>> records; + for (size_t i=0; i<s->buffer->get_record_count(); i++) { + auto rec = s->buffer->get(i); + if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { + records.emplace_back(*rec); + } + } + + return records; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + std::vector<Cursor<Wrapped<R>>> cursors; + cursors.reserve(results.size()); + + psudb::PriorityQueue<Wrapped<R>> pq(results.size()); + size_t total = 0; + size_t tmp_n = results.size(); + + + for (size_t i = 0; i < tmp_n; ++i) + if (results[i].size() > 0){ + auto base = results[i].data(); + cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); + assert(i == cursors.size() - 1); + total += results[i].size(); + pq.push(cursors[i].ptr, tmp_n - i - 1); + } else { + cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); + } + + if (total == 0) { + return std::vector<R>(); + } + + std::vector<R> output; + output.reserve(total); + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? 
pq.peek(1) : psudb::queue_record<Wrapped<R>>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[tmp_n - now.version - 1]; + auto& cursor2 = cursors[tmp_n - next.version - 1]; + if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[tmp_n - now.version - 1]; + if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); + + pq.pop(); + + if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); + } + } + + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; + +}} diff --git a/include/query/wirs.h b/include/query/wirs.h new file mode 100644 index 0000000..ae82194 --- /dev/null +++ b/include/query/wirs.h @@ -0,0 +1,244 @@ +/* + * include/query/wirs.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A query class for weighted independent range sampling. This + * class is tightly coupled with include/shard/AugBTree.h, and + * so is probably of limited general utility. + */ +#pragma once + +#include "framework/QueryRequirements.h" +#include "psu-ds/Alias.h" + +namespace de { namespace wirs { + +template <WeightedRecordInterface R> +struct Parms { + decltype(R::key) lower_bound; + decltype(R::key) upper_bound; + size_t sample_size; + gsl_rng *rng; +}; + +template <WeightedRecordInterface R> +struct State { + decltype(R::weight) total_weight; + std::vector<void*> nodes; + psudb::Alias* top_level_alias; + size_t sample_size; + + State() { + total_weight = 0; + top_level_alias = nullptr; + } + + ~State() { + if (top_level_alias) delete top_level_alias; + } +}; + +template <RecordInterface R> +struct BufferState { + size_t cutoff; + psudb::Alias* alias; + std::vector<Wrapped<R>> records; + decltype(R::weight) max_weight; + size_t sample_size; + decltype(R::weight) total_weight; + BufferView<R> *buffer; + + ~BufferState() { + delete alias; + } +}; + +template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=false; + + static void *get_query_state(S *shard, void *parms) { + auto res = new State<R>(); + decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; + decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; + + std::vector<decltype(R::weight)> weights; + res->total_weight = shard->find_covering_nodes(lower_key, upper_key, res->nodes, weights); + + std::vector<double> normalized_weights; + for (auto weight : weights) { + normalized_weights.emplace_back(weight / res->total_weight); + } + + res->top_level_alias = new psudb::Alias(normalized_weights); + res->sample_size = 0; + + return res; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + BufferState<R> *state = new BufferState<R>(); + auto parameters = (Parms<R>*) parms; + + if constexpr (Rejection) { + state->cutoff = buffer->get_record_count() - 1; + state->max_weight = buffer->get_max_weight(); + state->total_weight = buffer->get_total_weight(); + state->sample_size = 0; + state->buffer = buffer; + return state; + } + + 
std::vector<decltype(R::weight)> weights; + + state->buffer = buffer; + decltype(R::weight) total_weight = 0; + + for (size_t i = 0; i <= buffer->get_record_count(); i++) { + auto rec = buffer->get(i); + + if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) { + weights.push_back(rec->rec.weight); + state->records.push_back(*rec); + total_weight += rec->rec.weight; + } + } + + std::vector<double> normalized_weights; + for (size_t i = 0; i < weights.size(); i++) { + normalized_weights.push_back(weights[i] / total_weight); + } + + state->total_weight = total_weight; + state->alias = new psudb::Alias(normalized_weights); + state->sample_size = 0; + + return state; + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { + auto p = (Parms<R> *) query_parms; + + std::vector<size_t> shard_sample_sizes(shard_states.size()+buffer_states.size(), 0); + size_t buffer_sz = 0; + + std::vector<decltype(R::weight)> weights; + + decltype(R::weight) total_weight = 0; + for (auto &s : buffer_states) { + auto bs = (BufferState<R> *) s; + total_weight += bs->total_weight; + weights.push_back(bs->total_weight); + } + + for (auto &s : shard_states) { + auto state = (State<R> *) s; + total_weight += state->total_weight; + weights.push_back(state->total_weight); + } + + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double) w / (double) total_weight); + } + + auto shard_alias = psudb::Alias(normalized_weights); + for (size_t i=0; i<p->sample_size; i++) { + auto idx = shard_alias.get(p->rng); + + if (idx < buffer_states.size()) { + auto state = (BufferState<R> *) buffer_states[idx]; + state->sample_size++; + } else { + auto state = (State<R> *) shard_states[idx - buffer_states.size()]; + state->sample_size++; + } + } + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + auto lower_key = ((Parms<R> *) parms)->lower_bound; + auto upper_key = ((Parms<R> *) parms)->upper_bound; + auto rng = ((Parms<R> *) parms)->rng; + + auto state = (State<R> *) q_state; + auto sample_size = state->sample_size; + + std::vector<Wrapped<R>> result_set; + + if (sample_size == 0) { + return result_set; + } + size_t cnt = 0; + size_t attempts = 0; + + for (size_t i=0; i<sample_size; i++) { + auto rec = shard->get_weighted_sample(lower_key, upper_key, + state->nodes[state->top_level_alias->get(rng)], + rng); + if (rec) { + result_set.emplace_back(*rec); + } + } + + return result_set; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + auto st = (BufferState<R> *) state; + auto p = (Parms<R> *) parms; + auto buffer = st->buffer; + + std::vector<Wrapped<R>> result; + result.reserve(st->sample_size); + + if constexpr (Rejection) { + for (size_t i=0; i<st->sample_size; i++) { + auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); + auto rec = buffer->get(idx); + + auto test = gsl_rng_uniform(p->rng) * st->max_weight; + + if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { + result.emplace_back(*rec); + } + } + return result; + } + + for (size_t i=0; i<st->sample_size; i++) { + auto idx = st->alias->get(p->rng); + result.emplace_back(st->records[idx]); + } + + return result; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + std::vector<R> output; + + for (size_t i=0; i<results.size(); 
+ static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + std::vector<R> output; + + for (size_t i=0; i<results.size(); i++) { + for (size_t j=0; j<results[i].size(); j++) { + output.emplace_back(results[i][j].rec); + } + } + + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; +}} diff --git a/include/query/wss.h b/include/query/wss.h new file mode 100644 index 0000000..8797035 --- /dev/null +++ b/include/query/wss.h @@ -0,0 +1,209 @@ +/* + * include/query/wss.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A query class for weighted set sampling. This + * class is tightly coupled with include/shard/Alias.h, + * and so is probably of limited general utility. + */ +#pragma once + +#include "framework/QueryRequirements.h" +#include "psu-ds/Alias.h" + +namespace de { namespace wss { + +template <WeightedRecordInterface R> +struct Parms { + size_t sample_size; + gsl_rng *rng; +}; + +template <WeightedRecordInterface R> +struct State { + decltype(R::weight) total_weight; + size_t sample_size; + + State() { + total_weight = 0; + } +}; + +template <RecordInterface R> +struct BufferState { + size_t cutoff; + size_t sample_size; + psudb::Alias *alias = nullptr; + decltype(R::weight) max_weight; + decltype(R::weight) total_weight; + BufferView<R> *buffer; + + ~BufferState() { + delete alias; + } +}; + +template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> +class Query { +public: + constexpr static bool EARLY_ABORT=false; + constexpr static bool SKIP_DELETE_FILTER=false; + + static void *get_query_state(S *shard, void *parms) { + auto res = new State<R>(); + res->total_weight = shard->get_total_weight(); + res->sample_size = 0; + + return res; + } + + static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { + BufferState<R> *state = new BufferState<R>(); + if constexpr (Rejection) { + state->cutoff = buffer->get_record_count() - 1; + state->max_weight = buffer->get_max_weight(); + state->total_weight = buffer->get_total_weight(); + state->buffer = buffer; + return state; + } + + std::vector<double> weights; + + double total_weight = 0.0; + state->buffer = buffer; + + for (size_t i = 0; i < buffer->get_record_count(); i++) { + auto rec = buffer->get(i); + weights.push_back(rec->rec.weight); + total_weight += rec->rec.weight; + } + + for (size_t i = 0; i < weights.size(); i++) { + weights[i] = weights[i] / total_weight; + } + + state->alias = new psudb::Alias(weights); + state->total_weight = total_weight; + + return state; + } + + static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { + auto p = (Parms<R> *) query_parms; + + std::vector<decltype(R::weight)> weights; + + decltype(R::weight) total_weight = 0; + for (auto &s : buffer_states) { + auto bs = (BufferState<R> *) s; + total_weight += bs->total_weight; + weights.push_back(bs->total_weight); + } + + for (auto &s : shard_states) { + auto state = (State<R> *) s; + total_weight += state->total_weight; + weights.push_back(state->total_weight); + } + + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double) w / (double) total_weight); + } + + auto shard_alias = psudb::Alias(normalized_weights); + for (size_t i=0; 
i<p->sample_size; i++) { + auto idx = shard_alias.get(p->rng); + + if (idx < buffer_states.size()) { + auto state = (BufferState<R> *) buffer_states[idx]; + state->sample_size++; + } else { + auto state = (State<R> *) shard_states[idx - buffer_states.size()]; + state->sample_size++; + } + } + } + + static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { + auto rng = ((Parms<R> *) parms)->rng; + + auto state = (State<R> *) q_state; + auto sample_size = state->sample_size; + + std::vector<Wrapped<R>> result_set; + + if (sample_size == 0) { + return result_set; + } + size_t attempts = 0; + do { + attempts++; + size_t idx = shard->get_weighted_sample(rng); + result_set.emplace_back(*shard->get_record_at(idx)); + } while (attempts < sample_size); + + return result_set; + } + + static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { + auto st = (BufferState<R> *) state; + auto p = (Parms<R> *) parms; + auto buffer = st->buffer; + + std::vector<Wrapped<R>> result; + result.reserve(st->sample_size); + + if constexpr (Rejection) { + for (size_t i=0; i<st->sample_size; i++) { + auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); + auto rec = buffer->get(idx); + + auto test = gsl_rng_uniform(p->rng) * st->max_weight; + + if (test <= rec->rec.weight) { + result.emplace_back(*rec); + } + } + return result; + } + + for (size_t i=0; i<st->sample_size; i++) { + auto idx = st->alias->get(p->rng); + result.emplace_back(*(buffer->get_data() + idx)); + } + + return result; + } + + static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { + std::vector<R> output; + + for (size_t i=0; i<results.size(); i++) { + for (size_t j=0; j<results[i].size(); j++) { + output.emplace_back(results[i][j].rec); + } + } + + return output; + } + + static void delete_query_state(void *state) { + auto s = (State<R> *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (BufferState<R> *) state; + delete s; + } +}; + +}} diff --git a/include/shard/Alex.h b/include/shard/Alex.h deleted file mode 100644 index 9f794dc..0000000 --- a/include/shard/Alex.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * include/shard/Alex.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. 
- * - */ -#pragma once - - -#include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> - -#include "alex.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -template <RecordInterface R> -struct alex_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template <RecordInterface R> -class AlexRangeQuery; - -template <RecordInterface R> -struct AlexState { - size_t start_idx; - size_t stop_idx; -}; - -template <RecordInterface R> -struct AlexBufferState { - size_t cutoff; - Alias* alias; - - ~AlexBufferState() { - delete alias; - } -}; - - -template <RecordInterface R, size_t epsilon=128> -class Alex { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - -public: - - // FIXME: there has to be a better way to do this - friend class AlexRangeQuery<R>; - - Alex(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - std::vector<std::pair<K, V>> temp_records; - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - temp_records.push_back({base->rec.key, base->rec.value}); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - m_alex = alex::Alex<K, V>(); - m_alex.set_expected_insert_frac(0); - m_alex.bulkload(temp_records.data(), temp_records.size()); - } - } - - Alex(Alex** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector<std::pair<K, V>> temp_records; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - temp_records.push_back({cursor.ptr->rec.key, cursor.ptr->rec.value}); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - m_alex = alex::Alex<K, V>(); - m_alex.set_expected_insert_frac(0); - m_alex.bulkload(temp_records.data(), temp_records.size()); - } - } - - ~Alex() { - if (m_data) free(m_data); - if (m_bf) delete m_bf; - - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alex.size_in_bytes() + m_alloc_size; - } - - alex::Alex<K, V>::Iterator get_lower_bound(const K& key) const { - auto bound = m_alex.find(key); - while (bound != m_alex.end() && bound.key() < key) { - bound++; - } - - return bound; - }
- -private: - Wrapped<R>* m_data; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_alloc_size; - K m_max_key; - K m_min_key; - alex::Alex<K, V> m_alex; - BloomFilter<R> *m_bf; -}; - - -template <RecordInterface R> -class AlexRangeQuery { -public: - static void *get_query_state(Alex<R> *ts, void *parms) { - auto res = new AlexState<R>(); - auto p = (alex_range_query_parms<R> *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - auto res = new AlexBufferState<R>(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static std::vector<Wrapped<R>> query(Alex<R> *ts, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (alex_range_query_parms<R> *) parms; - auto s = (AlexState<R> *) q_state; - - // if the returned index is one past the end of the - // records for the Alex, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return records; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. - while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto p = (alex_range_query_parms<R> *) parms; - auto s = (AlexBufferState<R> *) state; - - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->cutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; - } - - static std::vector<R> merge(std::vector<std::vector<R>> &results, void *parms) { - size_t total = 0; - for (size_t i=0; i<results.size(); i++) { - total += results[i].size(); - } - - if (total == 0) { - return std::vector<R>(); - } - - std::vector<R> output; - output.reserve(total); - - for (size_t i=0; i<results.size(); i++) { - std::move(results[i].begin(), results[i].end(), std::back_inserter(output)); - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (AlexState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (AlexBufferState<R> *) state; - delete s; - } -}; - -; - -} diff --git a/include/shard/Alias.h b/include/shard/Alias.h new file mode 100644 index 0000000..9275952 --- /dev/null +++ b/include/shard/Alias.h @@ -0,0 +1,207 @@ +/* + * include/shard/Alias.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A shard shim around psudb::Alias, an implementation of Walker's + * alias structure. Designed to be used alongside the WSS + * query in include/query/wss.h + * + * TODO: The code in this file is very poorly commented.
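+ * + * Walker's alias method answers weighted random selection in constant + * time per draw after a linear-time build over the normalized weights, + * which is what makes repeated WSS sampling against a static shard cheap. + * + * A minimal usage sketch (WRec is a hypothetical weighted record type; + * in practice the framework constructs shards and drives the sampling + * itself through the WSS query class): + * + * de::Alias<WRec> shard(std::move(buffer_view)); + * size_t idx = shard.get_weighted_sample(rng); // rng is a gsl_rng* + * const Wrapped<WRec> *rec = shard.get_record_at(idx);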
+ */ +#pragma once + +#include <vector> + +#include "framework/ShardRequirements.h" + +#include "psu-ds/Alias.h" +#include "psu-ds/BloomFilter.h" +#include "util/bf_config.h" +#include "util/SortedMerge.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::byte; + +namespace de { + +static thread_local size_t wss_cancelations = 0; + +template <WeightedRecordInterface R> +class Alias { +private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; + typedef decltype(R::weight) W; + +public: + Alias(BufferView<R> buffer) + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { + + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview<R>(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + std::vector<W> weights; + for (size_t i=0; i<m_reccnt; i++) { + weights.emplace_back(m_data[i].rec.weight); + m_total_weight += m_data[i].rec.weight; + } + + build_alias_structure(weights); + } + } + + Alias(std::vector<Alias*> &shards) + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(nullptr) { + + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = build_cursor_vec<R, Alias>(shards, &attemp_reccnt, &tombstone_count); + + m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + std::vector<W> weights; + for (size_t i=0; i<m_reccnt; i++) { + weights.emplace_back(m_data[i].rec.weight); + m_total_weight += m_data[i].rec.weight; + } + + build_alias_structure(weights); + } + } + + ~Alias() { + free(m_data); + delete m_alias; + delete m_bf; + } + + Wrapped<R> *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped<R>* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + const Wrapped<R>* get_record_at(size_t idx) const { + if (idx >= m_reccnt) return nullptr; + return m_data + idx; + } + + + size_t get_memory_usage() { + return m_alloc_size; + } + + size_t get_aux_memory_usage() { + return (m_bf) ? 
m_bf->memory_usage() : 0; + } + + W get_total_weight() { + return m_total_weight; + } + + size_t get_weighted_sample(gsl_rng *rng) const { + return m_alias->get(rng); + } + + size_t get_lower_bound(const K& key) const { + size_t min = 0; + size_t max = m_reccnt - 1; + + while (min < max) { + size_t mid = (min + max) / 2; + + if (key > m_data[mid].rec.key) { + min = mid + 1; + } else { + max = mid; + } + } + + return min; + } + +private: + + void build_alias_structure(std::vector<W> &weights) { + + // normalize the weights vector + std::vector<double> norm_weights(weights.size()); + + for (size_t i=0; i<weights.size(); i++) { + norm_weights[i] = (double) weights[i] / (double) m_total_weight; + } + + // build the alias structure + m_alias = new psudb::Alias(norm_weights); + } + + Wrapped<R>* m_data; + psudb::Alias *m_alias; + W m_total_weight; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_alloc_size; + BloomFilter<R> *m_bf; +}; +} diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h new file mode 100644 index 0000000..54931bd --- /dev/null +++ b/include/shard/AugBTree.h @@ -0,0 +1,311 @@ +/* + * include/shard/AugBTree.h + * + * Copyright (C) 2023 Dong Xie <dongx@psu.edu> + * Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A shard shim around the alias-augmented B-tree. Designed to be + * used alongside the WIRS query in include/query/wirs.h, but + * also supports the necessary methods for other common query + * types. + * + * TODO: The code in this file is very poorly commented. + */ +#pragma once + + +#include <vector> +#include <cassert> + +#include "framework/ShardRequirements.h" + +#include "psu-ds/Alias.h" +#include "psu-ds/BloomFilter.h" +#include "util/bf_config.h" +#include "util/SortedMerge.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::Alias; +using psudb::byte; + +namespace de { + +template <WeightedRecordInterface R> +struct AugBTreeNode { + struct AugBTreeNode<R> *left, *right; + decltype(R::key) low, high; + decltype(R::weight) weight; + Alias* alias; +}; + +template <WeightedRecordInterface R> +class AugBTree { +private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; + typedef decltype(R::weight) W; + +public: + AugBTree(BufferView<R> buffer) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_wirs_structure(); + } + } + + AugBTree(std::vector<AugBTree*> shards) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(nullptr) + { + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count); + + m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count;
+ + if (m_reccnt > 0) { + build_wirs_structure(); + } + } + + ~AugBTree() { + free(m_data); + for (size_t i=0; i<m_alias.size(); i++) { + delete m_alias[i]; + } + + delete m_bf; + free_tree(m_root); + } + + Wrapped<R> *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped<R>* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + const Wrapped<R>* get_record_at(size_t idx) const { + if (idx >= m_reccnt) return nullptr; + return m_data + idx; + } + + size_t get_memory_usage() { + return m_alloc_size + m_node_cnt * sizeof(AugBTreeNode<R>); + } + + size_t get_aux_memory_usage() { + return (m_bf) ? m_bf->memory_usage() : 0; + } + + size_t get_lower_bound(const K& key) const { + size_t min = 0; + size_t max = m_reccnt - 1; + + while (min < max) { + size_t mid = (min + max) / 2; + + if (key > m_data[mid].rec.key) { + min = mid + 1; + } else { + max = mid; + } + } + + return min; + } + + W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) { + W total_weight = 0; + + /* Simulate a stack to unfold recursion. */ + struct AugBTreeNode<R>* st[64] = {0}; + st[0] = m_root; + size_t top = 1; + while(top > 0) { + auto now = st[--top]; + if (covered_by(now, lower_key, upper_key) || + (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) { + nodes.emplace_back(now); + weights.emplace_back(now->weight); + total_weight += now->weight; + } else { + if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left; + if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right; + } + } + + + return total_weight; + } + + Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) { + /* Sampling proceeds in three levels: select a covering node, then a + fat point (group) within that node, then a record within the group.
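+ The covering node itself is chosen by the caller, using the query's + top-level alias over the node weights returned by find_covering_nodes() + (see the query-state setup in include/query/wirs.h). A record drawn + from a partially-covered leaf may fall outside [lower_key, upper_key]; + in that case nullptr is returned and the query simply yields fewer + samples than requested for this attempt.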
*/ + + /* first level */ + auto node = (AugBTreeNode<R>*) internal_node; + + /* second level */ + auto fat_point = node->low + node->alias->get(rng); + + /* third level */ + size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng); + auto record = m_data + rec_offset; + + /* bounds rejection */ + if (lower_key > record->rec.key || upper_key < record->rec.key) { + return nullptr; + } + + return record; + } + +private: + + bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { + auto low_index = node->low * m_group_size; + auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); + return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; + } + + bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { + auto low_index = node->low * m_group_size; + auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); + return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; + } + + void build_wirs_structure() { + m_group_size = std::ceil(std::log(m_reccnt)); + size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); + + // Fat point construction + low level alias.... + double sum_weight = 0.0; + std::vector<W> weights; + std::vector<double> group_norm_weight; + size_t i = 0; + size_t group_no = 0; + while (i < m_reccnt) { + double group_weight = 0.0; + group_norm_weight.clear(); + for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { + auto w = m_data[i].rec.weight; + group_norm_weight.emplace_back(w); + group_weight += w; + sum_weight += w; + } + + for (auto& w: group_norm_weight) + if (group_weight) w /= group_weight; + else w = 1.0 / group_norm_weight.size(); + m_alias.emplace_back(new Alias(group_norm_weight)); + + + weights.emplace_back(group_weight); + } + + assert(weights.size() == n_groups); + + m_root = construct_AugBTreeNode(weights, 0, n_groups-1); + } + + struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) { + if (low == high) { + return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; + } else if (low > high) return nullptr; + + std::vector<double> node_weights; + W sum = 0; + for (size_t i = low; i < high; ++i) { + node_weights.emplace_back(weights[i]); + sum += weights[i]; + } + + for (auto& w: node_weights) + if (sum) w /= sum; + else w = 1.0 / node_weights.size(); + + m_node_cnt += 1; + size_t mid = (low + high) / 2; + return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid), + construct_AugBTreeNode(weights, mid + 1, high), + low, high, sum, new Alias(node_weights)}; + } + + void free_tree(struct AugBTreeNode<R>* node) { + if (node) { + delete node->alias; + free_tree(node->left); + free_tree(node->right); + delete node; + } + } + + Wrapped<R>* m_data; + std::vector<Alias *> m_alias; + AugBTreeNode<R>* m_root; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_group_size; + size_t m_alloc_size; + size_t m_node_cnt; + BloomFilter<R> *m_bf; +}; +} diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h new file mode 100644 index 0000000..3763271 --- /dev/null +++ b/include/shard/ISAMTree.h @@ -0,0 +1,277 @@ +/* + * include/shard/ISAMTree.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A shard shim around an in-memory ISAM tree. 
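+ * The sorted record array itself serves as the leaf level of the + * tree; above it, internal nodes are fixed NODE_SZ (256-byte) blocks + * of separator keys and child pointers, giving an internal fanout of + * NODE_SZ / (sizeof(K) + sizeof(byte*)) (for example, with 8-byte + * keys and 8-byte pointers, 256 / 16 = 16).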
+ * + * TODO: The code in this file is very poorly commented. + */ +#pragma once + +#include <vector> +#include <cassert> + +#include "framework/ShardRequirements.h" + +#include "util/bf_config.h" +#include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::byte; + +namespace de { + +template <KVPInterface R> +class ISAMTree { +private: + +typedef decltype(R::key) K; +typedef decltype(R::value) V; + +constexpr static size_t NODE_SZ = 256; +constexpr static size_t INTERNAL_FANOUT = NODE_SZ / (sizeof(K) + sizeof(byte*)); + +struct InternalNode { + K keys[INTERNAL_FANOUT]; + byte* child[INTERNAL_FANOUT]; +}; + +static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); + +constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); + + +public: + ISAMTree(BufferView<R> buffer) + : m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + , m_isam_nodes(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_internal_node_cnt(0) + , m_deleted_cnt(0) + , m_alloc_size(0) + , m_data(nullptr) + { + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + /* + * without this, gcc seems to hoist the building of the array + * _above_ its allocation under -O3, resulting in memfaults. + */ + asm volatile ("" ::: "memory"); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); + } + } + + ISAMTree(std::vector<ISAMTree*> &shards) + : m_bf(nullptr) + , m_isam_nodes(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_internal_node_cnt(0) + , m_deleted_cnt(0) + , m_alloc_size(0) + , m_data(nullptr) + { + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); + + m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); + } + } + + ~ISAMTree() { + free(m_data); + free(m_isam_nodes); + delete m_bf; + } + + Wrapped<R> *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped<R>* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + + size_t get_memory_usage() { + return m_alloc_size + m_internal_node_cnt * NODE_SZ; + } + + size_t get_aux_memory_usage() { + return (m_bf) ? 
m_bf->memory_usage() : 0; + } + + /* SortedShardInterface methods */ + size_t get_lower_bound(const K& key) const { + const InternalNode* now = m_root; + while (!is_leaf(reinterpret_cast<const byte*>(now))) { + const InternalNode* next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key <= now->keys[i]) { + next = reinterpret_cast<InternalNode*>(now->child[i]); + break; + } + } + + now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); + } + + const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); + while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; + + return pos - m_data; + } + + size_t get_upper_bound(const K& key) const { + const InternalNode* now = m_root; + while (!is_leaf(reinterpret_cast<const byte*>(now))) { + const InternalNode* next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key < now->keys[i]) { + next = reinterpret_cast<InternalNode*>(now->child[i]); + break; + } + } + + now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); + } + + const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); + while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; + + return pos - m_data; + } + + const Wrapped<R>* get_record_at(size_t idx) const { + return (idx < m_reccnt) ? m_data + idx : nullptr; + } + +private: + void build_internal_levels() { + size_t n_leaf_nodes = m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); + + size_t level_node_cnt = n_leaf_nodes; + size_t node_cnt = 0; + do { + level_node_cnt = level_node_cnt / INTERNAL_FANOUT + (level_node_cnt % INTERNAL_FANOUT != 0); + node_cnt += level_node_cnt; + } while (level_node_cnt > 1); + + m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, (byte**) &m_isam_nodes); + m_internal_node_cnt = node_cnt; + + InternalNode* current_node = m_isam_nodes; + + const Wrapped<R>* leaf_base = m_data; + const Wrapped<R>* leaf_stop = m_data + m_reccnt; + while (leaf_base < leaf_stop) { + size_t fanout = 0; + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto rec_ptr = leaf_base + LEAF_FANOUT * i; + if (rec_ptr >= leaf_stop) break; + const Wrapped<R>* sep_key = std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); + current_node->keys[i] = sep_key->rec.key; + current_node->child[i] = (byte*)rec_ptr; + ++fanout; + } + current_node++; + leaf_base += fanout * LEAF_FANOUT; + } + + auto level_start = m_isam_nodes; + auto level_stop = current_node; + auto current_level_node_cnt = level_stop - level_start; + while (current_level_node_cnt > 1) { + auto now = level_start; + while (now < level_stop) { + size_t child_cnt = 0; + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto node_ptr = now + i; + ++child_cnt; + if (node_ptr >= level_stop) break; + current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; + current_node->child[i] = (byte*)node_ptr; + } + now += child_cnt; + current_node++; + } + level_start = level_stop; + level_stop = current_node; + current_level_node_cnt = level_stop - level_start; + } + + assert(current_level_node_cnt == 1); + m_root = level_start; + } + + bool is_leaf(const byte* ptr) const { + return ptr >= (const byte*)m_data && ptr < (const byte*)(m_data + m_reccnt); + } + + psudb::BloomFilter<R> *m_bf; + InternalNode* m_isam_nodes; + InternalNode* m_root; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_internal_node_cnt; + size_t m_deleted_cnt; + size_t m_alloc_size; + + 
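+ /* the sorted record array; it also serves as the tree's leaf level, + * which is how is_leaf() can classify a pointer by address alone */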
Wrapped<R>* m_data; +}; +} diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h deleted file mode 100644 index a220792..0000000 --- a/include/shard/MemISAM.h +++ /dev/null @@ -1,697 +0,0 @@ -/* - * include/shard/MemISAM.h - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include <vector> -#include <cassert> -#include <queue> -#include <memory> - -#include "framework/MutableBuffer.h" -#include "util/bf_config.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-util/timer.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t mrun_cancelations = 0; - -template <RecordInterface R> -struct irs_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template <RecordInterface R, bool Rejection> -class IRSQuery; - -template <RecordInterface R> -struct IRSState { - size_t lower_bound; - size_t upper_bound; - size_t sample_size; - size_t total_weight; -}; - -template <RecordInterface R> -struct IRSBufferState { - size_t cutoff; - std::vector<Wrapped<R>> records; - size_t sample_size; -}; - -template <RecordInterface R> -struct ISAMRangeQueryParms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template <RecordInterface R> -class ISAMRangeQuery; - -template <RecordInterface R> -struct ISAMRangeQueryState { - size_t start_idx; - size_t stop_idx; -}; - -template <RecordInterface R> -struct RangeQueryBufferState { - size_t cutoff; -}; - -template <RecordInterface R> -class MemISAM { -private: - friend class IRSQuery<R, true>; - friend class IRSQuery<R, false>; - friend class ISAMRangeQuery<R>; - -typedef decltype(R::key) K; -typedef decltype(R::value) V; - -constexpr static size_t inmem_isam_node_size = 256; -constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*)); - -struct InMemISAMNode { - K keys[inmem_isam_fanout]; - char* child[inmem_isam_fanout]; -}; - -constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R); -constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout; - -static_assert(sizeof(InMemISAMNode) == inmem_isam_node_size, "node size does not match"); - -public: - MemISAM(MutableBuffer<R>* buffer) - :m_reccnt(0), m_tombstone_cnt(0), m_isam_nodes(nullptr), m_deleted_cnt(0) { - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - TIMER_INIT(); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - TIMER_START(); - std::sort(base, stop, std::less<Wrapped<R>>()); - TIMER_STOP(); - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - mrun_cancelations++; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record 
- // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } - - base++; - } - TIMER_STOP(); - auto copy_time = TIMER_RESULT(); - - TIMER_START(); - if (m_reccnt > 0) { - build_internal_levels(); - } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - } - - MemISAM(MemISAM** runs, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_deleted_cnt(0), m_isam_nodes(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (runs[i]) { - auto base = runs[i]->get_data(); - cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()}); - attemp_reccnt += runs[i]->get_record_count(); - tombstone_count += runs[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - size_t offset = 0; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - if (cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_internal_levels(); - } - } - - ~MemISAM() { - if (m_data) free(m_data); - if (m_isam_nodes) free(m_isam_nodes); - if (m_bf) delete m_bf; - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - return (idx < m_reccnt) ? 
m_data + idx : nullptr; - } - - size_t get_memory_usage() { - return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size; - } - -private: - size_t get_lower_bound(const K& key) const { - const InMemISAMNode* now = m_root; - while (!is_leaf(reinterpret_cast<const char*>(now))) { - const InMemISAMNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { - if (now->child[i + 1] == nullptr || key <= now->keys[i]) { - next = reinterpret_cast<InMemISAMNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InMemISAMNode*>(now->child[inmem_isam_fanout - 1]); - } - - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; - - return pos - m_data; - } - - size_t get_upper_bound(const K& key) const { - const InMemISAMNode* now = m_root; - while (!is_leaf(reinterpret_cast<const char*>(now))) { - const InMemISAMNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { - if (now->child[i + 1] == nullptr || key < now->keys[i]) { - next = reinterpret_cast<InMemISAMNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InMemISAMNode*>(now->child[inmem_isam_fanout - 1]); - } - - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; - - return pos - m_data; - } - - void build_internal_levels() { - size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0); - size_t level_node_cnt = n_leaf_nodes; - size_t node_cnt = 0; - do { - level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0); - node_cnt += level_node_cnt; - } while (level_node_cnt > 1); - - m_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - - m_isam_nodes = (InMemISAMNode*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - m_internal_node_cnt = node_cnt; - memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size); - - InMemISAMNode* current_node = m_isam_nodes; - - const Wrapped<R>* leaf_base = m_data; - const Wrapped<R>* leaf_stop = m_data + m_reccnt; - while (leaf_base < leaf_stop) { - size_t fanout = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { - auto rec_ptr = leaf_base + inmem_isam_leaf_fanout * i; - if (rec_ptr >= leaf_stop) break; - const Wrapped<R>* sep_key = std::min(rec_ptr + inmem_isam_leaf_fanout - 1, leaf_stop - 1); - current_node->keys[i] = sep_key->rec.key; - current_node->child[i] = (char*)rec_ptr; - ++fanout; - } - current_node++; - leaf_base += fanout * inmem_isam_leaf_fanout; - } - - auto level_start = m_isam_nodes; - auto level_stop = current_node; - auto current_level_node_cnt = level_stop - level_start; - while (current_level_node_cnt > 1) { - auto now = level_start; - while (now < level_stop) { - size_t child_cnt = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { - auto node_ptr = now + i; - ++child_cnt; - if (node_ptr >= level_stop) break; - current_node->keys[i] = node_ptr->keys[inmem_isam_fanout - 1]; - current_node->child[i] = (char*)node_ptr; - } - now += child_cnt; - current_node++; - } - level_start = level_stop; - level_stop = current_node; - current_level_node_cnt = level_stop - level_start; - } - - assert(current_level_node_cnt == 1); - m_root = level_start; - } - - bool is_leaf(const char* ptr) const { - return ptr >= (const char*)m_data && ptr < (const 
char*)(m_data + m_reccnt); - } - - // Members: sorted data, internal ISAM levels, reccnt; - Wrapped<R>* m_data; - psudb::BloomFilter<R> *m_bf; - InMemISAMNode* m_isam_nodes; - InMemISAMNode* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_internal_node_cnt; - size_t m_deleted_cnt; - size_t m_alloc_size; -}; - -template <RecordInterface R, bool Rejection=true> -class IRSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(MemISAM<R> *isam, void *parms) { - auto res = new IRSState<R>(); - decltype(R::key) lower_key = ((irs_query_parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((irs_query_parms<R> *) parms)->upper_bound; - - res->lower_bound = isam->get_lower_bound(lower_key); - res->upper_bound = isam->get_upper_bound(upper_key); - - if (res->lower_bound == isam->get_record_count()) { - res->total_weight = 0; - } else { - res->total_weight = res->upper_bound - res->lower_bound; - } - - res->sample_size = 0; - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - auto res = new IRSBufferState<R>(); - - res->cutoff = buffer->get_record_count(); - res->sample_size = 0; - - if constexpr (Rejection) { - return res; - } - - auto lower_key = ((irs_query_parms<R> *) parms)->lower_bound; - auto upper_key = ((irs_query_parms<R> *) parms)->upper_bound; - - for (size_t i=0; i<res->cutoff; i++) { - if (((buffer->get_data() + i)->rec.key >= lower_key) && ((buffer->get_data() + i)->rec.key <= upper_key)) { - res->records.emplace_back(*(buffer->get_data() + i)); - } - } - - return res; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - auto p = (irs_query_parms<R> *) query_parms; - auto bs = (buff_state) ? (IRSBufferState<R> *) buff_state : nullptr; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector<size_t> weights; - if constexpr (Rejection) { - weights.push_back((bs) ? bs->cutoff : 0); - } else { - weights.push_back((bs) ? bs->records.size() : 0); - } - - size_t total_weight = 0; - for (auto &s : shard_states) { - auto state = (IRSState<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - // if no valid records fall within the query range, just - // set all of the sample sizes to 0 and bail out. 
- if (total_weight == 0) { - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (IRSState<R> *) shard_states[i]; - state->sample_size = 0; - } - - return; - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - if (bs) { - bs->sample_size = buffer_sz; - } - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (IRSState<R> *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - static std::vector<Wrapped<R>> query(MemISAM<R> *isam, void *q_state, void *parms) { - auto lower_key = ((irs_query_parms<R> *) parms)->lower_bound; - auto upper_key = ((irs_query_parms<R> *) parms)->upper_bound; - auto rng = ((irs_query_parms<R> *) parms)->rng; - - auto state = (IRSState<R> *) q_state; - auto sample_sz = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_sz == 0 || state->lower_bound == isam->get_record_count()) { - return result_set; - } - - size_t attempts = 0; - size_t range_length = state->upper_bound - state->lower_bound; - do { - attempts++; - size_t idx = (range_length > 0) ? gsl_rng_uniform_int(rng, range_length) : 0; - result_set.emplace_back(*isam->get_record_at(state->lower_bound + idx)); - } while (attempts < sample_sz); - - return result_set; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto st = (IRSBufferState<R> *) state; - auto p = (irs_query_parms<R> *) parms; - - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - - return result; - } - - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->records.size()); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - std::vector<R> output; - - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (IRSState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (IRSBufferState<R> *) state; - delete s; - } -}; - - -template <RecordInterface R> -class ISAMRangeQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(MemISAM<R> *ts, void *parms) { - auto res = new ISAMRangeQueryState<R>(); - auto p = (ISAMRangeQueryParms<R> *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - auto res = new RangeQueryBufferState<R>(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static 
std::vector<Wrapped<R>> query(MemISAM<R> *ts, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (ISAMRangeQueryParms<R> *) parms; - auto s = (ISAMRangeQueryState<R> *) q_state; - - // if the returned index is one past the end of the - // records for the PGM, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return records; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. - while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto p = (ISAMRangeQueryParms<R> *) parms; - auto s = (RangeQueryBufferState<R> *) state; - - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->cutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(results.size()); - - PriorityQueue<Wrapped<R>> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector<R>(); - } - - std::vector<R> output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (ISAMRangeQueryState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (RangeQueryBufferState<R> *) state; - delete s; - } -}; - - - -} diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 2cd153e..e2752ef 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -2,205 +2,103 @@ * include/shard/PGM.h * * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * A shard shim around the static version of the PGM learned + * index. + * + * TODO: The code in this file is very poorly commented. 
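+ * + * The PGM index is a "learned index": it fits piecewise-linear models + * over the sorted key array such that each key's true position lies + * within epsilon of the model's prediction, so a lookup is one model + * prediction followed by a bounded search over a window of roughly + * 2*epsilon entries (epsilon is the template parameter, 128 by + * default here).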
*/ #pragma once #include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> + +#include "framework/ShardRequirements.h" #include "pgm/pgm_index.hpp" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { -template <RecordInterface R> -struct pgm_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template <RecordInterface R> -struct PGMPointLookupParms { - decltype(R::key) target_key; -}; - -template <RecordInterface R> -class PGMRangeQuery; - -template <RecordInterface R> -class PGMPointLookup; - -template <RecordInterface R> -struct PGMState { - size_t start_idx; - size_t stop_idx; -}; - -template <RecordInterface R> -struct PGMBufferState { - size_t cutoff; -}; - template <RecordInterface R, size_t epsilon=128> class PGM { private: typedef decltype(R::key) K; typedef decltype(R::value) V; - public: + PGM(BufferView<R> buffer) + : m_data(nullptr) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + auto res = sorted_array_from_bufferview<R>(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: there has to be a better way to do this - friend class PGMRangeQuery<R>; - friend class PGMPointLookup<R>; - - PGM(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - std::vector<K> keys; - - //m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; + if (m_reccnt > 0) { + std::vector<K> keys; + for (size_t i=0; i<m_reccnt; i++) { + keys.emplace_back(m_data[i].rec.key); } - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - keys.emplace_back(base->rec.key); - - /* - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - }*/ - - base++; - } - - if (m_reccnt > 0) { m_pgm = pgm::PGMIndex<K, epsilon>(keys); } } - PGM(PGM** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - + PGM(std::vector<PGM*> shards) + : m_data(nullptr) + , m_bf(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec<R, PGM>(shards, &attemp_reccnt, &tombstone_count); - //m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector<K> keys; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - keys.emplace_back(cursor.ptr->rec.key); - /*if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - }*/ - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } + m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + std::vector<K> keys; + for (size_t i=0; i<m_reccnt; i++) { + keys.emplace_back(m_data[i].rec.key); + } + m_pgm = pgm::PGMIndex<K, epsilon>(keys); } } ~PGM() { - if (m_data) free(m_data); - //if (m_bf) delete m_bf; - + free(m_data); + delete m_bf; } Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - //if (filter && !m_bf->lookup(rec)) { - // return nullptr; - //} - size_t idx = get_lower_bound(rec.key); if (idx >= m_reccnt) { return nullptr; @@ -237,6 +135,10 @@ public: return m_pgm.size_in_bytes() + m_alloc_size; } + size_t get_aux_memory_usage() { + return (m_bf) ? 
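Both constructors now allocate through psudb::sf_aligned_alloc instead of the inline round-up arithmetic they replace. One subtlety the helper can also fix: the old expression size + (CACHELINE_SIZE - size % CACHELINE_SIZE) pads by a full cache line even when size is already aligned. A stand-in with corrected rounding, assuming the out-parameter signature implied by the call sites (the real helper lives in psudb-common):

    #include <cstddef>
    #include <cstdlib>

    /* Illustrative stand-in for psudb::sf_aligned_alloc: returns the padded
     * allocation size and hands back cacheline-aligned storage. Signature
     * is assumed from the call sites in this diff, not from psudb-common. */
    static size_t aligned_alloc_sz(size_t align, size_t n_bytes, unsigned char **out) {
        size_t padded = (n_bytes % align == 0)
                      ? n_bytes
                      : n_bytes + align - (n_bytes % align);
        *out = (unsigned char *) std::aligned_alloc(align, padded);
        return padded;
    }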
m_bf->memory_usage() : 0; + } + size_t get_lower_bound(const K& key) const { auto bound = m_pgm.search(key); size_t idx = bound.lo; @@ -276,225 +178,13 @@ public: private: Wrapped<R>* m_data; + BloomFilter<R> *m_bf; size_t m_reccnt; size_t m_tombstone_cnt; size_t m_alloc_size; K m_max_key; K m_min_key; pgm::PGMIndex<K, epsilon> m_pgm; - //BloomFilter<R> *m_bf; -}; -template <RecordInterface R> -class PGMPointLookup { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(PGM<R> *ts, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - return nullptr; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (PGMPointLookupParms<R> *) parms; - auto s = (PGMState<R> *) q_state; - - size_t idx = ts->get_lower_bound(p->target_key); - if (ts->get_record_at(idx)->rec.key == p->target_key) { - records.emplace_back(*ts->get_record_at(idx)); - } - - return records; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto p = (PGMPointLookupParms<R> *) parms; - auto s = (PGMBufferState<R> *) state; - - std::vector<Wrapped<R>> records; - for (size_t i=0; i<buffer->get_record_count(); i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key == p->target_key) { - records.emplace_back(*rec); - return records; - } - } - - return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - std::vector<R> output; - for (size_t i=0 ;i<results.size(); i++) { - if (results[i].size() > 0) { - output.emplace_back(results[i][0].rec); - return output; - } - } - - return output; - } - - static void delete_query_state(void *state) { - } - - static void delete_buffer_query_state(void *state) { - } }; - - -template <RecordInterface R> -class PGMRangeQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(PGM<R> *ts, void *parms) { - auto res = new PGMState<R>(); - auto p = (pgm_range_query_parms<R> *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - auto res = new PGMBufferState<R>(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) { - size_t tot = 0; - //std::vector<Wrapped<R>> records; - auto p = (pgm_range_query_parms<R> *) parms; - auto s = (PGMState<R> *) q_state; - - // if the returned index is one past the end of the - // records for the PGM, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return {}; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. 
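PGM now keeps a Bloom filter over tombstones again (m_bf is rebuilt in both constructors), and the new get_aux_memory_usage() reports its footprint. The value of such a filter is the authoritative miss: Bloom filters admit false positives but never false negatives, so a negative lookup can skip the binary search entirely. A self-contained model of that fast path, with int keys and a bool standing in for a bf->lookup(rec) result:

    #include <cstddef>
    #include <optional>
    #include <vector>

    /* Probe a sorted run, short-circuiting on a Bloom-filter miss. */
    std::optional<size_t> point_lookup(const std::vector<int> &keys, int key,
                                       bool maybe_contains /* bf lookup result */) {
        if (!maybe_contains) return std::nullopt;   /* miss is authoritative */

        size_t lo = 0, hi = keys.size();
        while (lo < hi) {                           /* classic lower_bound */
            size_t mid = lo + (hi - lo) / 2;
            if (keys[mid] < key) lo = mid + 1; else hi = mid;
        }
        if (lo < keys.size() && keys[lo] == key) return lo;
        return std::nullopt;
    }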
- while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - if (ptr->is_tombstone()) --tot; - else if (!ptr->is_deleted()) ++tot; - //records.emplace_back(*ptr); - ptr++; - } - - return {Wrapped<R>{0, {tot, 0}}}; - //return records; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - size_t tot = 0; - auto p = (pgm_range_query_parms<R> *) parms; - auto s = (PGMBufferState<R> *) state; - - //std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->cutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - if (rec->is_tombstone()) --tot; - else if (!rec->is_deleted()) ++tot; - //records.emplace_back(*rec); - } - } - - return {Wrapped<R>{0, {tot, 0}}}; - //return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - /*std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(results.size()); - - PriorityQueue<Wrapped<R>> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector<R>(); - } - - std::vector<R> output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - }*/ - - size_t tot = 0; - for (auto& result: results) - if (result.size() > 0) tot += result[0].rec.key; - - return {{tot, 0}}; - } - - static void delete_query_state(void *state) { - auto s = (PGMState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (PGMBufferState<R> *) state; - delete s; - } -}; - -; - } diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 69fcfbc..2a432e8 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -3,213 +3,107 @@ * * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * A shard shim around the TrieSpline learned index. + * + * TODO: The code in this file is very poorly commented. 
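Worth noting before the TrieSpline changes: the removed PGMRangeQuery had already been repurposed as a range count. Each shard walks its qualifying records, incrementing for live records and decrementing for tombstones, then returns a single synthetic Wrapped<R> whose key field carries the net tally; merge() just sums the first record of each partial result. A simplified model of that combine step:

    #include <cstddef>
    #include <vector>

    /* Sum one net count per shard/buffer partial result, as the removed
     * merge() does via result[0].rec.key. */
    size_t merge_counts(const std::vector<std::vector<size_t>> &partials) {
        size_t total = 0;
        for (const auto &p : partials)
            if (!p.empty()) total += p[0];
        return total;
    }

Packing a count into a record's key field works only while the key type is wide enough to hold the tally.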
*/ #pragma once #include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> +#include "framework/ShardRequirements.h" #include "ts/builder.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { -template <RecordInterface R> -struct ts_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template <RecordInterface R> -class TrieSplineRangeQuery; - -template <RecordInterface R> -struct TrieSplineState { - size_t start_idx; - size_t stop_idx; -}; - -template <RecordInterface R> -struct TrieSplineBufferState { - size_t cutoff; - Alias* alias; - - ~TrieSplineBufferState() { - delete alias; - } - -}; - -template <RecordInterface R, size_t E=1024> +template <KVPInterface R, size_t E=1024> class TrieSpline { private: typedef decltype(R::key) K; typedef decltype(R::value) V; public: + TrieSpline(BufferView<R> buffer) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: there has to be a better way to do this - friend class TrieSplineRangeQuery<R>; - - TrieSpline(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - auto bldr = ts::Builder<K>(min_key, max_key, E); - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } + if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; + auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); + for (size_t i=0; i<m_reccnt; i++) { + bldr.AddKey(m_data[i].rec.key); } - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - bldr.AddKey(base->rec.key); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { m_ts = bldr.Finalize(); } } - TrieSpline(TrieSpline** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - + TrieSpline(std::vector<TrieSpline*> &shards) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(nullptr) + { size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - // initialize m_max_key and m_min_key using the values from the - // first shard. These will later be updated when building - // the initial priority queue to their true values. - m_max_key = shards[0]->m_max_key; - m_min_key = shards[0]->m_min_key; + auto cursors = build_cursor_vec<R, TrieSpline>(shards, &attemp_reccnt, &tombstone_count); - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - if (shards[i]->m_max_key > m_max_key) { - m_max_key = shards[i]->m_max_key; - } - - if (shards[i]->m_min_key < m_min_key) { - m_min_key = shards[i]->m_min_key; - } - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
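As with PGM, the TrieSpline constructor now defers sorting and tombstone cancellation to the SortedMerge helpers and only then feeds the surviving key column to the learned index. The vendored builder API, as used in this diff, in isolation (the search bound's field layout is not shown in this hunk, so the final probe is left as a comment):

    #include <cstdint>
    #include <vector>
    #include "ts/builder.h"

    int main() {
        std::vector<uint64_t> keys = {1, 4, 9, 16, 25, 36, 49};

        constexpr size_t E = 1024;                  /* shard's default epsilon */
        auto bldr = ts::Builder<uint64_t>(keys.front(), keys.back(), E);
        for (auto k : keys) bldr.AddKey(k);         /* keys must arrive sorted */
        auto spline = bldr.Finalize();

        auto bound = spline.GetSearchBound(25);
        (void) bound; /* binary-search m_data inside the bound, as get_lower_bound does */
        return 0;
    }

The builder needs the min and max keys up front, which is why the old constructor tracked them incrementally and the new one simply reads m_data[0] and m_data[m_reccnt-1] from the already-sorted array.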
pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - bldr.AddKey(cursor.ptr->rec.key); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; + + auto bldr = ts::Builder<K>(m_min_key, m_max_key, E); + for (size_t i=0; i<m_reccnt; i++) { + bldr.AddKey(m_data[i].rec.key); + } + m_ts = bldr.Finalize(); } - } + } ~TrieSpline() { - if (m_data) free(m_data); - if (m_bf) delete m_bf; - + free(m_data); + delete m_bf; } Wrapped<R> *point_lookup(const R &rec, bool filter=false) { @@ -253,7 +147,9 @@ public: return m_ts.GetSize() + m_alloc_size; } -private: + size_t get_aux_memory_usage() { + return (m_bf) ? m_bf->memory_usage() : 0; + } size_t get_lower_bound(const K& key) const { auto bound = m_ts.GetSearchBound(key); @@ -282,16 +178,21 @@ private: max = mid; } } + } + if (idx == m_reccnt) { + return m_reccnt; } if (m_data[idx].rec.key > key && idx > 0 && m_data[idx-1].rec.key <= key) { return idx-1; } - return (m_data[idx].rec.key <= key) ? idx : m_reccnt; + return idx; } +private: + Wrapped<R>* m_data; size_t m_reccnt; size_t m_tombstone_cnt; @@ -301,154 +202,4 @@ private: ts::TrieSpline<K> m_ts; BloomFilter<R> *m_bf; }; - - -template <RecordInterface R> -class TrieSplineRangeQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(TrieSpline<R> *ts, void *parms) { - auto res = new TrieSplineState<R>(); - auto p = (ts_range_query_parms<R> *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - auto res = new TrieSplineBufferState<R>(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static std::vector<Wrapped<R>> query(TrieSpline<R> *ts, void *q_state, void *parms) { - //std::vector<Wrapped<R>> records; - size_t tot = 0; - auto p = (ts_range_query_parms<R> *) parms; - auto s = (TrieSplineState<R> *) q_state; - - // if the returned index is one past the end of the - // records for the TrieSpline, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return {}; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. 
- while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - if (ptr->is_tombstone()) --tot; - else if (!ptr->is_deleted()) ++tot; - //records.emplace_back(*ptr); - ptr++; - } - - return {Wrapped<R>{0, {tot, 0}}}; - //return records; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - size_t tot = 0; - auto p = (ts_range_query_parms<R> *) parms; - auto s = (TrieSplineBufferState<R> *) state; - - //std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->cutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - if (rec->is_tombstone()) --tot; - else if (!rec->is_deleted()) ++tot; - //records.emplace_back(*rec); - } - - } - - return {Wrapped<R>{0, {tot, 0}}}; - //return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { -/* - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(results.size()); - - PriorityQueue<Wrapped<R>> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector<R>(); - } - - std::vector<R> output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output;*/ - - size_t tot = 0; - for (auto& result: results) - if (result.size() > 0) tot += result[0].rec.key; - - return {{tot, 0}}; - } - - static void delete_query_state(void *state) { - auto s = (TrieSplineState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (TrieSplineBufferState<R> *) state; - delete s; - } -}; - } diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 8feec84..b342fe6 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -1,97 +1,31 @@ /* * include/shard/VPTree.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * - * All outsides reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * A shard shim around a VPTree for high-dimensional metric similarity + * search. + * + * FIXME: Does not yet support the tombstone delete policy. + * TODO: The code in this file is very poorly commented. 
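The VPTree shard targets k-nearest-neighbor queries, and the removed KNNDistCmpMax comparator encodes the standard bookkeeping: keep a max-heap of the k best candidates keyed on distance to the query, so the current worst candidate sits at the top and is evicted whenever something closer appears. A self-contained sketch of that scheme with a toy 2-D point type (the framework's records supply calc_distance instead):

    #include <cmath>
    #include <cstddef>
    #include <queue>
    #include <vector>

    struct Point { double x, y; };

    double dist(const Point &a, const Point &b) {
        return std::hypot(a.x - b.x, a.y - b.y);
    }

    /* Brute-force kNN with the same max-heap discipline as the removed
     * buffer_query(): the heap never holds more than k candidates. */
    std::vector<Point> knn(const std::vector<Point> &data, Point q, size_t k) {
        auto cmp = [&](const Point &a, const Point &b) {
            return dist(a, q) < dist(b, q);         /* farthest candidate on top */
        };
        std::priority_queue<Point, std::vector<Point>, decltype(cmp)> pq(cmp);

        for (const auto &p : data) {
            if (pq.size() < k) pq.push(p);
            else if (dist(p, q) < dist(pq.top(), q)) { pq.pop(); pq.push(p); }
        }

        std::vector<Point> out;
        while (!pq.empty()) { out.push_back(pq.top()); pq.pop(); }
        return out;
    }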
*/ #pragma once #include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> -#include <map> +#include <unordered_map> +#include "framework/ShardRequirements.h" #include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { -template <NDRecordInterface R> -struct KNNQueryParms { - R point; - size_t k; -}; - -template <NDRecordInterface R> -class KNNQuery; - -template <NDRecordInterface R> -struct KNNState { - size_t k; - - KNNState() { - k = 0; - } -}; - -template <NDRecordInterface R> -struct KNNBufferState { - -}; - - -template <typename R> -class KNNDistCmpMax { -public: - KNNDistCmpMax(R *baseline) : P(baseline) {} - - inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } - - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ - return a->calc_distance(*P) > b->calc_distance(*P); - } - -private: - R *P; -}; - -template <typename R> -class KNNDistCmpMin { -public: - KNNDistCmpMin(R *baseline) : P(baseline) {} - - inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { - return a->rec.calc_distance(P->rec) < b->rec.calc_distance(P->rec); - } - - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ - return a->calc_distance(*P) < b->calc_distance(*P); - } - -private: - R *P; -}; - - - template <NDRecordInterface R, size_t LEAFSZ=100, bool HMAP=false> class VPTree { private: @@ -112,16 +46,19 @@ private: } }; -public: - friend class KNNQuery<R>; - VPTree(MutableBuffer<R>* buffer) + +public: + VPTree(BufferView<R> buffer) : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - m_ptrs = new Wrapped<R>*[buffer->get_record_count()]; + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped<R>), + (byte**) &m_data); + + m_ptrs = new Wrapped<R>*[buffer.get_record_count()]; size_t offset = 0; m_reccnt = 0; @@ -130,8 +67,8 @@ public: // this one will likely require the multi-pass // approach, as otherwise we'll need to sort the // records repeatedly on each reconstruction. 
- for (size_t i=0; i<buffer->get_record_count(); i++) { - auto rec = buffer->get_data() + i; + for (size_t i=0; i<buffer.get_record_count(); i++) { + auto rec = buffer.get(i); if (rec->is_deleted()) { continue; @@ -149,25 +86,24 @@ public: } } - VPTree(VPTree** shards, size_t len) + VPTree(std::vector<VPTree*> shards) : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { size_t attemp_reccnt = 0; - - for (size_t i=0; i<len; i++) { + for (size_t i=0; i<shards.size(); i++) { attemp_reccnt += shards[i]->get_record_count(); } - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped<R>), + (byte **) &m_data); m_ptrs = new Wrapped<R>*[attemp_reccnt]; // FIXME: will eventually need to figure out tombstones // this one will likely require the multi-pass // approach, as otherwise we'll need to sort the // records repeatedly on each reconstruction. - for (size_t i=0; i<len; i++) { + for (size_t i=0; i<shards.size(); i++) { for (size_t j=0; j<shards[i]->get_record_count(); j++) { if (shards[i]->get_record_at(j)->is_deleted()) { continue; @@ -186,9 +122,9 @@ public: } ~VPTree() { - if (m_data) free(m_data); - if (m_root) delete m_root; - if (m_ptrs) delete[] m_ptrs; + free(m_data); + delete m_root; + delete[] m_ptrs; } Wrapped<R> *point_lookup(const R &rec, bool filter=false) { @@ -242,7 +178,28 @@ public: return m_node_cnt * sizeof(vpnode) + m_reccnt * sizeof(R*) + m_alloc_size; } + size_t get_aux_memory_usage() { + // FIXME: need to return the size of the unordered_map + return 0; + } + + void search(const R &point, size_t k, PriorityQueue<Wrapped<R>, + DistCmpMax<Wrapped<R>>> &pq) { + double farthest = std::numeric_limits<double>::max(); + + internal_search(m_root, point, k, pq, &farthest); + } + private: + Wrapped<R>* m_data; + Wrapped<R>** m_ptrs; + std::unordered_map<R, size_t, RecordHash<R>> m_lookup_map; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_node_cnt; + size_t m_alloc_size; + + vpnode *m_root; vpnode *build_vptree() { if (m_reccnt == 0) { @@ -277,13 +234,15 @@ private: } vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) { - // base-case: sometimes happens (probably because of the +1 and -1 - // in the first recursive call) + /* + * base-case: sometimes happens (probably because of the +1 and -1 + * in the first recursive call) + */ if (start > stop) { return nullptr; } - // base-case: create a leaf node + /* base-case: create a leaf node */ if (stop - start <= LEAFSZ) { vpnode *node = new vpnode(); node->start = start; @@ -294,26 +253,30 @@ private: return node; } - // select a random element to be the root of the - // subtree + /* + * select a random element to be the root of the + * subtree + */ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1); swap(start, i); - // partition elements based on their distance from the start, - // with those elements with distance falling below the median - // distance going into the left sub-array and those above - // the median in the right. This is easily done using QuickSelect. + /* + * partition elements based on their distance from the start, + * with those elements with distance falling below the median + * distance going into the left sub-array and those above + * the median in the right. 
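build_subtree() picks a random vantage point, then uses quickselect to place the median-distance element at the midpoint so the inside/outside children each receive half the records. The TODO about generalizing the quickselect into psudb-common is apt: over plain values the standard library already provides the operation. A sketch of the equivalent call (the real code must co-move the Wrapped<R>* entries in m_ptrs, which is why it hand-rolls the partition):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    /* Median split on distance-to-vantage-point, std-library style. */
    void split_by_median_distance(std::vector<double> &dists) {
        size_t mid = dists.size() / 2;
        std::nth_element(dists.begin(), dists.begin() + mid, dists.end());
        /* dists[mid] is now the median radius; everything before it lies
         * inside the vantage circle, everything after lies outside. */
    }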
This is easily done using QuickSelect. + */ auto mid = (start + 1 + stop) / 2; quickselect(start + 1, stop, mid, m_ptrs[start], rng); - // Create a new node based on this partitioning + /* Create a new node based on this partitioning */ vpnode *node = new vpnode(); node->start = start; - // store the radius of the circle used for partitioning the node. + /* store the radius of the circle used for partitioning the node. */ node->radius = m_ptrs[start]->rec.calc_distance(m_ptrs[mid]->rec); - // recursively construct the left and right subtrees + /* recursively construct the left and right subtrees */ node->inside = build_subtree(start + 1, mid-1, rng); node->outside = build_subtree(mid, stop, rng); @@ -322,7 +285,8 @@ private: return node; } - + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. void quickselect(size_t start, size_t stop, size_t k, Wrapped<R> *p, gsl_rng *rng) { if (start == stop) return; @@ -335,7 +299,8 @@ private: } } - + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. size_t partition(size_t start, size_t stop, Wrapped<R> *p, gsl_rng *rng) { auto pivot = start + gsl_rng_uniform_int(rng, stop - start); double pivot_dist = p->rec.calc_distance(m_ptrs[pivot]->rec); @@ -354,15 +319,15 @@ private: return j; } - void swap(size_t idx1, size_t idx2) { auto tmp = m_ptrs[idx1]; m_ptrs[idx1] = m_ptrs[idx2]; m_ptrs[idx2] = tmp; } + void internal_search(vpnode *node, const R &point, size_t k, PriorityQueue<Wrapped<R>, + DistCmpMax<Wrapped<R>>> &pq, double *farthest) { - void search(vpnode *node, const R &point, size_t k, PriorityQueue<Wrapped<R>, KNNDistCmpMax<Wrapped<R>>> &pq, double *farthest) { if (node == nullptr) return; if (node->leaf) { @@ -398,151 +363,21 @@ private: if (d < node->radius) { if (d - (*farthest) <= node->radius) { - search(node->inside, point, k, pq, farthest); + internal_search(node->inside, point, k, pq, farthest); } if (d + (*farthest) >= node->radius) { - search(node->outside, point, k, pq, farthest); + internal_search(node->outside, point, k, pq, farthest); } } else { if (d + (*farthest) >= node->radius) { - search(node->outside, point, k, pq, farthest); + internal_search(node->outside, point, k, pq, farthest); } if (d - (*farthest) <= node->radius) { - search(node->inside, point, k, pq, farthest); + internal_search(node->inside, point, k, pq, farthest); } } } - - Wrapped<R>* m_data; - Wrapped<R>** m_ptrs; - std::unordered_map<R, size_t, RecordHash<R>> m_lookup_map; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_node_cnt; - size_t m_alloc_size; - - vpnode *m_root; -}; - - -template <NDRecordInterface R> -class KNNQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(VPTree<R> *wss, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - return nullptr; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - return; - } - - static std::vector<Wrapped<R>> query(VPTree<R> *wss, void *q_state, void *parms) { - std::vector<Wrapped<R>> results; - KNNQueryParms<R> *p = (KNNQueryParms<R> *) parms; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; - - PriorityQueue<Wrapped<R>, KNNDistCmpMax<Wrapped<R>>> pq(p->k, &wrec); - - double farthest = std::numeric_limits<double>::max(); - - wss->search(wss->m_root, p->point, p->k, pq, 
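The renamed internal_search() prunes with the triangle inequality: writing d for the query's distance to the vantage point, r for the node radius, and w for the current "farthest" bound (distance to the running k-th best candidate), the inside child can improve the result only if d - w <= r, and the outside child only if d + w >= r; the d < r branch merely visits the more promising child first. The decision in isolation:

    /* VP-tree pruning: which children can still contain a better neighbor? */
    struct Visit { bool inside, outside; };

    Visit children_to_visit(double d, double r, double w) {
        return Visit{ d - w <= r,     /* query ball overlaps inside region  */
                      d + w >= r };   /* query ball overlaps outside region */
    }

Since w starts at the maximum double, both conditions hold until the candidate heap fills, after which the bound tightens and whole subtrees drop out.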
&farthest); - - while (pq.size() > 0) { - results.emplace_back(*pq.peek().data); - pq.pop(); - } - - return results; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - KNNQueryParms<R> *p = (KNNQueryParms<R> *) parms; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; - - size_t k = p->k; - - PriorityQueue<Wrapped<R>, KNNDistCmpMax<Wrapped<R>>> pq(k, &wrec); - for (size_t i=0; i<buffer->get_record_count(); i++) { - // Skip over deleted records (under tagging) - if ((buffer->get_data())[i].is_deleted()) { - continue; - } - - if (pq.size() < k) { - pq.push(buffer->get_data() + i); - } else { - double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); - double cur_dist = (buffer->get_data() + i)->rec.calc_distance(wrec.rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(buffer->get_data() + i); - } - } - } - - std::vector<Wrapped<R>> results; - while (pq.size() > 0) { - results.emplace_back(*(pq.peek().data)); - pq.pop(); - } - - return results; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - KNNQueryParms<R> *p = (KNNQueryParms<R> *) parms; - R rec = p->point; - size_t k = p->k; - - PriorityQueue<R, KNNDistCmpMax<R>> pq(k, &rec); - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - if (pq.size() < k) { - pq.push(&results[i][j].rec); - } else { - double head_dist = pq.peek().data->calc_distance(rec); - double cur_dist = results[i][j].rec.calc_distance(rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(&results[i][j].rec); - } - } - } - } - - std::vector<R> output; - while (pq.size() > 0) { - output.emplace_back(*pq.peek().data); - pq.pop(); - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (KNNState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (KNNBufferState<R> *) state; - delete s; - } -}; - + }; } diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h deleted file mode 100644 index 19d3eea..0000000 --- a/include/shard/WIRS.h +++ /dev/null @@ -1,590 +0,0 @@ -/* - * include/shard/WIRS.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. 
- * - */ -#pragma once - - -#include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> - -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t wirs_cancelations = 0; - -template <WeightedRecordInterface R> -struct wirs_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template <WeightedRecordInterface R, bool Rejection> -class WIRSQuery; - -template <WeightedRecordInterface R> -struct wirs_node { - struct wirs_node<R> *left, *right; - decltype(R::key) low, high; - decltype(R::weight) weight; - Alias* alias; -}; - -template <WeightedRecordInterface R> -struct WIRSState { - decltype(R::weight) total_weight; - std::vector<wirs_node<R>*> nodes; - Alias* top_level_alias; - size_t sample_size; - - WIRSState() { - total_weight = 0; - top_level_alias = nullptr; - } - - ~WIRSState() { - if (top_level_alias) delete top_level_alias; - } -}; - -template <WeightedRecordInterface R> -struct WIRSBufferState { - size_t cutoff; - Alias* alias; - std::vector<Wrapped<R>> records; - decltype(R::weight) max_weight; - size_t sample_size; - decltype(R::weight) total_weight; - - ~WIRSBufferState() { - delete alias; - } - -}; - -template <WeightedRecordInterface R> -class WIRS { -private: - - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - - // FIXME: there has to be a better way to do this - friend class WIRSQuery<R, true>; - friend class WIRSQuery<R, false>; - - WIRS(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wirs_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - WIRS(WIRS** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - ~WIRS() { - if (m_data) free(m_data); - for (size_t i=0; i<m_alias.size(); i++) { - if (m_alias[i]) delete m_alias[i]; - } - - if (m_bf) delete m_bf; - - free_tree(m_root); - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alloc_size + m_node_cnt * sizeof(wirs_node<Wrapped<R>>); - } - -private: - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - bool covered_by(struct 
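For reference, since WIRS.h is deleted outright: build_wirs_structure() batched the sorted records into groups of roughly ln(n) "fat points," built one Alias structure per group plus a weight-balanced binary tree over the group weights, and normalized each group's weights before constructing its Alias, falling back to a uniform distribution when a group's total weight was zero. That normalization step, extracted:

    #include <cstddef>
    #include <vector>

    /* Scale weights to sum to one; degrade to uniform on a zero total, as
     * the removed build_wirs_structure()/construct_wirs_node() both do. */
    std::vector<double> normalize(std::vector<double> w) {
        double sum = 0.0;
        for (double x : w) sum += x;
        for (double &x : w)
            x = (sum > 0.0) ? x / sum : 1.0 / (double) w.size();
        return w;
    }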
wirs_node<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; - } - - bool intersects(struct wirs_node<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; - } - - void build_wirs_structure() { - m_group_size = std::ceil(std::log(m_reccnt)); - size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); - - // Fat point construction + low level alias.... - double sum_weight = 0.0; - std::vector<W> weights; - std::vector<double> group_norm_weight; - size_t i = 0; - size_t group_no = 0; - while (i < m_reccnt) { - double group_weight = 0.0; - group_norm_weight.clear(); - for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { - auto w = m_data[i].rec.weight; - group_norm_weight.emplace_back(w); - group_weight += w; - sum_weight += w; - } - - for (auto& w: group_norm_weight) - if (group_weight) w /= group_weight; - else w = 1.0 / group_norm_weight.size(); - m_alias.emplace_back(new Alias(group_norm_weight)); - - - weights.emplace_back(group_weight); - } - - assert(weights.size() == n_groups); - - m_root = construct_wirs_node(weights, 0, n_groups-1); - } - - struct wirs_node<R>* construct_wirs_node(const std::vector<W>& weights, size_t low, size_t high) { - if (low == high) { - return new wirs_node<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; - } else if (low > high) return nullptr; - - std::vector<double> node_weights; - W sum = 0; - for (size_t i = low; i < high; ++i) { - node_weights.emplace_back(weights[i]); - sum += weights[i]; - } - - for (auto& w: node_weights) - if (sum) w /= sum; - else w = 1.0 / node_weights.size(); - - m_node_cnt += 1; - size_t mid = (low + high) / 2; - return new wirs_node<R>{construct_wirs_node(weights, low, mid), - construct_wirs_node(weights, mid + 1, high), - low, high, sum, new Alias(node_weights)}; - } - - void free_tree(struct wirs_node<R>* node) { - if (node) { - delete node->alias; - free_tree(node->left); - free_tree(node->right); - delete node; - } - } - - Wrapped<R>* m_data; - std::vector<Alias *> m_alias; - wirs_node<R>* m_root; - W m_total_weight; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - size_t m_node_cnt; - BloomFilter<R> *m_bf; -}; - - -template <WeightedRecordInterface R, bool Rejection=true> -class WIRSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(WIRS<R> *wirs, void *parms) { - auto res = new WIRSState<R>(); - decltype(R::key) lower_key = ((wirs_query_parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((wirs_query_parms<R> *) parms)->upper_bound; - - // Simulate a stack to unfold recursion. 
- double total_weight = 0.0; - struct wirs_node<R>* st[64] = {0}; - st[0] = wirs->m_root; - size_t top = 1; - while(top > 0) { - auto now = st[--top]; - if (wirs->covered_by(now, lower_key, upper_key) || - (now->left == nullptr && now->right == nullptr && wirs->intersects(now, lower_key, upper_key))) { - res->nodes.emplace_back(now); - total_weight += now->weight; - } else { - if (now->left && wirs->intersects(now->left, lower_key, upper_key)) st[top++] = now->left; - if (now->right && wirs->intersects(now->right, lower_key, upper_key)) st[top++] = now->right; - } - } - - std::vector<double> weights; - for (const auto& node: res->nodes) { - weights.emplace_back(node->weight / total_weight); - } - res->total_weight = total_weight; - res->top_level_alias = new Alias(weights); - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - WIRSBufferState<R> *state = new WIRSBufferState<R>(); - auto parameters = (wirs_query_parms<R>*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->sample_size = 0; - return state; - } - - std::vector<double> weights; - - state->cutoff = buffer->get_record_count() - 1; - double total_weight = 0.0; - - for (size_t i = 0; i <= state->cutoff; i++) { - auto rec = buffer->get_data() + i; - - if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) { - weights.push_back(rec->rec.weight); - state->records.push_back(*rec); - total_weight += rec->rec.weight; - } - } - - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } - - state->total_weight = total_weight; - state->alias = new Alias(weights); - state->sample_size = 0; - - return state; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - auto p = (wirs_query_parms<R> *) query_parms; - auto bs = (WIRSBufferState<R> *) buff_state; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - weights.push_back(bs->total_weight); - - decltype(R::weight) total_weight = 0; - for (auto &s : shard_states) { - auto state = (WIRSState<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - - bs->sample_size = buffer_sz; - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (WIRSState<R> *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - - - static std::vector<Wrapped<R>> query(WIRS<R> *wirs, void *q_state, void *parms) { - auto lower_key = ((wirs_query_parms<R> *) parms)->lower_bound; - auto upper_key = ((wirs_query_parms<R> *) parms)->upper_bound; - auto rng = ((wirs_query_parms<R> *) parms)->rng; - - auto state = (WIRSState<R> *) q_state; - auto sample_size = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_size == 0) { - return result_set; - } - // k -> sampling: three levels. 1. 
select a node -> select a fat point -> select a record. - size_t cnt = 0; - size_t attempts = 0; - do { - ++attempts; - // first level.... - auto node = state->nodes[state->top_level_alias->get(rng)]; - // second level... - auto fat_point = node->low + node->alias->get(rng); - // third level... - size_t rec_offset = fat_point * wirs->m_group_size + wirs->m_alias[fat_point]->get(rng); - auto record = wirs->m_data + rec_offset; - - // bounds rejection - if (lower_key > record->rec.key || upper_key < record->rec.key) { - continue; - } - - result_set.emplace_back(*record); - cnt++; - } while (attempts < sample_size); - - return result_set; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto st = (WIRSBufferState<R> *) state; - auto p = (wirs_query_parms<R> *) parms; - - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - std::vector<R> output; - - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (WIRSState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (WIRSBufferState<R> *) state; - delete s; - } - - - //{q.get_buffer_query_state(p, p)}; - //{q.buffer_query(p, p)}; - -}; - -} diff --git a/include/shard/WSS.h b/include/shard/WSS.h deleted file mode 100644 index c0af573..0000000 --- a/include/shard/WSS.h +++ /dev/null @@ -1,451 +0,0 @@ -/* - * include/shard/WSS.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * Douglas Rumbaugh <drumbaugh@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. 
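The Rejection template path in both removed shards samples the mutable buffer without building an Alias: draw a uniform slot, then accept with probability weight / max_weight (WIRS additionally rejects keys outside the query bounds). One thing to check if this code is ever revived: the draw uses gsl_rng_uniform_int(rng, cutoff) with cutoff = record_count - 1, and gsl_rng_uniform_int returns values in [0, n-1], so the final buffer slot appears unreachable. A sketch of the accept/reject core with the GSL calls swapped for <random>:

    #include <cstddef>
    #include <random>
    #include <vector>

    /* Weighted sampling by rejection: accepted indices follow the weight
     * distribution. As in the removed code, fewer than n_draws samples may
     * be returned, since rejected draws are not retried. */
    std::vector<size_t> sample_rejection(const std::vector<double> &weights,
                                         double max_weight, size_t n_draws,
                                         std::mt19937 &rng) {
        std::uniform_int_distribution<size_t> slot(0, weights.size() - 1);
        std::uniform_real_distribution<double> coin(0.0, max_weight);

        std::vector<size_t> accepted;
        for (size_t i = 0; i < n_draws; i++) {
            size_t idx = slot(rng);
            if (coin(rng) <= weights[idx]) accepted.push_back(idx);
        }
        return accepted;
    }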
- * - */ -#pragma once - - -#include <vector> -#include <cassert> -#include <queue> -#include <memory> -#include <concepts> - -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t wss_cancelations = 0; - -template <WeightedRecordInterface R> -struct wss_query_parms { - size_t sample_size; - gsl_rng *rng; -}; - -template <WeightedRecordInterface R, bool Rejection> -class WSSQuery; - -template <WeightedRecordInterface R> -struct WSSState { - decltype(R::weight) total_weight; - size_t sample_size; - - WSSState() { - total_weight = 0; - } -}; - -template <WeightedRecordInterface R> -struct WSSBufferState { - size_t cutoff; - size_t sample_size; - Alias* alias; - decltype(R::weight) max_weight; - decltype(R::weight) total_weight; - - ~WSSBufferState() { - delete alias; - } - -}; - -template <WeightedRecordInterface R> -class WSS { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - - // FIXME: there has to be a better way to do this - friend class WSSQuery<R, true>; - friend class WSSQuery<R, false>; - - WSS(MutableBuffer<R>* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less<Wrapped<R>>()); - - std::vector<W> weights; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wss_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - weights.push_back(base->rec.weight); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - WSS(WSS** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(len); - - PriorityQueue<Wrapped<R>> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector<W> weights; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - weights.push_back(cursor.ptr->rec.weight); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - ~WSS() { - if (m_data) free(m_data); - if (m_alias) delete m_alias; - if (m_bf) delete m_bf; - - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alloc_size; - } - -private: - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - void 
build_alias_structure(std::vector<W> &weights) { - - // normalize the weights vector - std::vector<double> norm_weights(weights.size()); - - for (size_t i=0; i<weights.size(); i++) { - norm_weights[i] = (double) weights[i] / (double) m_total_weight; - } - - // build the alias structure - m_alias = new Alias(norm_weights); - } - - Wrapped<R>* m_data; - Alias *m_alias; - W m_total_weight; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - BloomFilter<R> *m_bf; -}; - - -template <WeightedRecordInterface R, bool Rejection=true> -class WSSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(WSS<R> *wss, void *parms) { - auto res = new WSSState<R>(); - res->total_weight = wss->m_total_weight; - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) { - WSSBufferState<R> *state = new WSSBufferState<R>(); - auto parameters = (wss_query_parms<R>*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - return state; - } - - std::vector<double> weights; - - state->cutoff = buffer->get_record_count() - 1; - double total_weight = 0.0; - - for (size_t i = 0; i <= state->cutoff; i++) { - auto rec = buffer->get_data() + i; - weights.push_back(rec->rec.weight); - total_weight += rec->rec.weight; - } - - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } - - state->alias = new Alias(weights); - state->total_weight = total_weight; - - return state; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) { - auto p = (wss_query_parms<R> *) query_parms; - auto bs = (WSSBufferState<R> *) buff_state; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - weights.push_back(bs->total_weight); - - decltype(R::weight) total_weight = 0; - for (auto &s : shard_states) { - auto state = (WSSState<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - - bs->sample_size = buffer_sz; - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (WSSState<R> *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - static std::vector<Wrapped<R>> query(WSS<R> *wss, void *q_state, void *parms) { - auto rng = ((wss_query_parms<R> *) parms)->rng; - - auto state = (WSSState<R> *) q_state; - auto sample_size = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_size == 0) { - return result_set; - } - size_t attempts = 0; - do { - attempts++; - size_t idx = wss->m_alias->get(rng); - result_set.emplace_back(*wss->get_record_at(idx)); - } while (attempts < sample_size); - - return result_set; - } - - static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) { - auto st = (WSSBufferState<R> *) state; - auto p = (wss_query_parms<R> *) parms; - - 
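process_query_states() is where the framework-wide sample size gets split: it builds a weighted chooser over {buffer, shard_0, shard_1, ...} keyed on each source's total weight, draws sample_size times, and assigns each source the number of times it was drawn, so every source answers in proportion to its weight mass. The same assignment with std::discrete_distribution standing in for psudb::Alias:

    #include <cstddef>
    #include <random>
    #include <vector>

    /* total_weights[0] is the buffer; the rest are shards, in order. */
    std::vector<size_t> assign_samples(const std::vector<double> &total_weights,
                                       size_t sample_size, std::mt19937 &rng) {
        std::discrete_distribution<size_t> pick(total_weights.begin(),
                                                total_weights.end());
        std::vector<size_t> counts(total_weights.size(), 0);
        for (size_t i = 0; i < sample_size; i++) counts[pick(rng)]++;
        return counts;
    }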
std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(*(buffer->get_data() + idx)); - } - - return result; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { - std::vector<R> output; - - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (WSSState<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (WSSBufferState<R> *) state; - delete s; - } -}; - -} diff --git a/include/util/Cursor.h b/include/util/Cursor.h index 1b0b8ed..e8ba53d 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -1,19 +1,24 @@ /* * include/util/Cursor.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * A simple record cursor type with associated methods for help in + * merging record sets when constructing shards. Iterates an array + * of records in order, and provides facilities to make sorted merges + * easier. + * + * TODO: Prior versions of this module included automatic support for + * working with data stored in PagedFiles as well. That should be + * reintroduced at some point. */ #pragma once -#include "framework/RecordInterface.h" - -#include "psu-ds/BloomFilter.h" -#include "psu-ds/PriorityQueue.h" -#include "psu-util/alignment.h" +#include <cstdlib> +#include <vector> namespace de { template<typename R> @@ -62,6 +67,8 @@ template <typename R> inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) { const R *min_rec = nullptr; Cursor<R> *result = nullptr; + // FIXME: for large cursor vectors, it may be worth it to use a + // PriorityQueue here instead of scanning. for (size_t i=0; i< cursors.size(); i++) { if (cursors[i] == (Cursor<R>) {0} ) continue; diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h new file mode 100644 index 0000000..8a1e782 --- /dev/null +++ b/include/util/SortedMerge.h @@ -0,0 +1,205 @@ +/* + * include/util/SortedMerge.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * A sorted array merge routine for use in Shard construction, as many + * shards will use a sorted array to represent their data. Also encapsulates + * the necessary tombstone-cancellation logic. + * + * FIXME: include generic per-record processing functionality for Shards that + * need it, to avoid needing to reprocess the array in the shard after + * creation. 
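+ *
+ * A minimal usage sketch (assumed context: a shard type `Shard`, a
+ * std::vector<Shard*> `shards`, and an optional psudb::BloomFilter<R>
+ * *bf — the helpers themselves are the ones defined below):
+ *
+ *     size_t reccnt, tscnt;
+ *     auto cursors = build_cursor_vec<R, Shard>(shards, &reccnt, &tscnt);
+ *     auto buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(
+ *                       CACHELINE_SIZE, reccnt, sizeof(Wrapped<R>));
+ *     merge_info info = sorted_array_merge<R>(cursors, buffer, bf);
+ *
+ * after which info.record_count and info.tombstone_count reflect the
+ * surviving records, with tombstone and tagged deletes cancelled.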
+ */ +#pragma once + +#include "util/Cursor.h" +#include "framework/interface/Shard.h" +#include "psu-ds/PriorityQueue.h" + +namespace de { + +using psudb::PriorityQueue; +using psudb::BloomFilter; +using psudb::queue_record; +using psudb::byte; +using psudb::CACHELINE_SIZE; + +/* + * A simple struct to return record_count and tombstone_count information + * back to the caller. Could've been an std::pair, but I like the more + * explicit names. + */ +struct merge_info { + size_t record_count; + size_t tombstone_count; +}; + +/* + * Build a vector of cursors corresponding to the records contained within + * a vector of shards. The cursor at index i in the output will correspond + * to the shard at index i in the input. + * + * The values of reccnt and tscnt will be updated with the sum of the + * records contained within the shards. Note that these counts include deleted + * records that may be removed during shard construction, and so constitute + * upper bounds only. + */ +template <RecordInterface R, ShardInterface<R> S> +static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) { + std::vector<Cursor<Wrapped<R>>> cursors; + cursors.reserve(shards.size()); + + *reccnt = 0; + *tscnt = 0; + + for (size_t i = 0; i < shards.size(); ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); + *reccnt += shards[i]->get_record_count(); + *tscnt += shards[i]->get_tombstone_count(); + } else { + cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); + } + } + + return cursors; +} + +/* + * Build a sorted array of records based on the contents of a BufferView. + * This routine does not alter the buffer view, but rather copies the + * records out and then sorts them. The provided buffer must be large + * enough to store the records from the BufferView, or the behavior of the + * function is undefined. + * + * It allocates a temporary buffer for the sorting, and execution of the + * program will be aborted if the allocation fails. + */ +template <RecordInterface R> +static merge_info sorted_array_from_bufferview(BufferView<R> bv, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf=nullptr) { + /* + * Copy the contents of the buffer view into a temporary buffer, and + * sort them. We still need to iterate over these temporary records to + * apply tombstone/deleted record filtering, as well as any possible + * per-record processing that is required by the shard being built. + */ + auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, + bv.get_record_count(), + sizeof(Wrapped<R>)); + bv.copy_to_buffer((byte *) temp_buffer); + + auto base = temp_buffer; + auto stop = base + bv.get_record_count(); + std::sort(base, stop, std::less<Wrapped<R>>()); + + merge_info info = {0, 0}; + + /* + * Iterate over the temporary buffer to process the records, copying + * them into buffer as needed + */ + while (base < stop) { + if (!base->is_tombstone() && (base + 1 < stop) + && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; + } else if (base->is_deleted()) { + base += 1; + continue; + } + + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. 
It should only need to be &= 1 + base->header &= 3; + buffer[info.record_count++] = *base; + + if (base->is_tombstone()) { + info.tombstone_count++; + if (bf){ + bf->insert(base->rec); + } + } + + base++; + } + + free(temp_buffer); + return info; +} + +/* + * Perform a sorted merge of the records within cursors into the provided + * buffer. Includes tombstone and tagged delete cancellation logic, and + * will insert tombstones into a bloom filter, if one is provided. + * + * The behavior of this function is undefined if the provided buffer does + * not have space to contain all of the records within the input cursors. + */ +template <RecordInterface R> +static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf=nullptr) { + + // FIXME: For smaller cursor arrays, it may be more efficient to skip + // the priority queue and just do a scan. + PriorityQueue<Wrapped<R>> pq(cursors.size()); + for (size_t i=0; i<cursors.size(); i++) { + pq.push(cursors[i].ptr, i); + } + + merge_info info = {0, 0}; + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; + /* + * if the current record is not a tombstone, and the next record is + * a tombstone that matches the current one, then the current one + * has been deleted, and both it and its tombstone can be skipped + * over. + */ + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[now.version]; + auto& cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[now.version]; + /* skip over records that have been deleted via tagging */ + if (!cursor.ptr->is_deleted()) { + buffer[info.record_count++] = *cursor.ptr; + + /* + * if the record is a tombstone, increment the ts count and + * insert it into the bloom filter if one has been + * provided. + */ + if (cursor.ptr->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(cursor.ptr->rec); + } + } + } + pq.pop(); + + if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); + } + } + + return info; +} + + + +} diff --git a/include/util/bf_config.h b/include/util/bf_config.h index 2390643..9f29ed7 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -1,25 +1,42 @@ /* * include/util/bf_config.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * Global parameters for configuring bloom filters used as auxiliary + * structures on shards within the framework. The bloom filter class + * can be found in + * + * $PROJECT_ROOT/external/psudb-common/cpp/include/psu-ds/BloomFilter.h * */ #pragma once -#include "psu-util/alignment.h" +#include <cstdlib> namespace de { +/* global variable for specifying bloom filter FPR */ static double BF_FPR = .01; + +/* global variable for specifying number of BF hash functions (k) */ static size_t BF_HASH_FUNCS = 7; +/* + * Adjust the value of BF_FPR. The argument must be on the interval + * (0, 1), or the behavior of bloom filters is undefined. 
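+ *
+ * For example, BF_SET_FPR(0.01) restores the default 1% false
+ * positive rate.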
+ */ static void BF_SET_FPR(double fpr) { BF_FPR = fpr; } +/* + * Adjust the value of BF_HASH_FUNCS. The argument must be on the interval + * (0, INT64_MAX], or the behavior of bloom filters is undefined. + */ static void BF_SET_HASHFUNC(size_t func_cnt) { BF_HASH_FUNCS = func_cnt; } diff --git a/include/util/types.h b/include/util/types.h index 3010e78..a13bd95 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -1,54 +1,62 @@ /* * include/util/types.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * - * A centralized header file for various datatypes used throughout the + * A centralized header file for various data types used throughout the * code base. There are a few very specific types, such as header formats, * that are defined within the header files that make direct use of them, * but all generally usable, simple types are defined here. * + * Many of these types were used in the Practical Dynamic Extension for + * Sampling Indexes work, particularly for external storage and buffer + * pool systems. They aren't used now, but we're leaving them here for + * future use, when we add this functionality to this system as well. */ #pragma once -#include <cstdlib> #include <cstdint> -#include <cstddef> -#include <string> +#include <cstdlib> namespace de { -using std::byte; - -// Represents a page offset within a specific file (physical or virtual) +/* Represents a page offset within a specific file (physical or virtual) */ typedef uint32_t PageNum; -// Byte offset within a page. Also used for lengths of records, etc., -// within the codebase. size_t isn't necessary, as the maximum offset -// is only parm::PAGE_SIZE +/* + * Byte offset within a page. Also used for lengths of records, etc., + * within the codebase. size_t isn't necessary, as the maximum offset + * is only parm::PAGE_SIZE + */ typedef uint16_t PageOffset; -// A unique identifier for a frame within a buffer or cache. +/* A unique identifier for a frame within a buffer or cache */ typedef int32_t FrameId; -// A unique timestamp for use in MVCC concurrency control. Currently stored in -// record headers, but not used by anything. +/* + * A unique timestamp for use in MVCC concurrency control. Currently stored in + * record headers, but not used by anything. + */ typedef uint32_t Timestamp; const Timestamp TIMESTAMP_MIN = 0; const Timestamp TIMESTAMP_MAX = UINT32_MAX; -// Invalid values for various IDs. Used throughout the code base to indicate -// uninitialized values and error conditions. +/* + * Invalid values for various IDs. Used throughout the code base to indicate + * uninitialized values and error conditions. + */ const PageNum INVALID_PNUM = 0; const FrameId INVALID_FRID = -1; -// An ID for a given shard within the index. The level_idx is the index -// in the memory_levels and disk_levels vectors corresponding to the -// shard, and the shard_idx is the index with the level (always 0 in the -// case of leveling). Note that the two vectors of levels are treated -// as a contiguous index space. +/* + * An ID for a given shard within the index. The level_idx is the index + * in the memory_levels and disk_levels vectors corresponding to the + * shard, and the shard_idx is the index within the level (always 0 in the + * case of leveling). 
Note that the two vectors of levels are treated + * as a contiguous index space. + */ struct ShardID { ssize_t level_idx; ssize_t shard_idx; @@ -58,12 +66,7 @@ struct ShardID { } }; +/* A placeholder for an invalid shard--also used to indicate the mutable buffer */ const ShardID INVALID_SHID = {-1, -1}; -struct SampleRange { - ShardID shid; - size_t low; - size_t high; -}; - } diff --git a/tests/alias_tests.cpp b/tests/alias_tests.cpp new file mode 100644 index 0000000..98d0c63 --- /dev/null +++ b/tests/alias_tests.cpp @@ -0,0 +1,61 @@ +/* + * tests/alias_tests.cpp + * + * Unit tests for Alias shard + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/Alias.h" +#include "query/wss.h" +#include "framework/structure/MutableBuffer.h" +#include "include/testing.h" + + +#include <check.h> + +using namespace de; + +typedef WRec R; +typedef Alias<R> Shard; + + +#include "include/shard_standard.h" +#include "include/rangequery.h" + +Suite *unit_testing() +{ + Suite *unit = suite_create("ISAMTree Shard Unit Testing"); + + inject_rangequery_tests(unit); + inject_shard_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} + diff --git a/tests/augbtree_tests.cpp b/tests/augbtree_tests.cpp new file mode 100644 index 0000000..c7a0885 --- /dev/null +++ b/tests/augbtree_tests.cpp @@ -0,0 +1,55 @@ +/* + * tests/isam_tests.cpp + * + * Unit tests for ISAM Tree shard + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/AugBTree.h" +#include "include/testing.h" +#include <check.h> + +using namespace de; + +typedef WRec R; +typedef AugBTree<R> Shard; + +#include "include/shard_standard.h" +#include "include/rangequery.h" + +Suite *unit_testing() +{ + Suite *unit = suite_create("Alias-augmented B+Tree Shard Unit Testing"); + + inject_rangequery_tests(unit); + inject_shard_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_level_concurrent.cpp b/tests/de_level_concurrent.cpp new file mode 100644 index 0000000..2039efb --- /dev/null +++ b/tests/de_level_concurrent.cpp @@ -0,0 +1,58 @@ +/* + * tests/de_level_tomb.cpp + * + * Unit tests for Dynamic Extension Framework + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. 
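+ *
+ * This driver exercises the concurrent configuration of the framework:
+ * note the FIFOScheduler in the DE typedef below, and the use of the
+ * concurrent_extension.h test set with a leveling layout and tombstone
+ * deletes.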
+ * + */ +#include <set> +#include <random> +#include <algorithm> + +#include "include/testing.h" +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" + +#include <check.h> +using namespace de; + +typedef Rec R; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; + +#include "include/concurrent_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tombstone Leveling Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_level_tag.cpp b/tests/de_level_tag.cpp index 91f158c..75131c4 100644 --- a/tests/de_level_tag.cpp +++ b/tests/de_level_tag.cpp @@ -1,25 +1,58 @@ /* - * tests/dynamic_extension_tests.cpp + * tests/de_level_tag.cpp * * Unit tests for Dynamic Extension Framework * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ #include <set> #include <random> #include <algorithm> -#include "testing.h" +#include "include/testing.h" #include "framework/DynamicExtension.h" -#include "shard/WIRS.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" #include <check.h> using namespace de; -typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING> DE; +typedef Rec R; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE; -#include "dynamic_extension_tests.inc" +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tagged Leveling Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_level_tomb.cpp b/tests/de_level_tomb.cpp index c3dc5df..6da211d 100644 --- a/tests/de_level_tomb.cpp +++ b/tests/de_level_tomb.cpp @@ -1,25 +1,59 @@ /* - * tests/dynamic_extension_tests.cpp + * tests/de_level_tomb.cpp * * Unit tests for Dynamic Extension Framework * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. 
* */ #include <set> #include <random> #include <algorithm> -#include "testing.h" +#include "include/testing.h" #include "framework/DynamicExtension.h" -#include "shard/WIRS.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "shard/TrieSpline.h" #include <check.h> using namespace de; -typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE> DE; +typedef Rec R; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; -#include "dynamic_extension_tests.inc" +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tombstone Leveling Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_tier_concurrent.cpp b/tests/de_tier_concurrent.cpp new file mode 100644 index 0000000..722b9bd --- /dev/null +++ b/tests/de_tier_concurrent.cpp @@ -0,0 +1,58 @@ +/* + * tests/de_level_tomb.cpp + * + * Unit tests for Dynamic Extension Framework + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#include <set> +#include <random> +#include <algorithm> + +#include "include/testing.h" +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" + +#include <check.h> +using namespace de; + +typedef Rec R; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; + +#include "include/concurrent_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tombstone Leveling Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_tier_tag.cpp b/tests/de_tier_tag.cpp index 9b6b5a4..79bb7bf 100644 --- a/tests/de_tier_tag.cpp +++ b/tests/de_tier_tag.cpp @@ -1,25 +1,59 @@ /* - * tests/dynamic_extension_tests.cpp + * tests/de_tier_tag.cpp * * Unit tests for Dynamic Extension Framework * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. 
* */ #include <set> #include <random> #include <algorithm> -#include "testing.h" +#include "include/testing.h" #include "framework/DynamicExtension.h" -#include "shard/WIRS.h" +#include "framework/scheduling/SerialScheduler.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" #include <check.h> using namespace de; -typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING> DE; +typedef Rec R; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; -#include "dynamic_extension_tests.inc" +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tagged Tiering Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_tier_tomb.cpp b/tests/de_tier_tomb.cpp index 82942fd..b1387bb 100644 --- a/tests/de_tier_tomb.cpp +++ b/tests/de_tier_tomb.cpp @@ -1,25 +1,59 @@ /* - * tests/dynamic_extension_tests.cpp + * tests/de_tier_tomb.cpp * * Unit tests for Dynamic Extension Framework * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ #include <set> #include <random> #include <algorithm> -#include "testing.h" +#include "include/testing.h" #include "framework/DynamicExtension.h" -#include "shard/WIRS.h" +#include "shard/ISAMTree.h" +#include "shard/TrieSpline.h" +#include "query/rangequery.h" #include <check.h> using namespace de; -typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE> DE; +typedef Rec R; +typedef DynamicExtension<Rec, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; -#include "dynamic_extension_tests.inc" +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tombstone Tiering Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/dynamic_extension_tests.inc b/tests/dynamic_extension_tests.inc deleted file mode 100644 index b9866c3..0000000 --- a/tests/dynamic_extension_tests.inc +++ /dev/null @@ -1,425 +0,0 @@ -/* - * tests/dynamic_extension_tests.cpp - * - * Unit tests for Dynamic Extension Framework - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. 
- * - */ - -START_TEST(t_create) -{ - auto ext_wirs = new DE(100, 2, 1); - - - ck_assert_ptr_nonnull(ext_wirs); - ck_assert_int_eq(ext_wirs->get_record_count(), 0); - ck_assert_int_eq(ext_wirs->get_height(), 0); - - delete ext_wirs; -} -END_TEST - - -START_TEST(t_insert) -{ - auto ext_wirs = new DE(100, 2, 1); - - uint64_t key = 0; - uint32_t val = 0; - for (size_t i=0; i<100; i++) { - WRec r = {key, val, 1}; - ck_assert_int_eq(ext_wirs->insert(r), 1); - key++; - val++; - } - - ck_assert_int_eq(ext_wirs->get_height(), 0); - ck_assert_int_eq(ext_wirs->get_record_count(), 100); - - delete ext_wirs; -} -END_TEST - - -START_TEST(t_insert_with_mem_merges) -{ - auto ext_wirs = new DE(100, 2, 1); - - uint64_t key = 0; - uint32_t val = 0; - for (size_t i=0; i<300; i++) { - WRec r = {key, val, 1}; - ck_assert_int_eq(ext_wirs->insert(r), 1); - key++; - val++; - } - - ck_assert_int_eq(ext_wirs->get_record_count(), 300); - ck_assert_int_eq(ext_wirs->get_height(), 1); - - delete ext_wirs; -} -END_TEST - - -/* -START_TEST(t_range_sample_memtable) -{ - auto ext_wirs = new DE(100, 2, 1); - - uint64_t key = 0; - uint32_t val = 0; - for (size_t i=0; i<100; i++) { - WRec r = {key, val, 1}; - ck_assert_int_eq(ext_wirs->insert(r), 1); - key++; - val++; - } - - uint64_t lower_bound = 20; - uint64_t upper_bound = 50; - - char *buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE); - char *util_buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE); - WRec sample_set[100]; - - ext_wirs->range_sample(sample_set, lower_bound, upper_bound, 100); - - for(size_t i=0; i<100; i++) { - ck_assert_int_le(sample_set[i].key, upper_bound); - ck_assert_int_ge(sample_set[i].key, lower_bound); - } - - free(buf); - free(util_buf); - - delete ext_wirs; -} -END_TEST - - -START_TEST(t_range_sample_memlevels) -{ - auto ext_wirs = new DE(100, 2, 1); - - uint64_t key = 0; - uint32_t val = 0; - for (size_t i=0; i<300; i++) { - WRec r = {key, val, 1}; - ck_assert_int_eq(ext_wirs->insert(r), 1); - key++; - val++; - } - - uint64_t lower_bound = 100; - uint64_t upper_bound = 250; - - char *buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE); - char *util_buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE); - - WRec sample_set[100]; - ext_wirs->range_sample(sample_set, lower_bound, upper_bound, 100); - - for(size_t i=0; i<100; i++) { - ck_assert_int_le(sample_set[i].key, upper_bound); - ck_assert_int_ge(sample_set[i].key, lower_bound); - } - - free(buf); - free(util_buf); - - delete ext_wirs; -} -END_TEST -*/ - -START_TEST(t_range_sample_weighted) -{ - auto ext_wirs = new DE(100, 2, 1); - size_t n = 10000; - - std::vector<uint64_t> keys; - - uint64_t key = 1; - for (size_t i=0; i< n / 2; i++) { - keys.push_back(key); - } - - // put in a quarter of the count with weight two. - key = 2; - for (size_t i=0; i< n / 4; i++) { - keys.push_back(key); - } - - // the remaining quarter with weight four. 
- key = 3; - for (size_t i=0; i< n / 4; i++) { - keys.push_back(key); - } - - std::random_device rd; - std::mt19937 gen{rd()}; - std::shuffle(keys.begin(), keys.end(), gen); - - for (size_t i=0; i<keys.size(); i++) { - uint64_t weight; - if (keys[i] == 1) { - weight = 2; - } else if (keys[i] == 2) { - weight = 4; - } else { - weight = 8; - } - - WRec r = {keys[i], (uint32_t) i, weight}; - ext_wirs->insert(r); - } - size_t k = 1000; - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t cnt[3] = {0}; - size_t total_samples = 0; - - wirs_query_parms<WRec> p; - p.lower_bound = lower_key; - p.upper_bound = upper_key; - p.sample_size = k; - p.rng = gsl_rng_alloc(gsl_rng_mt19937); - - for (size_t i=0; i<1000; i++) { - - auto result = ext_wirs->query(&p); - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].key - 1]++; - } - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .03)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .03)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .03)); - - gsl_rng_free(p.rng); - delete ext_wirs; -} -END_TEST - - -START_TEST(t_tombstone_merging_01) -{ - size_t reccnt = 100000; - auto ext_wirs = new DE(100, 2, .01); - - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - - std::set<std::pair<uint64_t, uint32_t>> records; - std::set<std::pair<uint64_t, uint32_t>> to_delete; - std::set<std::pair<uint64_t, uint32_t>> deleted; - - while (records.size() < reccnt) { - uint64_t key = rand(); - uint32_t val = rand(); - - if (records.find({key, val}) != records.end()) continue; - - records.insert({key, val}); - } - - size_t deletes = 0; - size_t cnt=0; - for (auto rec : records) { - WRec r = {rec.first, rec.second, 1}; - ck_assert_int_eq(ext_wirs->insert(r), 1); - - if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<std::pair<uint64_t, uint32_t>> del_vec; - std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); - - for (size_t i=0; i<del_vec.size(); i++) { - WRec dr = {del_vec[i].first, del_vec[i].second, 1}; - ext_wirs->erase(dr); - deletes++; - to_delete.erase(del_vec[i]); - deleted.insert(del_vec[i]); - } - } - - if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { - to_delete.insert(rec); - } - - ck_assert(ext_wirs->validate_tombstone_proportion()); - } - - ck_assert(ext_wirs->validate_tombstone_proportion()); - - gsl_rng_free(rng); - delete ext_wirs; -} -END_TEST - -DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto ext_wirs = new DE(1000, 2, 1); - - std::set<WRec> records; - std::set<WRec> to_delete; - std::set<WRec> deleted; - - while (records.size() < reccnt) { - uint64_t key = rand(); - uint32_t val = rand(); - - if (records.find({key, val}) != records.end()) continue; - - records.insert({key, val}); - } - - size_t deletes = 0; - for (auto rec : records) { - ck_assert_int_eq(ext_wirs->insert(rec), 1); - - if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<WRec> del_vec; - std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); - - for (size_t i=0; i<del_vec.size(); i++) { - ext_wirs->erase(del_vec[i]); - deletes++; - to_delete.erase(del_vec[i]); - deleted.insert(del_vec[i]); - } - } - - if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { - to_delete.insert(rec); - } - 
} - - gsl_rng_free(rng); - - return ext_wirs; -} - -START_TEST(t_static_structure) -{ - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t reccnt = 100000; - auto ext_wirs = new DE(100, 2, 1); - - std::set<WRec> records; - std::set<WRec> to_delete; - std::set<WRec> deleted; - - while (records.size() < reccnt) { - uint64_t key = rand(); - uint32_t val = rand(); - - if (records.find({key, val}) != records.end()) continue; - - records.insert({key, val, 1}); - } - - size_t deletes = 0; - for (auto rec : records) { - ck_assert_int_eq(ext_wirs->insert(rec), 1); - - if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<WRec> del_vec; - std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); - - for (size_t i=0; i<del_vec.size(); i++) { - ck_assert_int_eq(ext_wirs->erase(del_vec[i]), 1); - - deletes++; - to_delete.erase(del_vec[i]); - deleted.insert(del_vec[i]); - } - } - - if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { - to_delete.insert(rec); - } - } - - auto flat = ext_wirs->create_static_structure(); - ck_assert_int_eq(flat->get_record_count(), reccnt - deletes); - - uint64_t prev_key = 0; - for (size_t i=0; i<flat->get_record_count(); i++) { - auto k = flat->get_record_at(i)->rec.key; - ck_assert_int_ge(k, prev_key); - prev_key = k; - } - - gsl_rng_free(rng); - delete flat; - delete ext_wirs; -} -END_TEST - - -Suite *unit_testing() -{ - Suite *unit = suite_create("de::DynamicExtension Unit Testing"); - - TCase *create = tcase_create("de::DynamicExtension::constructor Testing"); - tcase_add_test(create, t_create); - suite_add_tcase(unit, create); - - TCase *insert = tcase_create("de::DynamicExtension<WIRS>::insert Testing"); - tcase_add_test(insert, t_insert); - tcase_add_test(insert, t_insert_with_mem_merges); - suite_add_tcase(unit, insert); - - TCase *sampling = tcase_create("de::DynamicExtension<WIRS>::range_sample Testing"); - - tcase_add_test(sampling, t_range_sample_weighted); - suite_add_tcase(unit, sampling); - - /* - tcase_add_test(sampling, t_range_sample_memtable); - tcase_add_test(sampling, t_range_sample_memlevels); - */ - - TCase *ts = tcase_create("de::DynamicExtension::tombstone_compaction Testing"); - tcase_add_test(ts, t_tombstone_merging_01); - tcase_set_timeout(ts, 500); - suite_add_tcase(unit, ts); - - TCase *flat = tcase_create("de::DynamicExtension::create_static_structure Testing"); - tcase_add_test(flat, t_static_structure); - tcase_set_timeout(flat, 500); - suite_add_tcase(unit, flat); - - return unit; -} - -int run_unit_tests() -{ - int failed = 0; - Suite *unit = unit_testing(); - SRunner *unit_runner = srunner_create(unit); - - srunner_run_all(unit_runner, CK_NORMAL); - failed = srunner_ntests_failed(unit_runner); - srunner_free(unit_runner); - - return failed; -} - - -int main() -{ - int unit_failed = run_unit_tests(); - - return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/include/concurrent_extension.h b/tests/include/concurrent_extension.h new file mode 100644 index 0000000..0993fac --- /dev/null +++ b/tests/include/concurrent_extension.h @@ -0,0 +1,396 @@ +/* + * tests/include/dynamic_extension.h + * + * Standardized unit tests for DynamicExtension objects + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard, Query, and R + * type. 
In particular, R needs to implement the key-value + * pair interface. For other types of record, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. + */ +/*#include "testing.h" +#include "framework/DynamicExtension.h" +#include "framework/scheduling/FIFOScheduler.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include <check.h> + +//using namespace de; +//typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; +*/ + + +START_TEST(t_create) +{ + auto test_de = new DE(100, 1000, 2); + + ck_assert_ptr_nonnull(test_de); + ck_assert_int_eq(test_de->get_record_count(), 0); + ck_assert_int_eq(test_de->get_height(), 0); + + delete test_de; +} +END_TEST + + +START_TEST(t_insert) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + for (size_t i=0; i<100; i++) { + R r = {key, val}; + ck_assert_int_eq(test_de->insert(r), 1); + key++; + val++; + } + + ck_assert_int_eq(test_de->get_height(), 0); + ck_assert_int_eq(test_de->get_record_count(), 100); + + delete test_de; +} +END_TEST + + +START_TEST(t_debug_insert) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + for (size_t i=0; i<1000; i++) { + R r = {key, val}; + ck_assert_int_eq(test_de->insert(r), 1); + ck_assert_int_eq(test_de->get_record_count(), i+1); + key++; + val++; + } + + delete test_de; +} +END_TEST + + +START_TEST(t_insert_with_mem_merges) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + + R r = {key, val}; + for (size_t i=0; i<1000; i++) { + ck_assert_int_eq(test_de->insert(r), 1); + r.key++; + r.value++; + } + + ck_assert_int_eq(test_de->get_record_count(), 1000); + + test_de->await_next_epoch(); + + ck_assert_int_eq(test_de->get_record_count(), 1000); + + /* + * verify that we can fill past the high water mark, potentially + * stalling to allow merges to finish as needed. 
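+     * Inserts can transiently fail (returning 0) while an internal
+     * reconstruction is in flight, so the loop below retries each
+     * insert, spinning on _mm_pause() between attempts until it
+     * succeeds.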
+ */ + size_t cnt = 0; + do { + if (test_de->insert(r)) { + r.key++; + r.value++; + cnt++; + ck_assert_int_eq(test_de->get_record_count(), cnt + 1000); + } else { + _mm_pause(); + } + } while (cnt < 100000); + + test_de->await_next_epoch(); + + ck_assert_int_eq(test_de->get_record_count(), 101000); + + delete test_de; +} +END_TEST + + +START_TEST(t_range_query) +{ + auto test_de = new DE(1000, 10000, 4); + size_t n = 10000000; + + std::vector<uint64_t> keys; + for (size_t i=0; i<n; i++) { + keys.push_back(i); + } + + std::random_device rd; + std::mt19937 gen{rd()}; + std::shuffle(keys.begin(), keys.end(), gen); + + size_t i=0; + while ( i < keys.size()) { + R r = {keys[i], (uint32_t) i}; + if (test_de->insert(r)) { + i++; + } else { + _mm_pause(); + } + } + + + test_de->await_next_epoch(); + + std::sort(keys.begin(), keys.end()); + + auto idx = rand() % (keys.size() - 250); + + uint64_t lower_key = keys[idx]; + uint64_t upper_key = keys[idx + 250]; + + rq::Parms<R> p; + p.lower_bound = lower_key; + p.upper_bound = upper_key; + + //fprintf(stderr, "query start\n"); + auto result = test_de->query(&p); + auto r = result.get(); + //fprintf(stderr, "query stop\n"); + std::sort(r.begin(), r.end()); + + ck_assert_int_eq(r.size(), 251); + + for (size_t i=0; i<r.size(); i++) { + ck_assert_int_eq(r[i].key, keys[idx + i]); + } + + delete test_de; +} +END_TEST + + +START_TEST(t_tombstone_merging_01) +{ + size_t reccnt = 100000; + auto test_de = new DE(100, 1000, 2); + + auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + std::set<std::pair<uint64_t, uint32_t>> records; + std::set<std::pair<uint64_t, uint32_t>> to_delete; + std::set<std::pair<uint64_t, uint32_t>> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + size_t cnt=0; + for (auto rec : records) { + R r = {rec.first, rec.second}; + while (!test_de->insert(r)) { + _mm_pause(); + } + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<std::pair<uint64_t, uint32_t>> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + R dr = {del_vec[i].first, del_vec[i].second}; + while (!test_de->erase(dr)) { + _mm_pause(); + } + deletes++; + to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + test_de->await_next_epoch(); + + ck_assert(test_de->validate_tombstone_proportion()); + + gsl_rng_free(rng); + delete test_de; +} +END_TEST + +DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { + auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto test_de = new DE(1000, 10000, 2); + + std::set<R> records; + std::set<R> to_delete; + std::set<R> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + for (auto rec : records) { + ck_assert_int_eq(test_de->insert(rec), 1); + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<R> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + test_de->erase(del_vec[i]); + deletes++; + 
to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + gsl_rng_free(rng); + + return test_de; +} + +START_TEST(t_static_structure) +{ + auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + size_t reccnt = 100000; + auto test_de = new DE(100, 1000, 2); + + std::set<R> records; + std::set<R> to_delete; + std::set<R> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + size_t t_reccnt = 0; + size_t k=0; + for (auto rec : records) { + k++; + while (!test_de->insert(rec)) { + _mm_pause(); + } + t_reccnt++; + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<R> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + while (!test_de->erase(del_vec[i])) { + _mm_pause(); + } + + deletes++; + to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + + //fprintf(stderr, "Tombstones: %ld\tRords: %ld\n", test_de->get_tombstone_count(), test_de->get_record_count()); + //fprintf(stderr, "Inserts: %ld\tDeletes:%ld\tNet:%ld\n", reccnt, deletes, reccnt - deletes); + + auto flat = test_de->create_static_structure(true); + //fprintf(stderr, "Flat: Tombstones: %ld\tRords %ld\n", flat->get_tombstone_count(), flat->get_record_count()); + //ck_assert_int_eq(flat->get_record_count(), reccnt - deletes); + + uint64_t prev_key = 0; + for (size_t i=0; i<flat->get_record_count(); i++) { + auto k = flat->get_record_at(i)->rec.key; + if (flat->get_record_at(i)->is_tombstone()) { + fprintf(stderr, "%ld %ld %ld\n", flat->get_record_at(i-1)->rec.key, + flat->get_record_at(i)->rec.key, + flat->get_record_at(i+1)->rec.key); + } + // ck_assert(!flat->get_record_at(i)->is_tombstone()); + ck_assert_int_ge(k, prev_key); + prev_key = k; + } + + gsl_rng_free(rng); + delete flat; + delete test_de; +} +END_TEST + + +static void inject_dynamic_extension_tests(Suite *suite) { + TCase *create = tcase_create("de::DynamicExtension::constructor Testing"); + tcase_add_test(create, t_create); + suite_add_tcase(suite, create); + + TCase *insert = tcase_create("de::DynamicExtension::insert Testing"); + tcase_add_test(insert, t_insert); + tcase_add_test(insert, t_insert_with_mem_merges); + tcase_add_test(insert, t_debug_insert); + tcase_set_timeout(insert, 500); + suite_add_tcase(suite, insert); + + TCase *query = tcase_create("de::DynamicExtension::range_query Testing"); + tcase_add_test(query, t_range_query); + tcase_set_timeout(query, 500); + suite_add_tcase(suite, query); + + + TCase *ts = tcase_create("de::DynamicExtension::tombstone_compaction Testing"); + tcase_add_test(ts, t_tombstone_merging_01); + tcase_set_timeout(ts, 500); + suite_add_tcase(suite, ts); + + TCase *flat = tcase_create("de::DynamicExtension::create_static_structure Testing"); + tcase_add_test(flat, t_static_structure); + tcase_set_timeout(flat, 500); + suite_add_tcase(suite, flat); +} diff --git a/tests/include/dynamic_extension.h b/tests/include/dynamic_extension.h new file mode 100644 index 0000000..6e9b16c --- /dev/null +++ b/tests/include/dynamic_extension.h @@ -0,0 +1,343 @@ +/* + * tests/include/dynamic_extension.h + * 
+ * Standardized unit tests for DynamicExtension objects + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard, Query, and R + * type. In particular, R needs to implement the key-value + * pair interface. For other types of record, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. + */ +/* +#include "testing.h" +#include "framework/DynamicExtension.h" +#include "framework/scheduling/SerialScheduler.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include <check.h> +using namespace de; +typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +*/ + + +START_TEST(t_create) +{ + auto test_de = new DE(100, 1000, 2); + + ck_assert_ptr_nonnull(test_de); + ck_assert_int_eq(test_de->get_record_count(), 0); + ck_assert_int_eq(test_de->get_height(), 0); + + delete test_de; +} +END_TEST + + +START_TEST(t_insert) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + for (size_t i=0; i<100; i++) { + R r = {key, val}; + ck_assert_int_eq(test_de->insert(r), 1); + key++; + val++; + } + + ck_assert_int_eq(test_de->get_height(), 0); + ck_assert_int_eq(test_de->get_record_count(), 100); + + delete test_de; +} +END_TEST + + +START_TEST(t_debug_insert) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + for (size_t i=0; i<1000; i++) { + R r = {key, val}; + ck_assert_int_eq(test_de->insert(r), 1); + ck_assert_int_eq(test_de->get_record_count(), i+1); + key++; + val++; + } + + delete test_de; +} +END_TEST + + +START_TEST(t_insert_with_mem_merges) +{ + auto test_de = new DE(100, 1000, 2); + + uint64_t key = 0; + uint32_t val = 0; + for (size_t i=0; i<300; i++) { + R r = {key, val}; + ck_assert_int_eq(test_de->insert(r), 1); + key++; + val++; + } + + test_de->await_next_epoch(); + + ck_assert_int_eq(test_de->get_record_count(), 300); + ck_assert_int_eq(test_de->get_height(), 1); + + delete test_de; +} +END_TEST + + +START_TEST(t_range_query) +{ + auto test_de = new DE(100, 1000, 2); + size_t n = 10000; + + std::vector<uint64_t> keys; + for (size_t i=0; i<n; i++) { + keys.push_back(rand() % 25000); + } + + std::random_device rd; + std::mt19937 gen{rd()}; + std::shuffle(keys.begin(), keys.end(), gen); + + for (size_t i=0; i<keys.size(); i++) { + R r = {keys[i], (uint32_t) i}; + ck_assert_int_eq(test_de->insert(r), 1); + } + + test_de->await_next_epoch(); + + std::sort(keys.begin(), keys.end()); + + auto idx = rand() % (keys.size() - 250); + + uint64_t lower_key = keys[idx]; + uint64_t upper_key = keys[idx + 250]; + + rq::Parms<R> p; + p.lower_bound = lower_key; + p.upper_bound = upper_key; + + auto result = test_de->query(&p); + auto r = result.get(); + std::sort(r.begin(), r.end()); + ck_assert_int_eq(r.size(), 251); + + for (size_t i=0; i<r.size(); i++) { + ck_assert_int_eq(r[i].key, keys[idx + i]); + } + + delete test_de; +} +END_TEST + + +START_TEST(t_tombstone_merging_01) +{ + size_t reccnt = 100000; + auto test_de = new DE(100, 1000, 2); + 
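+    /*
+     * NOTE: the three constructor arguments are assumed to be (buffer
+     * low water mark, buffer high water mark, scale factor); this
+     * reading is inferred from the concurrent test suite, which
+     * describes filling past the high water mark.
+     */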
+ auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + std::set<std::pair<uint64_t, uint32_t>> records; + std::set<std::pair<uint64_t, uint32_t>> to_delete; + std::set<std::pair<uint64_t, uint32_t>> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + size_t cnt=0; + for (auto rec : records) { + R r = {rec.first, rec.second}; + ck_assert_int_eq(test_de->insert(r), 1); + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<std::pair<uint64_t, uint32_t>> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + R dr = {del_vec[i].first, del_vec[i].second}; + test_de->erase(dr); + deletes++; + to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + test_de->await_next_epoch(); + + ck_assert(test_de->validate_tombstone_proportion()); + + gsl_rng_free(rng); + delete test_de; +} +END_TEST + +DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { + auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto test_de = new DE(1000, 10000, 2); + + std::set<R> records; + std::set<R> to_delete; + std::set<R> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + for (auto rec : records) { + ck_assert_int_eq(test_de->insert(rec), 1); + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<R> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + test_de->erase(del_vec[i]); + deletes++; + to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + gsl_rng_free(rng); + + return test_de; +} + +START_TEST(t_static_structure) +{ + auto rng = gsl_rng_alloc(gsl_rng_mt19937); + + size_t reccnt = 100000; + auto test_de = new DE(100, 1000, 2); + + std::set<R> records; + std::set<R> to_delete; + std::set<R> deleted; + + while (records.size() < reccnt) { + uint64_t key = rand(); + uint32_t val = rand(); + + if (records.find({key, val}) != records.end()) continue; + + records.insert({key, val}); + } + + size_t deletes = 0; + size_t t_reccnt = 0; + size_t k=0; + for (auto rec : records) { + k++; + ck_assert_int_eq(test_de->insert(rec), 1); + t_reccnt++; + + if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { + std::vector<R> del_vec; + std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); + + for (size_t i=0; i<del_vec.size(); i++) { + ck_assert_int_eq(test_de->erase(del_vec[i]), 1); + + deletes++; + to_delete.erase(del_vec[i]); + deleted.insert(del_vec[i]); + } + } + + if (gsl_rng_uniform(rng) < 0.25 && deleted.find(rec) == deleted.end()) { + to_delete.insert(rec); + } + } + + auto flat = test_de->create_static_structure(); + ck_assert_int_eq(flat->get_record_count(), reccnt - deletes); + + uint64_t prev_key = 0; + for (size_t i=0; i<flat->get_record_count(); i++) { + auto k = flat->get_record_at(i)->rec.key; + 
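+        /* records in the flattened structure must come back in key order */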
ck_assert_int_ge(k, prev_key); + prev_key = k; + } + + gsl_rng_free(rng); + delete flat; + delete test_de; +} +END_TEST + + +static void inject_dynamic_extension_tests(Suite *suite) { + TCase *create = tcase_create("de::DynamicExtension::constructor Testing"); + tcase_add_test(create, t_create); + suite_add_tcase(suite, create); + + TCase *insert = tcase_create("de::DynamicExtension::insert Testing"); + tcase_add_test(insert, t_insert); + tcase_add_test(insert, t_insert_with_mem_merges); + tcase_add_test(insert, t_debug_insert); + suite_add_tcase(suite, insert); + + TCase *query = tcase_create("de::DynamicExtension::range_query Testing"); + tcase_add_test(query, t_range_query); + suite_add_tcase(suite, query); + + TCase *ts = tcase_create("de::DynamicExtension::tombstone_compaction Testing"); + tcase_add_test(ts, t_tombstone_merging_01); + tcase_set_timeout(ts, 500); + suite_add_tcase(suite, ts); + + TCase *flat = tcase_create("de::DynamicExtension::create_static_structure Testing"); + tcase_add_test(flat, t_static_structure); + tcase_set_timeout(flat, 500); + suite_add_tcase(suite, flat); +} diff --git a/tests/include/rangecount.h b/tests/include/rangecount.h new file mode 100644 index 0000000..fdd66d9 --- /dev/null +++ b/tests/include/rangecount.h @@ -0,0 +1,162 @@ +/* + * tests/include/rangecount.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. 
+ */ +//#include "shard/ISAMTree.h" +//#include "query/rangecount.h" +//#include "testing.h" +//#include <check.h> +//using namespace de; +//typedef ISAMTree<R> Shard; + + +#include "query/rangecount.h" + +START_TEST(t_range_count) +{ + + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + rc::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + auto state = rc::Query<R, Shard>::get_query_state(&shard, &parms); + auto result = rc::Query<R, Shard>::query(&shard, state, &parms); + rc::Query<R, Shard>::delete_query_state(state); + + ck_assert_int_eq(result.size(), 1); + ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_range_count) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + rc::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + { + auto view = buffer->get_buffer_view(); + auto state = rc::Query<R, Shard>::get_buffer_query_state(&view, &parms); + auto result = rc::Query<R, Shard>::buffer_query(state, &parms); + rc::Query<R, Shard>::delete_buffer_query_state(state); + + ck_assert_int_eq(result.size(), 1); + ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_range_count_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = Shard(buffer2->get_buffer_view()); + + rc::Parms<R> parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + + size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; + + auto state1 = rc::Query<R, Shard>::get_query_state(&shard1, &parms); + auto state2 = rc::Query<R, Shard>::get_query_state(&shard2, &parms); + + std::vector<std::vector<de::Wrapped<R>>> results(2); + results[0] = rc::Query<R, Shard>::query(&shard1, state1, &parms); + results[1] = rc::Query<R, Shard>::query(&shard2, state2, &parms); + + rc::Query<R, Shard>::delete_query_state(state1); + rc::Query<R, Shard>::delete_query_state(state2); + + ck_assert_int_eq(results[0].size(), 1); + ck_assert_int_eq(results[1].size(), 1); + + auto result = rc::Query<R, Shard>::merge(results, nullptr); + + ck_assert_int_eq(result[0].key, result_size); + + delete buffer1; + delete buffer2; +} +END_TEST + + +START_TEST(t_lower_bound) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = new Shard(buffer1->get_buffer_view()); + auto shard2 = new Shard(buffer2->get_buffer_view()); + + std::vector<Shard*> shards = {shard1, shard2}; + + auto merged = Shard(shards); + + for (size_t i=100; i<1000; i++) { + R r; + r.key = i; + r.value = i; + + auto idx = merged.get_lower_bound(i); + + assert(idx < merged.get_record_count()); + + auto res = merged.get_record_at(idx); + + if (i >=200 && i <400) { + ck_assert_int_lt(res->rec.key, i); + } else { + ck_assert_int_eq(res->rec.key, i); + } + } + + delete buffer1; + delete buffer2; + delete shard1; + delete shard2; +} +END_TEST + +static void inject_rangecount_tests(Suite *suite) { + TCase *range_count = tcase_create("Range Query Testing"); + tcase_add_test(range_count, t_range_count); + tcase_add_test(range_count, t_buffer_range_count); + tcase_add_test(range_count, t_range_count_merge); + suite_add_tcase(suite, range_count); +} diff --git a/tests/include/rangequery.h 
b/tests/include/rangequery.h new file mode 100644 index 0000000..a8a73f7 --- /dev/null +++ b/tests/include/rangequery.h @@ -0,0 +1,183 @@ +/* + * tests/include/rangequery.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. + */ +//#include "shard/ISAMTree.h" +//#include "query/rangequery.h" +//#include "testing.h" +//#include <check.h> +//using namespace de; +//typedef ISAMTree<R> Shard; + +#include "query/rangequery.h" + + +START_TEST(t_range_query) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + rq::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); + auto result = rq::Query<R, Shard>::query(&shard, state, &parms); + rq::Query<R, Shard>::delete_query_state(state); + + ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_range_query) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + rq::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + { + auto view = buffer->get_buffer_view(); + auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); + auto result = rq::Query<R, Shard>::buffer_query(state, &parms); + rq::Query<R, Shard>::delete_buffer_query_state(state); + + ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + } + + delete buffer; +} +END_TEST + + +START_TEST(t_range_query_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = Shard(buffer2->get_buffer_view()); + + rq::Parms<R> parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + + size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; + + auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); + auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); + + std::vector<std::vector<de::Wrapped<R>>> results(2); + results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); + results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); + + rq::Query<R, Shard>::delete_query_state(state1); + rq::Query<R, Shard>::delete_query_state(state2); + + ck_assert_int_eq(results[0].size() + results[1].size(), 
result_size); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + auto result = rq::Query<R, Shard>::merge(proc_results, nullptr); + std::sort(result.begin(), result.end()); + + ck_assert_int_eq(result.size(), result_size); + auto key = parms.lower_bound; + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_eq(key++, result[i].key); + if (key == 200) { + key = 400; + } + } + + delete buffer1; + delete buffer2; +} +END_TEST + + +START_TEST(t_lower_bound) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = new Shard(buffer1->get_buffer_view()); + auto shard2 = new Shard(buffer2->get_buffer_view()); + + std::vector<Shard*> shards = {shard1, shard2}; + + auto merged = Shard(shards); + + for (size_t i=100; i<1000; i++) { + R r; + r.key = i; + r.value = i; + + auto idx = merged.get_lower_bound(i); + + assert(idx < merged.get_record_count()); + + auto res = merged.get_record_at(idx); + + if (i >=200 && i <400) { + ck_assert_int_lt(res->rec.key, i); + } else { + ck_assert_int_eq(res->rec.key, i); + } + } + + delete buffer1; + delete buffer2; + delete shard1; + delete shard2; +} +END_TEST + +static void inject_rangequery_tests(Suite *suite) { + TCase *range_query = tcase_create("Range Query Testing"); + tcase_add_test(range_query, t_range_query); + tcase_add_test(range_query, t_buffer_range_query); + tcase_add_test(range_query, t_range_query_merge); + suite_add_tcase(suite, range_query); +} diff --git a/tests/include/shard_standard.h b/tests/include/shard_standard.h new file mode 100644 index 0000000..7d17dcb --- /dev/null +++ b/tests/include/shard_standard.h @@ -0,0 +1,202 @@ +/* + * tests/include/shard_standard.h + * + * Standardized unit tests for Shard objects + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface. For other types of record, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. 
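+ *
+ * For reference, tests/memisam_tests.cpp in this patch consumes this
+ * header (together with rangequery.h) along these lines; the shard
+ * header and record type vary from driver to driver:
+ *
+ *     #include "shard/ISAMTree.h"
+ *     #include "include/testing.h"
+ *     #include <check.h>
+ *     using namespace de;
+ *     typedef Rec R;
+ *     typedef ISAMTree<R> Shard;
+ *     #include "include/shard_standard.h"
+ *     #include "include/rangequery.h"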
+ */
+/*
+#include "shard/ISAMTree.h"
+#include "testing.h"
+#include <check.h>
+using namespace de;
+typedef Rec R;
+typedef ISAMTree<R> Shard;
+*/
+
+START_TEST(t_mbuffer_init)
+{
+    auto buffer = new MutableBuffer<R>(512, 1024);
+    for (uint64_t i = 512; i > 0; i--) {
+        uint32_t v = i;
+        buffer->append({i, v, 1});
+    }
+
+    for (uint64_t i = 1; i <= 256; ++i) {
+        uint32_t v = i;
+        buffer->append({i, v, 1}, true);
+    }
+
+    for (uint64_t i = 257; i <= 512; ++i) {
+        uint32_t v = i + 1;
+        buffer->append({i, v, 1});
+    }
+
+    Shard* shard = new Shard(buffer->get_buffer_view());
+    ck_assert_uint_eq(shard->get_record_count(), 512);
+
+    delete buffer;
+    delete shard;
+}
+END_TEST
+
+
+START_TEST(t_shard_init)
+{
+    size_t n = 512;
+    auto mbuffer1 = create_test_mbuffer<R>(n);
+    auto mbuffer2 = create_test_mbuffer<R>(n);
+    auto mbuffer3 = create_test_mbuffer<R>(n);
+
+    auto shard1 = new Shard(mbuffer1->get_buffer_view());
+    auto shard2 = new Shard(mbuffer2->get_buffer_view());
+    auto shard3 = new Shard(mbuffer3->get_buffer_view());
+
+    std::vector<Shard*> shards = {shard1, shard2, shard3};
+    auto shard4 = new Shard(shards);
+
+    ck_assert_int_eq(shard4->get_record_count(), n * 3);
+    ck_assert_int_eq(shard4->get_tombstone_count(), 0);
+
+    size_t total_cnt = 0;
+    size_t shard1_idx = 0;
+    size_t shard2_idx = 0;
+    size_t shard3_idx = 0;
+
+    for (size_t i = 0; i < shard4->get_record_count(); ++i) {
+        auto rec1 = shard1->get_record_at(shard1_idx);
+        auto rec2 = shard2->get_record_at(shard2_idx);
+        auto rec3 = shard3->get_record_at(shard3_idx);
+
+        auto cur_rec = shard4->get_record_at(i);
+
+        if (shard1_idx < n && cur_rec->rec == rec1->rec) {
+            ++shard1_idx;
+        } else if (shard2_idx < n && cur_rec->rec == rec2->rec) {
+            ++shard2_idx;
+        } else if (shard3_idx < n && cur_rec->rec == rec3->rec) {
+            ++shard3_idx;
+        } else {
+            assert(false);
+        }
+    }
+
+    delete mbuffer1;
+    delete mbuffer2;
+    delete mbuffer3;
+
+    delete shard1;
+    delete shard2;
+    delete shard3;
+    delete shard4;
+}
+END_TEST
+
+
+START_TEST(t_full_cancelation)
+{
+    size_t n = 100;
+    auto buffer = create_double_seq_mbuffer<R>(n, false);
+    auto buffer_ts = create_double_seq_mbuffer<R>(n, true);
+
+    Shard* shard = new Shard(buffer->get_buffer_view());
+    Shard* shard_ts = new Shard(buffer_ts->get_buffer_view());
+
+    ck_assert_int_eq(shard->get_record_count(), n);
+    ck_assert_int_eq(shard->get_tombstone_count(), 0);
+    ck_assert_int_eq(shard_ts->get_record_count(), n);
+    ck_assert_int_eq(shard_ts->get_tombstone_count(), n);
+
+    std::vector<Shard *> shards = {shard, shard_ts};
+
+    Shard* merged = new Shard(shards);
+
+    ck_assert_int_eq(merged->get_tombstone_count(), 0);
+    ck_assert_int_eq(merged->get_record_count(), 0);
+
+    delete buffer;
+    delete buffer_ts;
+    delete shard;
+    delete shard_ts;
+    delete merged;
+}
+END_TEST
+
+
+START_TEST(t_point_lookup)
+{
+    size_t n = 10000;
+
+    auto buffer = create_double_seq_mbuffer<R>(n, false);
+    auto isam = Shard(buffer->get_buffer_view());
+
+    {
+        auto view = buffer->get_buffer_view();
+
+        for (size_t i=0; i<n; i++) {
+            R r;
+            auto rec = view.get(i);
+            r.key = rec->rec.key;
+            r.value = rec->rec.value;
+
+            auto result = isam.point_lookup(r);
+            ck_assert_ptr_nonnull(result);
+            ck_assert_int_eq(result->rec.key, r.key);
+            ck_assert_int_eq(result->rec.value, r.value);
+        }
+    }
+
+    delete buffer;
+}
+END_TEST
+
+
+START_TEST(t_point_lookup_miss)
+{
+    size_t n = 10000;
+
+    auto buffer = create_double_seq_mbuffer<R>(n, false);
+    auto isam = Shard(buffer->get_buffer_view());
+
+    for (size_t i=n + 100; i<2*n; i++) {
+        R r;
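+        /* none of these keys were ever inserted, so each lookup should miss */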
+ r.key = i; + r.value = i; + + auto result = isam.point_lookup(r); + ck_assert_ptr_null(result); + } + + delete buffer; +} + +static void inject_shard_tests(Suite *suite) { + TCase *create = tcase_create("Shard constructor Testing"); + tcase_add_test(create, t_mbuffer_init); + tcase_add_test(create, t_shard_init); + tcase_set_timeout(create, 100); + suite_add_tcase(suite, create); + TCase *tombstone = tcase_create("Shard tombstone cancellation Testing"); + tcase_add_test(tombstone, t_full_cancelation); + suite_add_tcase(suite, tombstone); + TCase *pointlookup = tcase_create("Shard point lookup Testing"); + tcase_add_test(pointlookup, t_point_lookup); + tcase_add_test(pointlookup, t_point_lookup_miss); + suite_add_tcase(suite, pointlookup); +} diff --git a/tests/testing.h b/tests/include/testing.h index bdf4869..f935b53 100644 --- a/tests/testing.h +++ b/tests/include/testing.h @@ -6,7 +6,7 @@ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ #pragma once @@ -18,12 +18,12 @@ #include "util/types.h" #include "psu-util/alignment.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" +#include "framework/structure/MutableBuffer.h" +#include "framework/interface/Record.h" typedef de::WeightedRecord<uint64_t, uint32_t, uint64_t> WRec; typedef de::Record<uint64_t, uint32_t> Rec; -typedef de::EuclidPoint<int64_t> PRec; +typedef de::EuclidPoint<uint64_t> PRec; template <de::RecordInterface R> std::vector<R> strip_wrapping(std::vector<de::Wrapped<R>> vec) { @@ -76,55 +76,48 @@ static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { return ((double) std::abs(n1 - n2) / (double) mag) < epsilon; } -static de::MutableBuffer<PRec> *create_2d_mbuffer(size_t cnt) { - auto buffer = new de::MutableBuffer<PRec>(cnt, cnt); - - for (int64_t i=0; i<cnt; i++) { - buffer->append({rand(), rand()}); - } - - return buffer; -} - -static de::MutableBuffer<PRec> *create_2d_sequential_mbuffer(size_t cnt) { - auto buffer = new de::MutableBuffer<PRec>(cnt, cnt); - for (int64_t i=0; i<cnt; i++) { - buffer->append({i, i}); - } - - return buffer; -} - -template <de::KVPInterface R> +template <de::RecordInterface R> static de::MutableBuffer<R> *create_test_mbuffer(size_t cnt) { - auto buffer = new de::MutableBuffer<R>(cnt, cnt); + auto buffer = new de::MutableBuffer<R>(cnt/2, cnt); R rec; - for (size_t i = 0; i < cnt; i++) { - rec.key = rand(); - rec.value = rand(); + if constexpr (de::KVPInterface<R>) { + for (size_t i = 0; i < cnt; i++) { + rec.key = rand(); + rec.value = rand(); - if constexpr (de::WeightedRecordInterface<R>) { - rec.weight = 1; - } + if constexpr (de::WeightedRecordInterface<R>) { + rec.weight = 1; + } - buffer->append(rec); - } + buffer->append(rec); + } + } else if constexpr (de::NDRecordInterface<R>) { + for (size_t i=0; i<cnt; i++) { + uint64_t a = rand(); + uint64_t b = rand(); + buffer->append({a, b}); + } + } return buffer; } -template <de::KVPInterface R> -static de::MutableBuffer<R> *create_sequential_mbuffer(decltype(R::key) start, decltype(R::key) stop) +template <de::RecordInterface R> +static de::MutableBuffer<R> *create_sequential_mbuffer(size_t start, size_t stop) { size_t cnt = stop - start; - auto buffer = new de::MutableBuffer<R>(cnt, cnt); + auto buffer = new de::MutableBuffer<R>(cnt/2, cnt); for (size_t i=start; i<stop; i++) { R rec; - rec.key = i; - rec.value = i; + if constexpr 
(de::KVPInterface<R>) { + rec.key = i; + rec.value = i; + } else if constexpr (de::NDRecordInterface<R>) { + rec = {i, i}; + } if constexpr (de::WeightedRecordInterface<R>) { rec.weight = 1; @@ -139,7 +132,7 @@ static de::MutableBuffer<R> *create_sequential_mbuffer(decltype(R::key) start, d template <de::KVPInterface R> static de::MutableBuffer<R> *create_test_mbuffer_tombstones(size_t cnt, size_t ts_cnt) { - auto buffer = new de::MutableBuffer<R>(cnt, ts_cnt); + auto buffer = new de::MutableBuffer<R>(cnt/2, cnt); std::vector<std::pair<uint64_t, uint32_t>> tombstones; @@ -171,7 +164,7 @@ template <typename R> requires de::WeightedRecordInterface<R> && de::KVPInterface<R> static de::MutableBuffer<R> *create_weighted_mbuffer(size_t cnt) { - auto buffer = new de::MutableBuffer<R>(cnt, cnt); + auto buffer = new de::MutableBuffer<R>(cnt/2, cnt); // Put in half of the count with weight one. for (uint32_t i=0; i< cnt / 2; i++) { @@ -194,7 +187,7 @@ static de::MutableBuffer<R> *create_weighted_mbuffer(size_t cnt) template <de::KVPInterface R> static de::MutableBuffer<R> *create_double_seq_mbuffer(size_t cnt, bool ts=false) { - auto buffer = new de::MutableBuffer<R>(cnt, cnt); + auto buffer = new de::MutableBuffer<R>(cnt/2, cnt); for (size_t i = 0; i < cnt / 2; i++) { R rec; diff --git a/tests/include/wirs.h b/tests/include/wirs.h new file mode 100644 index 0000000..90cd22d --- /dev/null +++ b/tests/include/wirs.h @@ -0,0 +1,181 @@ +/* + * tests/include/rangequery.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. 
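+ *
+ * No WIRS test driver is present in this patch, so the following
+ * includer is a purely hypothetical sketch following the pattern of
+ * the other standardized test headers (the header names below are
+ * assumptions):
+ *
+ *     #include "shard/ISAMTree.h"
+ *     #include "query/rangequery.h"
+ *     #include "include/testing.h"
+ *     #include <check.h>
+ *     using namespace de;
+ *     typedef Rec R;
+ *     typedef ISAMTree<R> Shard;
+ *     #include "include/wirs.h"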
+ */ +//#include "shard/ISAMTree.h" +//#include "query/rangequery.h" +//#include "testing.h" +//#include <check.h> +//using namespace de; +//typedef ISAMTree<R> Shard; + + +START_TEST(t_range_query) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + rq::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); + auto result = rq::Query<R, Shard>::query(&shard, state, &parms); + rq::Query<R, Shard>::delete_query_state(state); + + ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_range_query) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + rq::Parms<R> parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + + { + auto view = buffer->get_buffer_view(); + auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); + auto result = rq::Query<R, Shard>::buffer_query(state, &parms); + rq::Query<R, Shard>::delete_buffer_query_state(state); + + ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + } + + delete buffer; +} +END_TEST + + +START_TEST(t_range_query_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = Shard(buffer2->get_buffer_view()); + + rq::Parms<R> parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + + size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; + + auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); + auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); + + std::vector<std::vector<de::Wrapped<R>>> results(2); + results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); + results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); + + rq::Query<R, Shard>::delete_query_state(state1); + rq::Query<R, Shard>::delete_query_state(state2); + + ck_assert_int_eq(results[0].size() + results[1].size(), result_size); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + auto result = rq::Query<R, Shard>::merge(proc_results, nullptr); + std::sort(result.begin(), result.end()); + + ck_assert_int_eq(result.size(), result_size); + auto key = parms.lower_bound; + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_eq(key++, result[i].key); + if (key == 200) { + key = 400; + } + } + + delete buffer1; + delete buffer2; +} +END_TEST + + +START_TEST(t_lower_bound) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = new Shard(buffer1->get_buffer_view()); + auto shard2 = new Shard(buffer2->get_buffer_view()); + + std::vector<Shard*> shards = {shard1, shard2}; + + auto merged = Shard(shards); + + for (size_t i=100; i<1000; i++) { + R r; + r.key = i; + r.value = i; + + auto idx = 
merged.get_lower_bound(i);
+
+        assert(idx < merged.get_record_count());
+
+        auto res = merged.get_record_at(idx);
+
+        if (i >= 200 && i < 400) {
+            ck_assert_int_lt(res->rec.key, i);
+        } else {
+            ck_assert_int_eq(res->rec.key, i);
+        }
+    }
+
+    delete buffer1;
+    delete buffer2;
+    delete shard1;
+    delete shard2;
+}
+END_TEST
+
+static void inject_rangequery_tests(Suite *suite) {
+    TCase *range_query = tcase_create("Range Query Testing");
+    tcase_add_test(range_query, t_range_query);
+    tcase_add_test(range_query, t_buffer_range_query);
+    tcase_add_test(range_query, t_range_query_merge);
+    suite_add_tcase(suite, range_query);
+}
diff --git a/tests/include/wss.h b/tests/include/wss.h
new file mode 100644
index 0000000..f0ac74c
--- /dev/null
+++ b/tests/include/wss.h
@@ -0,0 +1,144 @@
+/*
+ * tests/include/wss.h
+ *
+ * Standardized unit tests for weighted set sampling (WSS) queries
+ * against supporting shard types
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * WARNING: This file must be included in the main unit test set
+ *          after the definition of an appropriate Shard and R
+ *          type. In particular, R needs to implement the weighted
+ *          key-value pair interface and Shard needs to support
+ *          weighted sampling. For other types of record and shard,
+ *          you'll need to use a different set of unit tests.
+ */
+#pragma once
+
+/*
+ * Uncomment these lines temporarily to remove errors in this file
+ * during development. They should be re-commented prior to building,
+ * to ensure no duplicate definitions. These includes/defines should
+ * be included in the source file that includes this one, above the
+ * include statement.
+ */
+//#include "shard/Alias.h"
+//#include "testing.h"
+//#include <check.h>
+//using namespace de;
+//typedef Alias<R> Shard;
+
+#include "query/wss.h"
+
+START_TEST(t_wss_query)
+{
+    auto buffer = create_weighted_mbuffer<R>(1000);
+    auto shard = Shard(buffer->get_buffer_view());
+
+    auto rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    wss::Parms<R> parms;
+    parms.rng = rng;
+    parms.sample_size = 20;
+
+    auto state = wss::Query<R, Shard>::get_query_state(&shard, &parms);
+    auto result = wss::Query<R, Shard>::query(&shard, state, &parms);
+    wss::Query<R, Shard>::delete_query_state(state);
+
+    ck_assert_int_eq(result.size(), parms.sample_size);
+
+    delete buffer;
+    gsl_rng_free(rng);
+}
+END_TEST
+
+
+START_TEST(t_buffer_wss_query)
+{
+    auto buffer = create_weighted_mbuffer<R>(1000);
+
+    auto rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    wss::Parms<R> parms;
+    parms.rng = rng;
+    parms.sample_size = 20;
+
+    {
+        auto view = buffer->get_buffer_view();
+        auto state = wss::Query<R, Shard>::get_buffer_query_state(&view, &parms);
+        auto result = wss::Query<R, Shard>::buffer_query(state, &parms);
+        wss::Query<R, Shard>::delete_buffer_query_state(state);
+
+        ck_assert_int_eq(result.size(), parms.sample_size);
+    }
+
+    gsl_rng_free(rng);
+    delete buffer;
+}
+END_TEST
+
+
+/*
+START_TEST(t_range_query_merge)
+{
+    auto buffer1 = create_sequential_mbuffer<R>(100, 200);
+    auto buffer2 = create_sequential_mbuffer<R>(400, 1000);
+
+    auto shard1 = Shard(buffer1->get_buffer_view());
+    auto shard2 = Shard(buffer2->get_buffer_view());
+
+    wss::Parms<R> parms;
+    parms.lower_bound = 150;
+    parms.upper_bound = 500;
+
+    size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200;
+
+    auto state1 = wss::Query<R, Shard>::get_query_state(&shard1, &parms);
+    auto state2 = wss::Query<R, Shard>::get_query_state(&shard2, &parms);
+
+    std::vector<std::vector<de::Wrapped<R>>> results(2);
+    results[0] = wss::Query<R,
Shard>::query(&shard1, state1, &parms); + results[1] = wss::Query<R, Shard>::query(&shard2, state2, &parms); + + wss::Query<R, Shard>::delete_query_state(state1); + wss::Query<R, Shard>::delete_query_state(state2); + + ck_assert_int_eq(results[0].size() + results[1].size(), result_size); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + auto result = wss::Query<R, Shard>::merge(proc_results, nullptr); + std::sort(result.begin(), result.end()); + + ck_assert_int_eq(result.size(), result_size); + auto key = parms.lower_bound; + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_eq(key++, result[i].key); + if (key == 200) { + key = 400; + } + } + + delete buffer1; + delete buffer2; +} +END_TEST +*/ + + +static void inject_wss_tests(Suite *suite) { + TCase *wss_query = tcase_create("WSS Query Testing"); + tcase_add_test(wss_query, t_wss_query); + tcase_add_test(wss_query, t_buffer_wss_query); + //tcase_add_test(wss_query, t_wss_query_merge); + suite_add_tcase(suite, wss_query); +} diff --git a/tests/internal_level_tests.cpp b/tests/internal_level_tests.cpp index 9deb485..06b0bab 100644 --- a/tests/internal_level_tests.cpp +++ b/tests/internal_level_tests.cpp @@ -6,42 +6,41 @@ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ -#include "shard/WIRS.h" -#include "framework/InternalLevel.h" -#include "framework/RecordInterface.h" -#include "framework/QueryInterface.h" -#include "framework/ShardInterface.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "framework/structure/InternalLevel.h" +#include "framework/interface/Record.h" +#include "framework/interface/Query.h" +#include "framework/interface/Shard.h" -#include "testing.h" +#include "include/testing.h" #include <check.h> using namespace de; -typedef InternalLevel<WRec, WIRS<WRec>, WIRSQuery<WRec>> ILevel; +typedef InternalLevel<Rec, ISAMTree<Rec>, rq::Query<Rec, ISAMTree<Rec>>> ILevel; START_TEST(t_memlevel_merge) { - auto tbl1 = create_test_mbuffer<WRec>(100); - auto tbl2 = create_test_mbuffer<WRec>(100); + auto tbl1 = create_test_mbuffer<Rec>(100); + auto tbl2 = create_test_mbuffer<Rec>(100); auto base_level = new ILevel(1, 1); - base_level->append_buffer(tbl1); + base_level->append_buffer(tbl1->get_buffer_view()); ck_assert_int_eq(base_level->get_record_count(), 100); auto merging_level = new ILevel(0, 1); - merging_level->append_buffer(tbl2); + merging_level->append_buffer(tbl2->get_buffer_view()); ck_assert_int_eq(merging_level->get_record_count(), 100); - auto old_level = base_level; - base_level = ILevel::merge_levels(old_level, merging_level); + auto new_level = ILevel::reconstruction(base_level, merging_level); - delete old_level; delete merging_level; - ck_assert_int_eq(base_level->get_record_count(), 200); + ck_assert_int_eq(new_level->get_record_count(), 200); delete base_level; delete tbl1; @@ -50,12 +49,12 @@ START_TEST(t_memlevel_merge) ILevel *create_test_memlevel(size_t reccnt) { - auto tbl1 = create_test_mbuffer<WRec>(reccnt/2); - auto tbl2 = create_test_mbuffer<WRec>(reccnt/2); + auto tbl1 = create_test_mbuffer<Rec>(reccnt/2); + auto tbl2 = create_test_mbuffer<Rec>(reccnt/2); auto base_level = new ILevel(1, 2); - 
base_level->append_buffer(tbl1); - base_level->append_buffer(tbl2); + base_level->append_buffer(tbl1->get_buffer_view()); + base_level->append_buffer(tbl2->get_buffer_view()); delete tbl1; delete tbl2; @@ -67,7 +66,7 @@ Suite *unit_testing() { Suite *unit = suite_create("InternalLevel Unit Testing"); - TCase *merge = tcase_create("de::InternalLevel::merge_level Testing"); + TCase *merge = tcase_create("de::InternalLevel::reconstruction Testing"); tcase_add_test(merge, t_memlevel_merge); suite_add_tcase(unit, merge); diff --git a/tests/memisam_tests.cpp b/tests/memisam_tests.cpp index 0ae97dc..9117ce3 100644 --- a/tests/memisam_tests.cpp +++ b/tests/memisam_tests.cpp @@ -1,361 +1,33 @@ /* - * tests/irs_tests.cpp + * tests/isam_tests.cpp * - * Unit tests for MemISAM (Augmented B+Tree) shard + * Unit tests for ISAM Tree shard * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ -#include "shard/MemISAM.h" -#include "testing.h" - +#include "shard/ISAMTree.h" +#include "include/testing.h" #include <check.h> using namespace de; -typedef MemISAM<Rec> Shard; - -START_TEST(t_mbuffer_init) -{ - auto buffer = new MutableBuffer<Rec>(1024, 1024); - for (uint64_t i = 512; i > 0; i--) { - uint32_t v = i; - buffer->append({i,v, 1}); - } - - for (uint64_t i = 1; i <= 256; ++i) { - uint32_t v = i; - buffer->append({i, v, 1}, true); - } - - for (uint64_t i = 257; i <= 512; ++i) { - uint32_t v = i + 1; - buffer->append({i, v, 1}); - } - - Shard* shard = new Shard(buffer); - ck_assert_uint_eq(shard->get_record_count(), 512); - - delete buffer; - delete shard; -} - - -START_TEST(t_irs_init) -{ - size_t n = 512; - auto mbuffer1 = create_test_mbuffer<Rec>(n); - auto mbuffer2 = create_test_mbuffer<Rec>(n); - auto mbuffer3 = create_test_mbuffer<Rec>(n); - - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); - - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); - - ck_assert_int_eq(shard4->get_record_count(), n * 3); - ck_assert_int_eq(shard4->get_tombstone_count(), 0); - - size_t total_cnt = 0; - size_t shard1_idx = 0; - size_t shard2_idx = 0; - size_t shard3_idx = 0; - - for (size_t i = 0; i < shard4->get_record_count(); ++i) { - auto rec1 = shard1->get_record_at(shard1_idx); - auto rec2 = shard2->get_record_at(shard2_idx); - auto rec3 = shard3->get_record_at(shard3_idx); - - auto cur_rec = shard4->get_record_at(i); - - if (shard1_idx < n && cur_rec->rec == rec1->rec) { - ++shard1_idx; - } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { - ++shard2_idx; - } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { - ++shard3_idx; - } else { - assert(false); - } - } - - delete mbuffer1; - delete mbuffer2; - delete mbuffer3; - - delete shard1; - delete shard2; - delete shard3; - delete shard4; -} - -START_TEST(t_point_lookup) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto isam = Shard(buffer); - - for (size_t i=0; i<n; i++) { - Rec r; - auto rec = (buffer->get_data() + i); - r.key = rec->rec.key; - r.value = rec->rec.value; - - auto result = isam.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.key, r.key); - ck_assert_int_eq(result->rec.value, r.value); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_point_lookup_miss) -{ - size_t n = 10000; - - auto buffer = 
create_double_seq_mbuffer<Rec>(n, false); - auto isam = Shard(buffer); - - for (size_t i=n + 100; i<2*n; i++) { - Rec r; - r.key = i; - r.value = i; - - auto result = isam.point_lookup(r); - ck_assert_ptr_null(result); - } - - delete buffer; -} - - -START_TEST(t_full_cancelation) -{ - size_t n = 100; - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto buffer_ts = create_double_seq_mbuffer<Rec>(n, true); - - Shard* shard = new Shard(buffer); - Shard* shard_ts = new Shard(buffer_ts); - - ck_assert_int_eq(shard->get_record_count(), n); - ck_assert_int_eq(shard->get_tombstone_count(), 0); - ck_assert_int_eq(shard_ts->get_record_count(), n); - ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - - Shard* shards[] = {shard, shard_ts}; - - Shard* merged = new Shard(shards, 2); - - ck_assert_int_eq(merged->get_tombstone_count(), 0); - ck_assert_int_eq(merged->get_record_count(), 0); - - delete buffer; - delete buffer_ts; - delete shard; - delete shard_ts; - delete merged; -} -END_TEST - - -START_TEST(t_irs_query) -{ - size_t n=1000; - auto buffer = create_double_seq_mbuffer<Rec>(n); - auto isam = Shard(buffer); - - uint64_t lower_key = 100; - uint64_t upper_key = 250; - - size_t k = 100; - - size_t cnt[3] = {0}; - irs_query_parms<Rec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = IRSQuery<Rec, false>::get_query_state(&isam, &parms); - ((IRSState<WRec> *) state)->sample_size = k; - auto result = IRSQuery<Rec, false>::query(&isam, state, &parms); - - ck_assert_int_eq(result.size(), k); - - for (auto &rec : result) { - ck_assert_int_le(rec.rec.key, upper_key); - ck_assert_int_ge(rec.rec.key, lower_key); - } - - IRSQuery<Rec, false>::delete_query_state(state); - } - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -START_TEST(t_irs_query_merge) -{ - size_t n=1000; - auto buffer = create_double_seq_mbuffer<Rec>(n); - - Shard shard = Shard(buffer); - - uint64_t lower_key = 100; - uint64_t upper_key = 250; - - size_t k = 1000; - - size_t cnt[3] = {0}; - irs_query_parms<Rec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - std::vector<std::vector<de::Wrapped<Rec>>> results(2); - - for (size_t i=0; i<1000; i++) { - auto state1 = IRSQuery<Rec>::get_query_state(&shard, &parms); - ((IRSState<WRec> *) state1)->sample_size = k; - results[0] = IRSQuery<Rec>::query(&shard, state1, &parms); - - auto state2 = IRSQuery<Rec>::get_query_state(&shard, &parms); - ((IRSState<WRec> *) state2)->sample_size = k; - results[1] = IRSQuery<Rec>::query(&shard, state2, &parms); - - IRSQuery<Rec>::delete_query_state(state1); - IRSQuery<Rec>::delete_query_state(state2); - } - - auto merged = IRSQuery<Rec>::merge(results, nullptr); - - ck_assert_int_eq(merged.size(), 2*k); - for (size_t i=0; i<merged.size(); i++) { - ck_assert_int_ge(merged[i].key, lower_key); - ck_assert_int_le(merged[i].key, upper_key); - } - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -START_TEST(t_irs_buffer_query_scan) -{ - size_t n=1000; - auto buffer = create_double_seq_mbuffer<Rec>(n); - - uint64_t lower_key = 100; - uint64_t upper_key = 250; - - size_t k = 100; - - size_t cnt[3] = {0}; - irs_query_parms<Rec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = IRSQuery<Rec, false>::get_buffer_query_state(buffer, &parms); - ((IRSBufferState<WRec> *) 
state)->sample_size = k;
-        auto result = IRSQuery<Rec, false>::buffer_query(buffer, state, &parms);
-
-        ck_assert_int_eq(result.size(), k);
-
-        for (auto &rec : result) {
-            ck_assert_int_le(rec.rec.key, upper_key);
-            ck_assert_int_ge(rec.rec.key, lower_key);
-        }
-
-        IRSQuery<Rec, false>::delete_buffer_query_state(state);
-    }
-
-    gsl_rng_free(parms.rng);
-    delete buffer;
-}
-END_TEST
-
-
-START_TEST(t_irs_buffer_query_rejection)
-{
-    size_t n=1000;
-    auto buffer = create_double_seq_mbuffer<Rec>(n);
-
-    uint64_t lower_key = 100;
-    uint64_t upper_key = 250;
-
-    size_t k = 10000;
-
-    size_t cnt[3] = {0};
-    irs_query_parms<Rec> parms = {lower_key, upper_key, k};
-    parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
-
-    size_t total_samples = 0;
-
-    for (size_t i=0; i<1000; i++) {
-        auto state = IRSQuery<Rec>::get_buffer_query_state(buffer, &parms);
-        ((IRSBufferState<WRec> *) state)->sample_size = k;
-        auto result = IRSQuery<Rec>::buffer_query(buffer, state, &parms);
-
-        ck_assert_int_gt(result.size(), 0);
-        ck_assert_int_le(result.size(), k);
-
-        for (auto &rec : result) {
-            ck_assert_int_le(rec.rec.key, upper_key);
-            ck_assert_int_ge(rec.rec.key, lower_key);
-        }
-
-        IRSQuery<Rec>::delete_buffer_query_state(state);
-    }
-
-    gsl_rng_free(parms.rng);
-    delete buffer;
-}
-END_TEST
+typedef Rec R;
+typedef ISAMTree<R> Shard;
+#include "include/shard_standard.h"
+#include "include/rangequery.h"
 
 Suite *unit_testing()
 {
-    Suite *unit = suite_create("MemISAM Shard Unit Testing");
-
-    TCase *create = tcase_create("de::MemISAM constructor Testing");
-    tcase_add_test(create, t_mbuffer_init);
-    tcase_add_test(create, t_irs_init);
-    tcase_set_timeout(create, 100);
-    suite_add_tcase(unit, create);
-
-
-    TCase *tombstone = tcase_create("de:MemISAM::tombstone cancellation Testing");
-    tcase_add_test(tombstone, t_full_cancelation);
-    suite_add_tcase(unit, tombstone);
-
-
-    TCase *lookup = tcase_create("de:MemISAM:point_lookup Testing");
-    tcase_add_test(lookup, t_point_lookup);
-    tcase_add_test(lookup, t_point_lookup_miss);
-    suite_add_tcase(unit, lookup);
-
+    Suite *unit = suite_create("ISAM Tree Shard Unit Testing");
 
-    TCase *sampling = tcase_create("de:MemISAM::MemISAMQuery Testing");
-    tcase_add_test(sampling, t_irs_query);
-    tcase_add_test(sampling, t_irs_query_merge);
-    tcase_add_test(sampling, t_irs_buffer_query_rejection);
-    tcase_add_test(sampling, t_irs_buffer_query_scan);
-    tcase_set_timeout(sampling, 100);
-    suite_add_tcase(unit, sampling);
+    inject_rangequery_tests(unit);
+    inject_shard_tests(unit);
 
     return unit;
 }
diff --git a/tests/mutable_buffer_tests.cpp b/tests/mutable_buffer_tests.cpp
index 201fddb..31c16dc 100644
--- a/tests/mutable_buffer_tests.cpp
+++ b/tests/mutable_buffer_tests.cpp
@@ -1,39 +1,47 @@
 /*
  * tests/mutable_buffer_tests.cpp
  *
- * Unit tests for MutableBuffer
+ * Unit tests for MutableBuffer and BufferView
  *
  * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
  *                    Dong Xie <dongx@psu.edu>
  *
- * All rights reserved. Published under the Modified BSD License.
+ * Distributed under the Modified BSD License.
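+ *
+ * A note on the interface exercised below (inferred from the
+ * assertions in these tests, not from separate documentation): the
+ * buffer is constructed with a low and a high watermark, its physical
+ * capacity is twice the high watermark, and appends fail once the
+ * high watermark is reached. BufferView objects pin the buffer head,
+ * so advance_head() refuses to run while an older view is still live.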
* */ -#include <string> + #include <thread> #include <vector> -#include <algorithm> -#include "testing.h" -#include "framework/MutableBuffer.h" +#include "include/testing.h" +#include "framework/structure/MutableBuffer.h" #include <check.h> -#define DE_MT_TEST 0 - using namespace de; START_TEST(t_create) { - auto buffer = new MutableBuffer<Rec>(100, 50); + size_t lwm = 50, hwm = 100; + size_t cap = 2 * hwm; + + auto buffer = new MutableBuffer<Rec>(lwm, hwm); ck_assert_ptr_nonnull(buffer); - ck_assert_int_eq(buffer->get_capacity(), 100); - ck_assert_int_eq(buffer->get_record_count(), 0); + ck_assert_int_eq(buffer->get_capacity(), cap); + ck_assert_int_eq(buffer->get_low_watermark(), lwm); + ck_assert_int_eq(buffer->get_high_watermark(), hwm); + ck_assert_int_eq(buffer->is_full(), false); - ck_assert_ptr_nonnull(buffer->get_data()); + ck_assert_int_eq(buffer->is_at_low_watermark(), false); + ck_assert_int_eq(buffer->get_record_count(), 0); ck_assert_int_eq(buffer->get_tombstone_count(), 0); - ck_assert_int_eq(buffer->get_tombstone_capacity(), 50); + + { + auto view = buffer->get_buffer_view(); + ck_assert_int_eq(view.get_tombstone_count(), 0); + ck_assert_int_eq(view.get_record_count(), 0); + } delete buffer; } @@ -42,76 +50,149 @@ END_TEST START_TEST(t_insert) { - auto buffer = new MutableBuffer<WRec>(100, 50); + auto buffer = new MutableBuffer<Rec>(50, 100); + + Rec rec = {0, 5, 1}; - uint64_t key = 0; - uint32_t val = 5; + /* insert records up to the low watermark */ + size_t cnt = 0; + for (size_t i=0; i<50; i++) { + ck_assert_int_eq(buffer->is_at_low_watermark(), false); + ck_assert_int_eq(buffer->append(rec), 1); + ck_assert_int_eq(buffer->check_tombstone(rec), 0); - WRec rec = {0, 5, 1}; + rec.key++; + rec.value++; + cnt++; - for (size_t i=0; i<99; i++) { + ck_assert_int_eq(buffer->get_record_count(), cnt); + ck_assert_int_eq(buffer->get_buffer_view().get_record_count(), cnt); + ck_assert_int_eq(buffer->get_tail(), cnt); + } + + ck_assert_int_eq(buffer->is_at_low_watermark(), true); + + /* insert records up to the high watermark */ + for (size_t i=0; i<50; i++) { + ck_assert_int_eq(buffer->is_full(), 0); ck_assert_int_eq(buffer->append(rec), 1); ck_assert_int_eq(buffer->check_tombstone(rec), 0); rec.key++; rec.value++; + cnt++; + + ck_assert_int_eq(buffer->get_record_count(), cnt); + ck_assert_int_eq(buffer->get_buffer_view().get_record_count(), cnt); - ck_assert_int_eq(buffer->get_record_count(), i+1); ck_assert_int_eq(buffer->get_tombstone_count(), 0); - ck_assert_int_eq(buffer->is_full(), 0); + ck_assert_int_eq(buffer->is_at_low_watermark(), true); + ck_assert_int_eq(buffer->get_tail(), cnt); } - ck_assert_int_eq(buffer->append(rec), 1); - + /* further inserts should fail */ rec.key++; rec.value++; - ck_assert_int_eq(buffer->is_full(), 1); ck_assert_int_eq(buffer->append(rec), 0); delete buffer; - } END_TEST -START_TEST(t_insert_tombstones) +START_TEST(t_advance_head) { - auto buffer = new MutableBuffer<Rec>(100, 50); + auto buffer = new MutableBuffer<Rec>(50, 100); - size_t ts_cnt = 0; + /* insert 75 records and get tail when LWM is exceeded */ + size_t new_head = 0; + Rec rec = {1, 1}; + size_t cnt = 0; + for (size_t i=0; i<75; i++) { + ck_assert_int_eq(buffer->append(rec), 1); - Rec rec = {0, 5}; + rec.key++; + rec.value++; + cnt++; - for (size_t i=0; i<99; i++) { - bool ts = false; - if (i % 2 == 0) { - ts_cnt++; - ts=true; + if (buffer->is_at_low_watermark() && new_head == 0) { + new_head = buffer->get_tail(); } + } - ck_assert_int_eq(buffer->append(rec, ts), 1); - 
ck_assert_int_eq(buffer->check_tombstone(rec), ts); + ck_assert_int_eq(buffer->get_available_capacity(), 200 - cnt); - rec.key++; - rec.value++; + Wrapped<Rec> *view_records = new Wrapped<Rec>[buffer->get_record_count()]; + { + /* get a view of the pre-advanced state */ + auto view = buffer->get_buffer_view(); + ck_assert_int_eq(view.get_record_count(), cnt); + view.copy_to_buffer((psudb::byte *) view_records); - ck_assert_int_eq(buffer->get_record_count(), i+1); - ck_assert_int_eq(buffer->get_tombstone_count(), ts_cnt); - ck_assert_int_eq(buffer->is_full(), 0); + /* advance the head */ + ck_assert_int_eq(buffer->advance_head(new_head), 1); + ck_assert_int_eq(buffer->get_record_count(), 25); + ck_assert_int_eq(buffer->get_buffer_view().get_record_count(), 25); + ck_assert_int_eq(view.get_record_count(), cnt); + ck_assert_int_eq(buffer->get_available_capacity(), 200 - cnt); + + /* refuse to advance head again while there remain references to the old one */ + ck_assert_int_eq(buffer->advance_head(buffer->get_tail() -1), 0); } - // inserting one more tombstone should not be possible - ck_assert_int_eq(buffer->append(rec, true), 0); + /* once the buffer view falls out of scope, the capacity of the buffer should increase */ + ck_assert_int_eq(buffer->get_available_capacity(), 175); + /* now the head should be able to be advanced */ + ck_assert_int_eq(buffer->advance_head(buffer->get_tail()), 1); - ck_assert_int_eq(buffer->append(rec), 1); + /* and the buffer should be empty */ + ck_assert_int_eq(buffer->get_record_count(), 0); - rec.key++; - rec.value++; + delete buffer; + delete[] view_records; +} +END_TEST + +void insert_records(std::vector<Rec> *values, size_t start, size_t stop, MutableBuffer<Rec> *buffer) +{ + for (size_t i=start; i<stop; i++) { + buffer->append((*values)[i]); + } + +} + +START_TEST(t_multithreaded_insert) +{ + size_t cnt = 10000; + auto buffer = new MutableBuffer<Rec>(cnt/2, cnt); + + std::vector<Rec> records(cnt); + for (size_t i=0; i<cnt; i++) { + records[i] = Rec {(uint64_t) rand(), (uint32_t) rand()}; + } + + /* perform a multithreaded insertion */ + size_t thread_cnt = 8; + size_t per_thread = cnt / thread_cnt; + std::vector<std::thread> workers(thread_cnt); + size_t start = 0; + size_t stop = start + per_thread; + for (size_t i=0; i<thread_cnt; i++) { + workers[i] = std::thread(insert_records, &records, start, stop, buffer); + start = stop; + stop = std::min(start + per_thread, cnt); + } + + for (size_t i=0; i<thread_cnt; i++) { + if (workers[i].joinable()) { + workers[i].join(); + } + } ck_assert_int_eq(buffer->is_full(), 1); - ck_assert_int_eq(buffer->append(rec), 0); + ck_assert_int_eq(buffer->get_record_count(), cnt); delete buffer; } @@ -120,7 +201,7 @@ END_TEST START_TEST(t_truncate) { - auto buffer = new MutableBuffer<Rec>(100, 100); + auto buffer = new MutableBuffer<Rec>(50, 100); size_t ts_cnt = 0; Rec rec = {0, 5}; @@ -157,42 +238,76 @@ START_TEST(t_truncate) } END_TEST - -START_TEST(t_get_data) +START_TEST(t_bview_get) { - size_t cnt = 100; + auto buffer = new MutableBuffer<Rec>(50, 100); - auto buffer = new MutableBuffer<Rec>(cnt, cnt/2); + /* insert 75 records and get tail when LWM is exceeded */ + size_t new_head = 0; + Rec rec = {1, 1}; + size_t cnt = 0; + for (size_t i=0; i<75; i++) { + ck_assert_int_eq(buffer->append(rec), 1); + rec.key++; + rec.value++; + cnt++; - std::vector<uint64_t> keys(cnt); - for (size_t i=0; i<cnt-2; i++) { - keys[i] = rand(); + if (buffer->is_at_low_watermark() && new_head == 0) { + new_head = buffer->get_tail(); + } } - // 
duplicate final two records for tombstone testing - // purposes - keys[cnt-2] = keys[cnt-3]; - keys[cnt-1] = keys[cnt-2]; + ck_assert_int_eq(buffer->get_available_capacity(), 200 - cnt); + + { + /* get a view of the pre-advanced state */ + auto view = buffer->get_buffer_view(); + auto reccnt = view.get_record_count(); - uint32_t val = 12345; - for (size_t i=0; i<cnt-2; i++) { - buffer->append(Rec {keys[i], val}); + /* scan the records in the view */ + for (size_t i=0; i<reccnt; i++) { + ck_assert_int_eq(view.get(i)->rec.key, i+1); + } + + /* advance the head */ + buffer->advance_head(new_head); + + /* scan the records in the view again -- should be unchanged */ + for (size_t i=0; i<reccnt; i++) { + ck_assert_int_eq(view.get(i)->rec.key, i+1); + } } - Rec r1 = {keys[cnt-2], val}; - buffer->append(r1, true); + { + /* get a new view (should have fewer records) */ + auto view = buffer->get_buffer_view(); + auto reccnt = view.get_record_count(); - Rec r2 = {keys[cnt-1], val}; - buffer->append(r2, true); + /* verify the scan again */ + for (size_t i=0; i<reccnt; i++) { + ck_assert_int_eq(view.get(i)->rec.key, i + 51); + } + } + + /* insert more records (to trigger a wrap-around) */ + for (size_t i=0; i<75; i++) { + ck_assert_int_eq(buffer->append(rec), 1); + rec.key++; + rec.value++; + cnt++; + } - auto *sorted_records = buffer->get_data(); - std::sort(keys.begin(), keys.end()); - std::sort(sorted_records, sorted_records + buffer->get_record_count(), std::less<Wrapped<Rec>>()); + { + /* get a new view (should have fewer records) */ + auto view = buffer->get_buffer_view(); + auto reccnt = view.get_record_count(); - for (size_t i=0; i<cnt; i++) { - ck_assert_int_eq(sorted_records[i].rec.key, keys[i]); + /* verify the scan again */ + for (size_t i=0; i<reccnt; i++) { + ck_assert_int_eq(view.get(i)->rec.key, i + 51); + } } delete buffer; @@ -200,56 +315,65 @@ START_TEST(t_get_data) END_TEST -void insert_records(std::vector<std::pair<uint64_t, uint32_t>> *values, size_t start, size_t stop, MutableBuffer<Rec> *buffer) +START_TEST(t_bview_delete) { - for (size_t i=start; i<stop; i++) { - buffer->append({(*values)[i].first, (*values)[i].second}); - } -} + auto buffer = new MutableBuffer<Rec>(50, 100); -#if DE_MT_TEST -START_TEST(t_multithreaded_insert) -{ - size_t cnt = 10000; - auto buffer = new MutableBuffer<Rec>(cnt, true, cnt/2); - - std::vector<Rec> records(cnt); - for (size_t i=0; i<cnt; i++) { - records[i] = Rec {(uint64_t) rand(), (uint32_t) rand()}; - } + /* insert 75 records and get tail when LWM is exceeded */ + size_t new_head = 0; + Rec rec = {1, 1}; + size_t cnt = 0; + for (size_t i=0; i<75; i++) { + ck_assert_int_eq(buffer->append(rec), 1); - // perform a t_multithreaded insertion - size_t thread_cnt = 8; - size_t per_thread = cnt / thread_cnt; - std::vector<std::thread> workers(thread_cnt); - size_t start = 0; - size_t stop = start + per_thread; - for (size_t i=0; i<thread_cnt; i++) { - workers[i] = std::thread(insert_records, &records, start, stop, buffer); - start = stop; - stop = std::min(start + per_thread, cnt); - } + rec.key++; + rec.value++; + cnt++; - for (size_t i=0; i<thread_cnt; i++) { - if (workers[i].joinable()) { - workers[i].join(); + if (buffer->is_at_low_watermark() && new_head == 0) { + new_head = buffer->get_tail(); } } - ck_assert_int_eq(buffer->is_full(), 1); - ck_assert_int_eq(buffer->get_record_count(), cnt); + buffer->advance_head(new_head); - std::sort(records.begin(), records.end()); - auto *sorted_records = buffer->sorted_output(); - for (size_t i=0; i<cnt; 
i++) { - ck_assert_int_eq(sorted_records[i].key, records[i].key); + for (size_t i=0; i<75; i++) { + ck_assert_int_eq(buffer->append(rec), 1); + + rec.key++; + rec.value++; + cnt++; + } + + Rec dr1 = {67, 67}; + Rec dr2 = {89, 89}; + Rec dr3 = {103, 103}; + + Rec fdr1 = {5, 5}; + Rec fdr2 = {300, 300}; + { + /* get a new view (should have fewer records) */ + auto view = buffer->get_buffer_view(); + ck_assert_int_eq(view.delete_record(dr1), 1); + ck_assert_int_eq(view.delete_record(dr2), 1); + ck_assert_int_eq(view.delete_record(dr3), 1); + ck_assert_int_eq(view.delete_record(fdr1), 0); + ck_assert_int_eq(view.delete_record(fdr2), 0); + + for (size_t i=0; i<view.get_record_count(); i++) { + if (view.get(i)->rec == dr1 || view.get(i)->rec == dr2 + || view.get(i)->rec == dr3) { + ck_assert_int_eq(view.get(i)->is_deleted(), 1); + } else { + ck_assert_int_eq(view.get(i)->is_deleted(), 0); + } + } } delete buffer; } END_TEST -#endif Suite *unit_testing() @@ -263,13 +387,16 @@ Suite *unit_testing() TCase *append = tcase_create("de::MutableBuffer::append Testing"); tcase_add_test(append, t_insert); - tcase_add_test(append, t_insert_tombstones); - #if DE_MT_TEST - tcase_add_test(append, t_multithreaded_insert); - #endif + tcase_add_test(append, t_advance_head); + tcase_add_test(append, t_multithreaded_insert); suite_add_tcase(unit, append); + TCase *view = tcase_create("de::BufferView Testing"); + tcase_add_test(view, t_bview_get); + tcase_add_test(view, t_bview_delete); + + suite_add_tcase(unit, view); TCase *truncate = tcase_create("de::MutableBuffer::truncate Testing"); tcase_add_test(truncate, t_truncate); @@ -277,11 +404,6 @@ Suite *unit_testing() suite_add_tcase(unit, truncate); - TCase *sorted_out = tcase_create("de::MutableBuffer::get_data"); - tcase_add_test(sorted_out, t_get_data); - - suite_add_tcase(unit, sorted_out); - return unit; } diff --git a/tests/pgm_tests.cpp b/tests/pgm_tests.cpp index 0552417..ee350de 100644 --- a/tests/pgm_tests.cpp +++ b/tests/pgm_tests.cpp @@ -1,339 +1,33 @@ /* - * tests/irs_tests.cpp + * tests/isam_tests.cpp * - * Unit tests for PGM (Augmented B+Tree) shard + * Unit tests for ISAM Tree shard * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. 
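+ *
+ * This driver now only defines R and Shard and pulls in the
+ * standardized suites from tests/include/, matching the other shard
+ * test drivers in this patch.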
* */ #include "shard/PGM.h" -#include "testing.h" - +#include "include/testing.h" #include <check.h> using namespace de; -typedef PGM<Rec> Shard; - -START_TEST(t_mbuffer_init) -{ - auto buffer = new MutableBuffer<Rec>(1024, 1024); - for (uint64_t i = 512; i > 0; i--) { - uint32_t v = i; - buffer->append({i,v, 1}); - } - - for (uint64_t i = 1; i <= 256; ++i) { - uint32_t v = i; - buffer->append({i, v, 1}, true); - } - - for (uint64_t i = 257; i <= 512; ++i) { - uint32_t v = i + 1; - buffer->append({i, v, 1}); - } - - Shard* shard = new Shard(buffer); - ck_assert_uint_eq(shard->get_record_count(), 512); - - delete buffer; - delete shard; -} - - -START_TEST(t_irs_init) -{ - size_t n = 512; - auto mbuffer1 = create_test_mbuffer<Rec>(n); - auto mbuffer2 = create_test_mbuffer<Rec>(n); - auto mbuffer3 = create_test_mbuffer<Rec>(n); - - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); - - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); - - ck_assert_int_eq(shard4->get_record_count(), n * 3); - ck_assert_int_eq(shard4->get_tombstone_count(), 0); - - size_t total_cnt = 0; - size_t shard1_idx = 0; - size_t shard2_idx = 0; - size_t shard3_idx = 0; - - for (size_t i = 0; i < shard4->get_record_count(); ++i) { - auto rec1 = shard1->get_record_at(shard1_idx); - auto rec2 = shard2->get_record_at(shard2_idx); - auto rec3 = shard3->get_record_at(shard3_idx); - - auto cur_rec = shard4->get_record_at(i); - - if (shard1_idx < n && cur_rec->rec == rec1->rec) { - ++shard1_idx; - } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { - ++shard2_idx; - } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { - ++shard3_idx; - } else { - assert(false); - } - } - - delete mbuffer1; - delete mbuffer2; - delete mbuffer3; - - delete shard1; - delete shard2; - delete shard3; - delete shard4; -} - -START_TEST(t_point_lookup) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto shard = Shard(buffer); - - for (size_t i=0; i<n; i++) { - Rec r; - auto rec = (buffer->get_data() + i); - r.key = rec->rec.key; - r.value = rec->rec.value; - - auto result = shard.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.key, r.key); - ck_assert_int_eq(result->rec.value, r.value); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_point_lookup_miss) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto isam = Shard(buffer); - - for (size_t i=n + 100; i<2*n; i++) { - Rec r; - r.key = i; - r.value = i; - - auto result = isam.point_lookup(r); - ck_assert_ptr_null(result); - } - - delete buffer; -} - - -START_TEST(t_range_query) -{ - auto buffer = create_sequential_mbuffer<Rec>(100, 1000); - auto shard = Shard(buffer); - - pgm_range_query_parms<Rec> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = PGMRangeQuery<Rec>::get_query_state(&shard, &parms); - auto result = PGMRangeQuery<Rec>::query(&shard, state, &parms); - PGMRangeQuery<Rec>::delete_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_buffer_range_query) -{ - auto buffer = create_sequential_mbuffer<Rec>(100, 1000); - - pgm_range_query_parms<Rec> parms; - parms.lower_bound = 300; - parms.upper_bound 
= 500; - - auto state = PGMRangeQuery<Rec>::get_buffer_query_state(buffer, &parms); - auto result = PGMRangeQuery<Rec>::buffer_query(buffer, state, &parms); - PGMRangeQuery<Rec>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<Rec>(100, 200); - auto buffer2 = create_sequential_mbuffer<Rec>(400, 1000); - - auto shard1 = Shard(buffer1); - auto shard2 = Shard(buffer2); - - pgm_range_query_parms<Rec> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = PGMRangeQuery<Rec>::get_query_state(&shard1, &parms); - auto state2 = PGMRangeQuery<Rec>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<Rec>>> results(2); - results[0] = PGMRangeQuery<Rec>::query(&shard1, state1, &parms); - results[1] = PGMRangeQuery<Rec>::query(&shard2, state2, &parms); - - PGMRangeQuery<Rec>::delete_query_state(state1); - PGMRangeQuery<Rec>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<Rec>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<Rec>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - auto result = PGMRangeQuery<Rec>::merge(proc_results, nullptr); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } - } - - delete buffer1; - delete buffer2; -} -END_TEST - -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<Rec>(100, 200); - auto buffer2 = create_sequential_mbuffer<Rec>(400, 1000); - - de::PGM<Rec> *shards[2]; - - auto shard1 = Shard(buffer1); - auto shard2 = Shard(buffer2); - - shards[0] = &shard1; - shards[1] = &shard2; - - auto merged = Shard(shards, 2); - - for (size_t i=100; i<1000; i++) { - Rec r; - r.key = i; - r.value = i; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } - - delete buffer1; - delete buffer2; -} -END_TEST - - -START_TEST(t_full_cancelation) -{ - size_t n = 100; - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto buffer_ts = create_double_seq_mbuffer<Rec>(n, true); - - Shard* shard = new Shard(buffer); - Shard* shard_ts = new Shard(buffer_ts); - - ck_assert_int_eq(shard->get_record_count(), n); - ck_assert_int_eq(shard->get_tombstone_count(), 0); - ck_assert_int_eq(shard_ts->get_record_count(), n); - ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - - Shard* shards[] = {shard, shard_ts}; - - Shard* merged = new Shard(shards, 2); - - ck_assert_int_eq(merged->get_tombstone_count(), 0); - ck_assert_int_eq(merged->get_record_count(), 0); - - delete buffer; - delete buffer_ts; - delete shard; - delete shard_ts; - delete merged; -} -END_TEST +typedef Rec R; +typedef PGM<R> Shard; +#include 
"include/shard_standard.h" +#include "include/rangequery.h" Suite *unit_testing() { Suite *unit = suite_create("PGM Shard Unit Testing"); - TCase *create = tcase_create("de::PGM constructor Testing"); - tcase_add_test(create, t_mbuffer_init); - tcase_add_test(create, t_irs_init); - tcase_set_timeout(create, 100); - suite_add_tcase(unit, create); - - - TCase *tombstone = tcase_create("de:PGM::tombstone cancellation Testing"); - tcase_add_test(tombstone, t_full_cancelation); - suite_add_tcase(unit, tombstone); - - - TCase *lookup = tcase_create("de:PGM:point_lookup Testing"); - tcase_add_test(lookup, t_point_lookup); - tcase_add_test(lookup, t_point_lookup_miss); - tcase_add_test(lookup, t_lower_bound); - suite_add_tcase(unit, lookup); - - TCase *range_query = tcase_create("de:PGM::range_query Testing"); - tcase_add_test(range_query, t_range_query); - tcase_add_test(range_query, t_buffer_range_query); - tcase_add_test(range_query, t_range_query_merge); - suite_add_tcase(unit, range_query); + inject_rangequery_tests(unit); + inject_shard_tests(unit); return unit; } diff --git a/tests/rangecount_tests.cpp b/tests/rangecount_tests.cpp new file mode 100644 index 0000000..3be8234 --- /dev/null +++ b/tests/rangecount_tests.cpp @@ -0,0 +1,56 @@ +/* + * tests/rangequery_tests.cpp + * + * Unit tests for Range Queries across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef Rec R; +typedef ISAMTree<Rec> Shard; + +#include "include/rangecount.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("Range Count Query Testing"); + inject_rangecount_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/rangequery_tests.cpp b/tests/rangequery_tests.cpp new file mode 100644 index 0000000..bf5fb5e --- /dev/null +++ b/tests/rangequery_tests.cpp @@ -0,0 +1,55 @@ +/* + * tests/rangequery_tests.cpp + * + * Unit tests for Range Queries across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; + +#include "include/rangequery.h" + +Suite *unit_testing() +{ + Suite *unit = suite_create("Range Count Query Testing"); + inject_rangequery_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/triespline_tests.cpp b/tests/triespline_tests.cpp index 6f63961..e884360 100644 --- a/tests/triespline_tests.cpp +++ b/tests/triespline_tests.cpp @@ -1,258 +1,33 @@ /* * tests/triespline_tests.cpp * - * Unit tests for TrieSpline (Augmented B+Tree) shard + * Unit tests for the TrieSpline shard * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * */ -#include <functional> - #include "shard/TrieSpline.h" -#include "testing.h" - +#include "include/testing.h" #include <check.h> using namespace de; -typedef TrieSpline<Rec> Shard; - -START_TEST(t_mbuffer_init) -{ - auto buffer = new MutableBuffer<Rec>(1024, 1024); - for (uint64_t i = 512; i > 0; i--) { - uint32_t v = i; - buffer->append({i,v, 1}); - } - - for (uint64_t i = 1; i <= 256; ++i) { - uint32_t v = i; - buffer->append({i, v, 1}, true); - } - - for (uint64_t i = 257; i <= 512; ++i) { - uint32_t v = i + 1; - buffer->append({i, v, 1}); - } - - Shard* shard = new Shard(buffer); - ck_assert_uint_eq(shard->get_record_count(), 512); - - delete buffer; - delete shard; -} - - -START_TEST(t_init) -{ - size_t n = 512; - auto mbuffer1 = create_test_mbuffer<Rec>(n); - auto mbuffer2 = create_test_mbuffer<Rec>(n); - auto mbuffer3 = create_test_mbuffer<Rec>(n); - - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); - - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); - - ck_assert_int_eq(shard4->get_record_count(), n * 3); - ck_assert_int_eq(shard4->get_tombstone_count(), 0); - - size_t total_cnt = 0; - size_t shard1_idx = 0; - size_t shard2_idx = 0; - size_t shard3_idx = 0; - - for (size_t i = 0; i < shard4->get_record_count(); ++i) { - auto rec1 = shard1->get_record_at(shard1_idx); - auto rec2 = shard2->get_record_at(shard2_idx); - auto rec3 = shard3->get_record_at(shard3_idx); - - auto cur_rec = shard4->get_record_at(i); - - if (shard1_idx < n && cur_rec->rec == rec1->rec) { - ++shard1_idx; - } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { - ++shard2_idx; - } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { - ++shard3_idx; - } else { - assert(false); - } - } - - delete mbuffer1; - delete mbuffer2; - delete mbuffer3; - - delete shard1; - delete shard2; - delete shard3; - delete shard4; -} - -START_TEST(t_point_lookup) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto shard = Shard(buffer); - - for (size_t i=0; i<n; i++) { - Rec r; - auto rec = (buffer->get_data() + i); - r.key = rec->rec.key; - r.value = rec->rec.value; - - auto result = shard.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.key, r.key); - ck_assert_int_eq(result->rec.value, r.value); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_point_lookup_miss) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto isam = Shard(buffer); - - for (size_t i=n + 100; i<2*n; i++) { - Rec r; - r.key = i; - r.value = i; - - auto result = isam.point_lookup(r); - ck_assert_ptr_null(result); - } - - delete buffer; -} - - -START_TEST(t_full_cancelation) -{ - size_t n = 100; - auto buffer = create_double_seq_mbuffer<Rec>(n, false); - auto buffer_ts = create_double_seq_mbuffer<Rec>(n, true); - - Shard* shard = new Shard(buffer); - Shard* shard_ts = new
Shard(buffer_ts); - - ck_assert_int_eq(shard->get_record_count(), n); - ck_assert_int_eq(shard->get_tombstone_count(), 0); - ck_assert_int_eq(shard_ts->get_record_count(), n); - ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - - Shard* shards[] = {shard, shard_ts}; - - Shard* merged = new Shard(shards, 2); - - ck_assert_int_eq(merged->get_tombstone_count(), 0); - ck_assert_int_eq(merged->get_record_count(), 0); - - delete buffer; - delete buffer_ts; - delete shard; - delete shard_ts; - delete merged; -} -END_TEST - - -START_TEST(t_range_query) -{ - auto buffer = create_sequential_mbuffer<Rec>(100, 1000); - auto shard = Shard(buffer); - - ts_range_query_parms<Rec> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = TrieSplineRangeQuery<Rec>::get_query_state(&shard, &parms); - auto result = TrieSplineRangeQuery<Rec>::query(&shard, state, &parms); - TrieSplineRangeQuery<Rec>::delete_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_buffer_range_query) -{ - auto buffer = create_sequential_mbuffer<Rec>(100, 1000); - - ts_range_query_parms<Rec> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = TrieSplineRangeQuery<Rec>::get_buffer_query_state(buffer, &parms); - auto result = TrieSplineRangeQuery<Rec>::buffer_query(buffer, state, &parms); - TrieSplineRangeQuery<Rec>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_range_query_merge) -{ - -} -END_TEST +typedef Rec R; +typedef TrieSpline<R> Shard; +#include "include/shard_standard.h" +#include "include/rangequery.h" Suite *unit_testing() { - Suite *unit = suite_create("TrieSpline Shard Unit Testing"); - - TCase *create = tcase_create("de::TrieSpline constructor Testing"); - tcase_add_test(create, t_mbuffer_init); - tcase_add_test(create, t_init); - tcase_set_timeout(create, 100); - suite_add_tcase(unit, create); - - - TCase *tombstone = tcase_create("de:TrieSpline::tombstone cancellation Testing"); - tcase_add_test(tombstone, t_full_cancelation); - suite_add_tcase(unit, tombstone); - - - TCase *lookup = tcase_create("de:TrieSpline:point_lookup Testing"); - tcase_add_test(lookup, t_point_lookup); - tcase_add_test(lookup, t_point_lookup_miss); - suite_add_tcase(unit, lookup); - - - TCase *range_query = tcase_create("de:TrieSpline::range_query Testing"); - tcase_add_test(range_query, t_range_query); - tcase_add_test(range_query, t_buffer_range_query); - tcase_add_test(range_query, t_range_query_merge); - suite_add_tcase(unit, range_query); + Suite *unit = suite_create("TrieSpline Shard Unit Testing"); + inject_rangequery_tests(unit); + inject_shard_tests(unit); return unit; } diff --git a/tests/vptree_tests.cpp b/tests/vptree_tests.cpp index 06f147b..ff99ba6 100644 --- a/tests/vptree_tests.cpp +++ b/tests/vptree_tests.cpp @@ -5,31 +5,32 @@ * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License.
* */ + +#include "include/testing.h" #include "shard/VPTree.h" -#include "testing.h" -#include "vptree.hpp" +#include "query/knn.h" #include <check.h> using namespace de; - -typedef VPTree<PRec> Shard; +typedef PRec R; +typedef VPTree<R> Shard; START_TEST(t_mbuffer_init) { size_t n= 24; - auto buffer = new MutableBuffer<PRec>(n, n); + auto buffer = new MutableBuffer<PRec>(n/2, n); for (int64_t i=0; i<n; i++) { - buffer->append({i, i}); + buffer->append({(uint64_t) i, (uint64_t) i}); } - Shard* shard = new Shard(buffer); + Shard* shard = new Shard(buffer->get_buffer_view()); ck_assert_uint_eq(shard->get_record_count(), n); delete buffer; @@ -40,16 +41,16 @@ START_TEST(t_mbuffer_init) START_TEST(t_wss_init) { size_t n = 512; - auto mbuffer1 = create_2d_mbuffer(n); - auto mbuffer2 = create_2d_mbuffer(n); - auto mbuffer3 = create_2d_mbuffer(n); + auto mbuffer1 = create_test_mbuffer<R>(n); + auto mbuffer2 = create_test_mbuffer<R>(n); + auto mbuffer3 = create_test_mbuffer<R>(n); - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); + auto shard1 = new Shard(mbuffer1->get_buffer_view()); + auto shard2 = new Shard(mbuffer2->get_buffer_view()); + auto shard3 = new Shard(mbuffer3->get_buffer_view()); - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); + std::vector<Shard *> shards = {shard1, shard2, shard3}; + auto shard4 = new Shard(shards); ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); @@ -69,19 +70,23 @@ START_TEST(t_point_lookup) { size_t n = 16; - auto buffer = create_2d_sequential_mbuffer(n); - auto wss = Shard(buffer); + auto buffer = create_sequential_mbuffer<R>(0, n); + auto wss = Shard(buffer->get_buffer_view()); - for (size_t i=0; i<n; i++) { - PRec r; - auto rec = (buffer->get_data() + i); - r.data[0] = rec->rec.data[0]; - r.data[1] = rec->rec.data[1]; + { + auto bv = buffer->get_buffer_view(); - auto result = wss.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.data[0], r.data[0]); - ck_assert_int_eq(result->rec.data[1], r.data[1]); + for (size_t i=0; i<n; i++) { + PRec r; + auto rec = (bv.get(i)); + r.data[0] = rec->rec.data[0]; + r.data[1] = rec->rec.data[1]; + + auto result = wss.point_lookup(r); + ck_assert_ptr_nonnull(result); + ck_assert_int_eq(result->rec.data[0], r.data[0]); + ck_assert_int_eq(result->rec.data[1], r.data[1]); + } } delete buffer; @@ -93,8 +98,8 @@ START_TEST(t_point_lookup_miss) { size_t n = 10000; - auto buffer = create_2d_sequential_mbuffer(n); - auto wss = Shard(buffer); + auto buffer = create_sequential_mbuffer<R>(0, n); + auto wss = Shard(buffer->get_buffer_view()); for (size_t i=n + 100; i<2*n; i++) { PRec r; @@ -112,24 +117,27 @@ START_TEST(t_point_lookup_miss) START_TEST(t_buffer_query) { size_t n = 10000; - auto buffer = create_2d_sequential_mbuffer(n); + auto buffer = create_sequential_mbuffer<R>(0, n); PRec target; target.data[0] = 120; target.data[1] = 120; - KNNQueryParms<PRec> p; + knn::Parms<PRec> p; p.k = 10; p.point = target; - auto state = KNNQuery<PRec>::get_buffer_query_state(buffer, &p); - auto result = KNNQuery<PRec>::buffer_query(buffer, state, &p); - KNNQuery<PRec>::delete_buffer_query_state(state); + { + auto bv = buffer->get_buffer_view(); + auto state = knn::Query<PRec, Shard>::get_buffer_query_state(&bv, &p); + auto result = knn::Query<PRec, Shard>::buffer_query(state, &p); + knn::Query<PRec, Shard>::delete_buffer_query_state(state); - 
std::sort(result.begin(), result.end()); - size_t start = 120 - 5; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(result[i].rec.data[0], start++); + std::sort(result.begin(), result.end()); + size_t start = 120 - 5; + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_eq(result[i].rec.data[0], start++); + } } delete buffer; @@ -138,19 +146,19 @@ START_TEST(t_buffer_query) START_TEST(t_knn_query) { size_t n = 1000; - auto buffer = create_2d_sequential_mbuffer(n); + auto buffer = create_sequential_mbuffer<R>(0, n); - auto vptree = VPTree<PRec>(buffer); + auto vptree = VPTree<PRec>(buffer->get_buffer_view()); - KNNQueryParms<PRec> p; + knn::Parms<PRec> p; for (size_t i=0; i<100; i++) { p.k = rand() % 150; p.point.data[0] = rand() % (n-p.k); p.point.data[1] = p.point.data[0]; - auto state = KNNQuery<PRec>::get_query_state(&vptree, &p); - auto results = KNNQuery<PRec>::query(&vptree, state, &p); - KNNQuery<PRec>::delete_query_state(state); + auto state = knn::Query<PRec, Shard>::get_query_state(&vptree, &p); + auto results = knn::Query<PRec, Shard>::query(&vptree, state, &p); + knn::Query<PRec, Shard>::delete_query_state(state); ck_assert_int_eq(results.size(), p.k); diff --git a/tests/wirs_tests.cpp b/tests/wirs_tests.cpp deleted file mode 100644 index a72f950..0000000 --- a/tests/wirs_tests.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* - * tests/wirs_tests.cpp - * - * Unit tests for WIRS (Augmented B+Tree) shard - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ - -#include "shard/WIRS.h" -#include "testing.h" - -#include <check.h> - -using namespace de; - -typedef WIRS<WRec> Shard; - -START_TEST(t_mbuffer_init) -{ - auto buffer = new MutableBuffer<WRec>(1024, 1024); - for (uint64_t i = 512; i > 0; i--) { - uint32_t v = i; - buffer->append({i,v, 1}); - } - - for (uint64_t i = 1; i <= 256; ++i) { - uint32_t v = i; - buffer->append({i, v, 1}, true); - } - - for (uint64_t i = 257; i <= 512; ++i) { - uint32_t v = i + 1; - buffer->append({i, v, 1}); - } - - Shard* shard = new Shard(buffer); - ck_assert_uint_eq(shard->get_record_count(), 512); - - delete buffer; - delete shard; -} - - -START_TEST(t_wirs_init) -{ - size_t n = 512; - auto mbuffer1 = create_test_mbuffer<WRec>(n); - auto mbuffer2 = create_test_mbuffer<WRec>(n); - auto mbuffer3 = create_test_mbuffer<WRec>(n); - - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); - - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); - - ck_assert_int_eq(shard4->get_record_count(), n * 3); - ck_assert_int_eq(shard4->get_tombstone_count(), 0); - - size_t total_cnt = 0; - size_t shard1_idx = 0; - size_t shard2_idx = 0; - size_t shard3_idx = 0; - - for (size_t i = 0; i < shard4->get_record_count(); ++i) { - auto rec1 = shard1->get_record_at(shard1_idx); - auto rec2 = shard2->get_record_at(shard2_idx); - auto rec3 = shard3->get_record_at(shard3_idx); - - auto cur_rec = shard4->get_record_at(i); - - if (shard1_idx < n && cur_rec->rec == rec1->rec) { - ++shard1_idx; - } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { - ++shard2_idx; - } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { - ++shard3_idx; - } else { - assert(false); - } - } - - delete mbuffer1; - delete mbuffer2; - delete mbuffer3; - - delete shard1; - delete shard2; - delete shard3; - delete shard4; -} - - -START_TEST(t_point_lookup) -{ - size_t n = 10000; - 
- auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto wirs = Shard(buffer); - - for (size_t i=0; i<n; i++) { - WRec r; - auto rec = (buffer->get_data() + i); - r.key = rec->rec.key; - r.value = rec->rec.value; - - auto result = wirs.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.key, r.key); - ck_assert_int_eq(result->rec.value, r.value); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_point_lookup_miss) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto wirs = Shard(buffer); - - for (size_t i=n + 100; i<2*n; i++) { - WRec r; - r.key = i; - r.value = i; - - auto result = wirs.point_lookup(r); - ck_assert_ptr_null(result); - } - - delete buffer; -} - - -START_TEST(t_full_cancelation) -{ - size_t n = 100; - auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto buffer_ts = create_double_seq_mbuffer<WRec>(n, true); - - Shard* shard = new Shard(buffer); - Shard* shard_ts = new Shard(buffer_ts); - - ck_assert_int_eq(shard->get_record_count(), n); - ck_assert_int_eq(shard->get_tombstone_count(), 0); - ck_assert_int_eq(shard_ts->get_record_count(), n); - ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - - Shard* shards[] = {shard, shard_ts}; - - Shard* merged = new Shard(shards, 2); - - ck_assert_int_eq(merged->get_tombstone_count(), 0); - ck_assert_int_eq(merged->get_record_count(), 0); - - delete buffer; - delete buffer_ts; - delete shard; - delete shard_ts; - delete merged; -} -END_TEST - - -START_TEST(t_wirs_query) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - Shard* shard = new Shard(buffer); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wirs_query_parms<WRec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WIRSQuery<WRec>::get_query_state(shard, &parms); - ((WIRSState<WRec> *) state)->sample_size = k; - auto result = WIRSQuery<WRec>::query(shard, state, &parms); - - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WIRSQuery<WRec>::delete_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .05)); - - gsl_rng_free(parms.rng); - delete shard; - delete buffer; -} -END_TEST - - -START_TEST(t_wirs_query_merge) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - Shard* shard = new Shard(buffer); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wirs_query_parms<WRec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - std::vector<std::vector<Wrapped<WRec>>> results(2); - - for (size_t i=0; i<1000; i++) { - auto state1 = WIRSQuery<WRec>::get_query_state(shard, &parms); - ((WIRSState<WRec> *) state1)->sample_size = k; - results[0] = WIRSQuery<WRec>::query(shard, state1, &parms); - - auto state2 = WIRSQuery<WRec>::get_query_state(shard, &parms); - ((WIRSState<WRec> *) state2)->sample_size = k; - results[1] = WIRSQuery<WRec>::query(shard, state2, &parms); - - WIRSQuery<WRec>::delete_query_state(state1); - WIRSQuery<WRec>::delete_query_state(state2); - } - - auto merged = WIRSQuery<WRec>::merge(results, nullptr); - - 
ck_assert_int_eq(merged.size(), 2*k); - for (size_t i=0; i<merged.size(); i++) { - ck_assert_int_ge(merged[i].key, lower_key); - ck_assert_int_le(merged[i].key, upper_key); - } - - gsl_rng_free(parms.rng); - delete shard; - delete buffer; -} -END_TEST - - -START_TEST(t_wirs_buffer_query_scan) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wirs_query_parms<WRec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WIRSQuery<WRec, false>::get_buffer_query_state(buffer, &parms); - ((WIRSBufferState<WRec> *) state)->sample_size = k; - auto result = WIRSQuery<WRec, false>::buffer_query(buffer, state, &parms); - - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WIRSQuery<WRec, false>::delete_buffer_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .05)); - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -START_TEST(t_wirs_buffer_query_rejection) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wirs_query_parms<WRec> parms = {lower_key, upper_key, k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WIRSQuery<WRec>::get_buffer_query_state(buffer, &parms); - ((WIRSBufferState<WRec> *) state)->sample_size = k; - auto result = WIRSQuery<WRec>::buffer_query(buffer, state, &parms); - - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WIRSQuery<WRec>::delete_buffer_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .05)); - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -Suite *unit_testing() -{ - Suite *unit = suite_create("WIRS Shard Unit Testing"); - - TCase *create = tcase_create("de::WIRS constructor Testing"); - tcase_add_test(create, t_mbuffer_init); - tcase_add_test(create, t_wirs_init); - tcase_set_timeout(create, 100); - suite_add_tcase(unit, create); - - - TCase *tombstone = tcase_create("de:WIRS::tombstone cancellation Testing"); - tcase_add_test(tombstone, t_full_cancelation); - suite_add_tcase(unit, tombstone); - - - TCase *lookup = tcase_create("de:WIRS:point_lookup Testing"); - tcase_add_test(lookup, t_point_lookup); - tcase_add_test(lookup, t_point_lookup_miss); - suite_add_tcase(unit, lookup); - - - TCase *sampling = tcase_create("de:WIRS::WIRSQuery Testing"); - tcase_add_test(sampling, t_wirs_query); - tcase_add_test(sampling, t_wirs_query_merge); - tcase_add_test(sampling, t_wirs_buffer_query_rejection); - tcase_add_test(sampling, t_wirs_buffer_query_scan); - suite_add_tcase(unit, sampling); - - return unit; -} - - -int shard_unit_tests() -{ - int failed = 0; - Suite *unit = unit_testing(); - SRunner *unit_shardner = srunner_create(unit); - - 
srunner_run_all(unit_shardner, CK_NORMAL); - failed = srunner_ntests_failed(unit_shardner); - srunner_free(unit_shardner); - - return failed; -} - - -int main() -{ - int unit_failed = shard_unit_tests(); - - return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/wss_tests.cpp b/tests/wss_tests.cpp deleted file mode 100644 index cdc8001..0000000 --- a/tests/wss_tests.cpp +++ /dev/null @@ -1,390 +0,0 @@ -/* - * tests/wss_tests.cpp - * - * Unit tests for WSS (Augmented B+Tree) shard - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> - * - * All rights reserved. Published under the Modified BSD License. - * - */ - -#include "shard/WSS.h" -#include "testing.h" - -#include <check.h> - -using namespace de; - -typedef WSS<WRec> Shard; - -START_TEST(t_mbuffer_init) -{ - auto buffer = new MutableBuffer<WRec>(1024, 1024); - for (uint64_t i = 512; i > 0; i--) { - uint32_t v = i; - buffer->append({i,v, 1}); - } - - for (uint64_t i = 1; i <= 256; ++i) { - uint32_t v = i; - buffer->append({i, v, 1}, true); - } - - for (uint64_t i = 257; i <= 512; ++i) { - uint32_t v = i + 1; - buffer->append({i, v, 1}); - } - - Shard* shard = new Shard(buffer); - ck_assert_uint_eq(shard->get_record_count(), 512); - - delete buffer; - delete shard; -} - - -START_TEST(t_wss_init) -{ - size_t n = 512; - auto mbuffer1 = create_test_mbuffer<WRec>(n); - auto mbuffer2 = create_test_mbuffer<WRec>(n); - auto mbuffer3 = create_test_mbuffer<WRec>(n); - - auto shard1 = new Shard(mbuffer1); - auto shard2 = new Shard(mbuffer2); - auto shard3 = new Shard(mbuffer3); - - Shard* shards[3] = {shard1, shard2, shard3}; - auto shard4 = new Shard(shards, 3); - - ck_assert_int_eq(shard4->get_record_count(), n * 3); - ck_assert_int_eq(shard4->get_tombstone_count(), 0); - - size_t total_cnt = 0; - size_t shard1_idx = 0; - size_t shard2_idx = 0; - size_t shard3_idx = 0; - - for (size_t i = 0; i < shard4->get_record_count(); ++i) { - auto rec1 = shard1->get_record_at(shard1_idx); - auto rec2 = shard2->get_record_at(shard2_idx); - auto rec3 = shard3->get_record_at(shard3_idx); - - auto cur_rec = shard4->get_record_at(i); - - if (shard1_idx < n && cur_rec->rec == rec1->rec) { - ++shard1_idx; - } else if (shard2_idx < n && cur_rec->rec == rec2->rec) { - ++shard2_idx; - } else if (shard3_idx < n && cur_rec->rec == rec3->rec) { - ++shard3_idx; - } else { - assert(false); - } - } - - delete mbuffer1; - delete mbuffer2; - delete mbuffer3; - - delete shard1; - delete shard2; - delete shard3; - delete shard4; -} - - -START_TEST(t_point_lookup) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto wss = Shard(buffer); - - for (size_t i=0; i<n; i++) { - WRec r; - auto rec = (buffer->get_data() + i); - r.key = rec->rec.key; - r.value = rec->rec.value; - - auto result = wss.point_lookup(r); - ck_assert_ptr_nonnull(result); - ck_assert_int_eq(result->rec.key, r.key); - ck_assert_int_eq(result->rec.value, r.value); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_point_lookup_miss) -{ - size_t n = 10000; - - auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto wss = Shard(buffer); - - for (size_t i=n + 100; i<2*n; i++) { - WRec r; - r.key = i; - r.value = i; - - auto result = wss.point_lookup(r); - ck_assert_ptr_null(result); - } - - delete buffer; -} - -START_TEST(t_full_cancelation) -{ - size_t n = 100; - auto buffer = create_double_seq_mbuffer<WRec>(n, false); - auto buffer_ts = create_double_seq_mbuffer<WRec>(n, true); - - Shard* shard = new 
Shard(buffer); - Shard* shard_ts = new Shard(buffer_ts); - - ck_assert_int_eq(shard->get_record_count(), n); - ck_assert_int_eq(shard->get_tombstone_count(), 0); - ck_assert_int_eq(shard_ts->get_record_count(), n); - ck_assert_int_eq(shard_ts->get_tombstone_count(), n); - - Shard* shards[] = {shard, shard_ts}; - - Shard* merged = new Shard(shards, 2); - - ck_assert_int_eq(merged->get_tombstone_count(), 0); - ck_assert_int_eq(merged->get_record_count(), 0); - - delete buffer; - delete buffer_ts; - delete shard; - delete shard_ts; - delete merged; -} -END_TEST - - -START_TEST(t_wss_query) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - Shard* shard = new Shard(buffer); - - size_t k = 1000; - - size_t cnt[3] = {0}; - wss_query_parms<WRec> parms = {k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WSSQuery<WRec>::get_query_state(shard, &parms); - ((WSSState<WRec> *) state)->sample_size = k; - auto result = WSSQuery<WRec>::query(shard, state, &parms); - - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WSSQuery<WRec>::delete_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .05)); - - gsl_rng_free(parms.rng); - delete shard; - delete buffer; -} -END_TEST - - -START_TEST(t_wss_query_merge) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - Shard* shard = new Shard(buffer); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wss_query_parms<WRec> parms = {k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - std::vector<std::vector<Wrapped<WRec>>> results(2); - - for (size_t i=0; i<1000; i++) { - auto state1 = WSSQuery<WRec>::get_query_state(shard, &parms); - ((WSSState<WRec> *) state1)->sample_size = k; - results[0] = WSSQuery<WRec>::query(shard, state1, &parms); - - auto state2 = WSSQuery<WRec>::get_query_state(shard, &parms); - ((WSSState<WRec> *) state2)->sample_size = k; - results[1] = WSSQuery<WRec>::query(shard, state2, &parms); - - WSSQuery<WRec>::delete_query_state(state1); - WSSQuery<WRec>::delete_query_state(state2); - } - - auto merged = WSSQuery<WRec>::merge(results, nullptr); - - ck_assert_int_eq(merged.size(), 2*k); - for (size_t i=0; i<merged.size(); i++) { - ck_assert_int_ge(merged[i].key, lower_key); - ck_assert_int_le(merged[i].key, upper_key); - } - - gsl_rng_free(parms.rng); - delete shard; - delete buffer; -} -END_TEST - - -START_TEST(t_wss_buffer_query_scan) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wss_query_parms<WRec> parms = {k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WSSQuery<WRec, false>::get_buffer_query_state(buffer, &parms); - ((WSSBufferState<WRec> *) state)->sample_size = k; - auto result = WSSQuery<WRec, false>::buffer_query(buffer, state, &parms); - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WSSQuery<WRec, false>::delete_buffer_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, 
total_samples, .05)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .05)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .05)); - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -START_TEST(t_wss_buffer_query_rejection) -{ - size_t n=1000; - auto buffer = create_weighted_mbuffer<WRec>(n); - - uint64_t lower_key = 0; - uint64_t upper_key = 5; - - size_t k = 1000; - - size_t cnt[3] = {0}; - wss_query_parms<WRec> parms = {k}; - parms.rng = gsl_rng_alloc(gsl_rng_mt19937); - - size_t total_samples = 0; - - for (size_t i=0; i<1000; i++) { - auto state = WSSQuery<WRec>::get_buffer_query_state(buffer, &parms); - ((WSSBufferState<WRec> *) state)->sample_size = k; - auto result = WSSQuery<WRec>::buffer_query(buffer, state, &parms); - - total_samples += result.size(); - - for (size_t j=0; j<result.size(); j++) { - cnt[result[j].rec.key - 1]++; - } - - WSSQuery<WRec>::delete_buffer_query_state(state); - } - - ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .1)); - ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .1)); - ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .1)); - - gsl_rng_free(parms.rng); - delete buffer; -} -END_TEST - - -Suite *unit_testing() -{ - Suite *unit = suite_create("WSS Shard Unit Testing"); - - TCase *create = tcase_create("de::WSS constructor Testing"); - tcase_add_test(create, t_mbuffer_init); - tcase_add_test(create, t_wss_init); - tcase_set_timeout(create, 100); - suite_add_tcase(unit, create); - - - TCase *tombstone = tcase_create("de:WSS::tombstone cancellation Testing"); - tcase_add_test(tombstone, t_full_cancelation); - suite_add_tcase(unit, tombstone); - - - TCase *lookup = tcase_create("de:WSS:point_lookup Testing"); - tcase_add_test(lookup, t_point_lookup); - tcase_add_test(lookup, t_point_lookup_miss); - suite_add_tcase(unit, lookup); - - - - TCase *sampling = tcase_create("de:WSS::WSSQuery Testing"); - tcase_add_test(sampling, t_wss_query); - tcase_add_test(sampling, t_wss_query_merge); - tcase_add_test(sampling, t_wss_buffer_query_rejection); - tcase_add_test(sampling, t_wss_buffer_query_scan); - suite_add_tcase(unit, sampling); - - return unit; -} - - -int shard_unit_tests() -{ - int failed = 0; - Suite *unit = unit_testing(); - SRunner *unit_shardner = srunner_create(unit); - - srunner_run_all(unit_shardner, CK_NORMAL); - failed = srunner_ntests_failed(unit_shardner); - srunner_free(unit_shardner); - - return failed; -} - - -int main() -{ - int unit_failed = shard_unit_tests(); - - return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} |
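
The test refactoring in this merge replaces each shard's hand-written Check suite with shared headers (tests/include/shard_standard.h, tests/include/rangequery.h, tests/include/rangecount.h) that are #include'd after the test binary typedefs R (the record type) and Shard, and that supply inject_*_tests(Suite *) helpers. The contents of those headers are not part of this diff, so the following is only a minimal, self-contained sketch of the pattern; the Toy shard, the test body, and the helper name inject_demo_tests are invented for illustration. Build with a C++14 compiler and link against libcheck (plus its usual dependencies, e.g. -lcheck -lsubunit -lm on Linux).

/* Hypothetical sketch of the include-injection test pattern (not repository code). */
#include <check.h>
#include <cstddef>
#include <cstdlib>
#include <vector>

/* A stand-in shard type; each real test binary typedefs its own. */
struct Toy {
    std::vector<int> data;
    void insert(int r) { data.push_back(r); }
    std::size_t count() const { return data.size(); }
};

/* The names the shared header is written against. */
typedef int R;
typedef Toy Shard;

/* --- From here down is what would live in the shared header, --- */
/* --- compiled once per translation unit against R and Shard.  --- */
START_TEST(t_demo_insert)
{
    Shard s;
    R rec = 42;
    s.insert(rec);
    ck_assert_uint_eq(s.count(), 1);
}
END_TEST

static void inject_demo_tests(Suite *unit)
{
    TCase *tc = tcase_create("demo: insert Testing");
    tcase_add_test(tc, t_demo_insert);
    suite_add_tcase(unit, tc);
}

/* --- Per-shard test binary: build the suite, run it, report. --- */
int main()
{
    Suite *unit = suite_create("Demo Shard Unit Testing");
    inject_demo_tests(unit);

    SRunner *runner = srunner_create(unit);
    srunner_run_all(runner, CK_NORMAL);
    int failed = srunner_ntests_failed(runner);
    srunner_free(runner);

    return (failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

Because the shared header is stamped out against whatever R and Shard are in scope, the same test logic serves PGM, TrieSpline, ISAMTree, and VPTree without the per-file duplication the deleted suites carried.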
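The vptree_tests.cpp hunks also show the new buffer-view discipline introduced by this concurrency work: shards are constructed from buffer->get_buffer_view() rather than from the buffer directly, and tests that read through a view take it inside a nested scope so the view is destroyed before delete buffer. The diff does not include MutableBuffer or its view type, so the sketch below is an assumption about the rationale: it models a view that pins the buffer with a reference count for as long as a reader holds it, which is what a concurrent design would need to keep records from being reclaimed mid-read. ToyBuffer and ToyView are hypothetical stand-ins, not the repository's types.

/* Hypothetical model of RAII buffer pinning (not repository code). */
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct ToyBuffer;

/* A read "view": pins the buffer on construction, unpins on destruction. */
struct ToyView {
    explicit ToyView(ToyBuffer *b);
    ToyView(const ToyView &) = delete;
    ToyView &operator=(const ToyView &) = delete;
    ~ToyView();
    uint64_t get(std::size_t i) const;
private:
    ToyBuffer *src;
};

struct ToyBuffer {
    std::vector<uint64_t> recs;
    std::atomic<int> pins{0};
    ~ToyBuffer() { assert(pins.load() == 0); /* no reader may outlive us */ }
};

ToyView::ToyView(ToyBuffer *b) : src(b) { src->pins.fetch_add(1); }
ToyView::~ToyView() { src->pins.fetch_sub(1); }
uint64_t ToyView::get(std::size_t i) const { return src->recs[i]; }

int main()
{
    auto *buffer = new ToyBuffer{{1, 2, 3}};
    {
        ToyView bv(buffer);          // pin the buffer while reading
        assert(bv.get(1) == 2);
    }                                // scope ends: view destroyed, pin dropped
    delete buffer;                   // safe: no outstanding views
    return 0;
}

This mirrors the shape of the rewritten tests above: the braces around auto bv = buffer->get_buffer_view(); are not decorative, they bound the lifetime of the reader's claim on the buffer.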