82 files changed, 4414 insertions, 4964 deletions
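The bulk of this patch migrates the query interface from record-and-shard template pairs (de::irs::Query<Rec, Shard> plus a separate de::irs::Parms<Rec>) to shard-only query types (de::irs::Query<Shard>) that expose nested Parameters and LocalQuery typedefs, and replaces the old get_query_state / process_query_states / query calls with local_preproc / distribute_query / local_query. The sketch below is illustrative only and is not part of the patch; it assembles the typedefs and calls visible in the changed benchmark sources (irs_bench.cpp, thread_scaling_bench.cpp, standard_benchmarks.h). The include paths and the example() wrapper are assumptions made for the sketch.

```cpp
// Illustrative sketch of the post-refactor query interface -- not part of the patch.
// Typedefs and call names are taken from the changed benchmark sources; include
// paths follow the benchmarks' -Iinclude convention and are assumptions.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

#include "framework/DynamicExtension.h"
#include "query/irs.h"
#include "shard/ISAMTree.h"

typedef de::Record<uint64_t, uint64_t> Rec;
typedef de::ISAMTree<Rec> Shard;
typedef de::irs::Query<Shard> Q;   /* was de::irs::Query<Rec, Shard> */
typedef Q::Parameters QP;          /* was de::irs::Parms<Rec> */
typedef de::DynamicExtension<Shard, Q, /* record type now comes from the shard */
                             de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING,
                             de::SerialScheduler> Ext;

void example(Ext *extension, Shard *shard, QP q) {
  /* static (single-shard) path: the former get_query_state /
   * process_query_states / query triple becomes
   * local_preproc / distribute_query / local_query */
  std::vector<Q::LocalQuery *> local = {Q::local_preproc(shard, &q)};
  Q::distribute_query(&q, local, nullptr);
  auto shard_res = Q::local_query(shard, local[0]);
  delete local[0]; /* replaces Q::delete_query_state() */

  /* dynamized path: parameters are now passed by rvalue, not as a void* */
  auto fut = extension->query(std::move(q));
  auto results = fut.get();

  fprintf(stdout, "%zu\t%zu\n", shard_res.size(), results.size());
}
```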
diff --git a/CMakeLists.txt b/CMakeLists.txt index a03c351..b185b0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,10 +1,10 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.22) -#set(CMAKE_C_COMPILER clang) -#set(CMAKE_CXX_COMPILER clang++) +set(CMAKE_C_COMPILER gcc) +set(CMAKE_CXX_COMPILER g++) set(CMAKE_CXX_STANDARD 20) -#set(CMAKE_CXX_STANDARD_REQUIRED True) +set(CMAKE_CXX_STANDARD_REQUIRED True) set(namespace "de") project("Practical Dynamic Extension" VERSION 0.1.0) @@ -21,6 +21,10 @@ set(CMAKE_CXX_FLAGS=-latomic -mcx16) add_compile_options(-Iinclude -Iexternal/PLEX/include -Iexternal -mcx16 -march=native) # -fconcepts-diagnostics-depth=3) +find_package(OpenMP REQUIRED) +add_compile_options(${OpenMP_CXX_FLAGS}) +link_libraries(OpenMP::OpenMP_CXX) + if (BSD) add_link_options(-L/usr/local/lib) add_compile_options(-I/usr/local/include) @@ -28,7 +32,7 @@ if (BSD) endif() if (debug) - add_compile_options(-g -O0) + add_compile_options(-g -O0 -Wall) if (!BSD) add_compile_options(-fsanitize=address) add_link_options(-fsanitize=address) @@ -36,7 +40,7 @@ if (debug) add_link_options(-fsanitize=undefined) endif() else() - add_compile_options(-O3 -g) + add_compile_options(-O3 -g -Wall) endif() # Test build instructions @@ -44,11 +48,6 @@ if (tests) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/tests") file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/tests/data") - add_executable(augbtree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/augbtree_tests.cpp) - target_link_libraries(augbtree_tests PUBLIC gsl check subunit pthread atomic) - target_link_options(augbtree_tests PUBLIC -mcx16) - target_include_directories(augbtree_tests PRIVATE include external/psudb-common/cpp/include external/ctpl) - add_executable(internal_level_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/internal_level_tests.cpp) target_link_libraries(internal_level_tests PUBLIC gsl check subunit pthread atomic) target_link_options(internal_level_tests PUBLIC -mcx16) @@ -64,6 +63,11 @@ if (tests) target_link_options(rangequery_tests PUBLIC -mcx16) target_include_directories(rangequery_tests PRIVATE include external/psudb-common/cpp/include) + add_executable(irs_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/irs_tests.cpp) + target_link_libraries(irs_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(irs_tests PUBLIC -mcx16) + target_include_directories(irs_tests PRIVATE include external/psudb-common/cpp/include) + add_executable(rangecount_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/rangecount_tests.cpp) target_link_libraries(rangecount_tests PUBLIC gsl check subunit pthread atomic) @@ -101,6 +105,11 @@ if (tests) target_link_options(de_bsm_tomb PUBLIC -mcx16) target_include_directories(de_bsm_tomb PRIVATE include external/PLEX/include external/psudb-common/cpp/include external) + add_executable(de_bsm_tag ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_bsm_tag.cpp) + target_link_libraries(de_bsm_tag PUBLIC gsl check subunit pthread atomic) + target_link_options(de_bsm_tag PUBLIC -mcx16) + target_include_directories(de_bsm_tag PRIVATE include external/PLEX/include external/psudb-common/cpp/include external) + add_executable(de_level_concurrent ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_level_concurrent.cpp) target_link_libraries(de_level_concurrent PUBLIC gsl check subunit pthread atomic) target_link_options(de_level_concurrent PUBLIC -mcx16) @@ -116,31 +125,30 @@ if (tests) target_link_options(memisam_tests PUBLIC -mcx16) target_include_directories(memisam_tests PRIVATE include 
external/psudb-common/cpp/include) - add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp) - target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread atomic) - target_link_options(triespline_tests PUBLIC -mcx16) - target_include_directories(triespline_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include) - add_executable(alias_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/alias_tests.cpp) target_link_libraries(alias_tests PUBLIC gsl check subunit pthread atomic) target_link_options(alias_tests PUBLIC -mcx16) target_include_directories(alias_tests PRIVATE include external/psudb-common/cpp/include) - # OpenBSD doesn't have OpenMP support, so don't build the PGM code on that - # platform. + add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp) + target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread atomic) + target_link_options(triespline_tests PUBLIC -mcx16) + target_include_directories(triespline_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include) + add_executable(pgm_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/pgm_tests.cpp) target_link_libraries(pgm_tests PUBLIC gsl check subunit pthread gomp atomic) target_include_directories(pgm_tests PRIVATE include external/PGM-index/include external/psudb-common/cpp/include) target_link_options(pgm_tests PUBLIC -mcx16) - target_compile_options(pgm_tests PUBLIC -fopenmp) + target_compile_options(pgm_tests PUBLIC) - # Triespline code doesn't build under OpenBSD either due to ambiguous function call; + # Triespline code doesn't build under OpenBSD due to ambiguous function call; # this is likely a difference between gcc and clang, rather than an OS thing - add_executable(triespline_debug ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_debug.cpp) - target_link_libraries(triespline_debug PUBLIC gsl check subunit pthread atomic) - target_link_options(triespline_debug PUBLIC -mcx16) - target_include_directories(triespline_debug PRIVATE include external/psudb-common/cpp/include external/PLEX/include) - + if (NOT BSD) + add_executable(triespline_debug ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_debug.cpp) + target_link_libraries(triespline_debug PUBLIC gsl check subunit pthread atomic) + target_link_options(triespline_debug PUBLIC -mcx16) + target_include_directories(triespline_debug PRIVATE include external/psudb-common/cpp/include external/PLEX/include) + endif() add_executable(fst_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/fst_tests.cpp) target_link_libraries(fst_tests PUBLIC gsl check subunit pthread atomic) @@ -155,123 +163,123 @@ if (vldb_bench) target_link_libraries(irs_bench PUBLIC gsl pthread atomic) target_include_directories(irs_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(irs_bench PUBLIC -mcx16) - target_compile_options(irs_bench PUBLIC -fopenmp) + target_compile_options(irs_bench PUBLIC) add_executable(vptree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench.cpp) target_link_libraries(vptree_bench PUBLIC gsl pthread atomic) target_include_directories(vptree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bench PUBLIC -mcx16) - target_compile_options(vptree_bench PUBLIC -fopenmp) + target_compile_options(vptree_bench PUBLIC) 
add_executable(vptree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bench_alt.cpp) target_link_libraries(vptree_bench_alt PUBLIC gsl pthread atomic) target_include_directories(vptree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bench_alt PUBLIC -mcx16) - target_compile_options(vptree_bench_alt PUBLIC -fopenmp) + target_compile_options(vptree_bench_alt PUBLIC) add_executable(vptree_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_parmsweep.cpp) target_link_libraries(vptree_parmsweep PUBLIC gsl pthread atomic) target_include_directories(vptree_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_parmsweep PUBLIC -mcx16) - target_compile_options(vptree_parmsweep PUBLIC -fopenmp) + target_compile_options(vptree_parmsweep PUBLIC) add_executable(vptree_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench.cpp) target_link_libraries(vptree_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(vptree_bsm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bsm_bench PUBLIC -mcx16) - target_compile_options(vptree_bsm_bench PUBLIC -fopenmp) + target_compile_options(vptree_bsm_bench PUBLIC) add_executable(vptree_bsm_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/vptree_bsm_bench_alt.cpp) target_link_libraries(vptree_bsm_bench_alt PUBLIC gsl pthread atomic) target_include_directories(vptree_bsm_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(vptree_bsm_bench_alt PUBLIC -mcx16) - target_compile_options(vptree_bsm_bench_alt PUBLIC -fopenmp) + target_compile_options(vptree_bsm_bench_alt PUBLIC) add_executable(fst_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bench.cpp) target_link_libraries(fst_bench PUBLIC gsl pthread atomic) target_include_directories(fst_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(fst_bench PUBLIC -mcx16) - target_compile_options(fst_bench PUBLIC -fopenmp) + target_compile_options(fst_bench PUBLIC) add_executable(fst_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/fst_bsm_bench.cpp) target_link_libraries(fst_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(fst_bsm_bench PRIVATE include external external/fast_succinct_trie/include external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(fst_bsm_bench PUBLIC -mcx16) - target_compile_options(fst_bsm_bench PUBLIC -fopenmp) + target_compile_options(fst_bsm_bench PUBLIC) add_executable(ts_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bench.cpp) target_link_libraries(ts_bench PUBLIC gsl pthread atomic) target_include_directories(ts_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_bench PUBLIC -mcx16) - target_compile_options(ts_bench PUBLIC -fopenmp) + 
target_compile_options(ts_bench PUBLIC) add_executable(ts_parmsweep ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_parmsweep.cpp) target_link_libraries(ts_parmsweep PUBLIC gsl pthread atomic) target_include_directories(ts_parmsweep PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_parmsweep PUBLIC -mcx16) - target_compile_options(ts_parmsweep PUBLIC -fopenmp) + target_compile_options(ts_parmsweep PUBLIC) add_executable(ts_bsm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_bsm_bench.cpp) target_link_libraries(ts_bsm_bench PUBLIC gsl pthread atomic) target_include_directories(ts_bsm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(ts_bsm_bench PUBLIC -mcx16) - target_compile_options(ts_bsm_bench PUBLIC -fopenmp) + target_compile_options(ts_bsm_bench PUBLIC) #add_executable(ts_mdsp_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/ts_mdsp_bench.cpp) #target_link_libraries(ts_mdsp_bench PUBLIC gsl pthread atomic) #target_include_directories(ts_mdsp_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) #target_link_options(ts_mdsp_bench PUBLIC -mcx16) - #target_compile_options(ts_mdsp_bench PUBLIC -fopenmp) + #target_compile_options(ts_mdsp_bench PUBLIC) add_executable(pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/pgm_bench.cpp) target_link_libraries(pgm_bench PUBLIC gsl pthread atomic gomp) target_include_directories(pgm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(pgm_bench PUBLIC -mcx16) - target_compile_options(pgm_bench PUBLIC -fopenmp) + target_compile_options(pgm_bench PUBLIC) add_executable(dynamic_pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/dynamic_pgm_bench.cpp) target_link_libraries(dynamic_pgm_bench PUBLIC gsl pthread atomic gomp) target_include_directories(dynamic_pgm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(dynamic_pgm_bench PUBLIC -mcx16) - target_compile_options(dynamic_pgm_bench PUBLIC -fopenmp) + target_compile_options(dynamic_pgm_bench PUBLIC) add_executable(btree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/btree_bench.cpp) target_link_libraries(btree_bench PUBLIC gsl pthread atomic gomp) target_include_directories(btree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(btree_bench PUBLIC -mcx16) - target_compile_options(btree_bench PUBLIC -fopenmp) + target_compile_options(btree_bench PUBLIC) add_executable(alex_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/alex_bench.cpp) target_link_libraries(alex_bench PUBLIC gsl ) target_include_directories(alex_bench PRIVATE external/psudb-common/cpp/include external/alex/src/core/ benchmarks/include) - target_compile_options(alex_bench PUBLIC -fopenmp) + target_compile_options(alex_bench PUBLIC) set_property(TARGET alex_bench PROPERTY CXX_STANDARD 14) add_executable(mtree_bench_alt ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench_alt.cpp) target_link_libraries(mtree_bench_alt 
PUBLIC gsl pthread atomic gomp) target_include_directories(mtree_bench_alt PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(mtree_bench_alt PUBLIC -mcx16) - target_compile_options(mtree_bench_alt PUBLIC -fopenmp) + target_compile_options(mtree_bench_alt PUBLIC) add_executable(bigann_sample ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/bigann_sample.cpp) target_link_libraries(bigann_sample PUBLIC gsl pthread atomic gomp) target_include_directories(bigann_sample PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(bigann_sample PUBLIC -mcx16) - target_compile_options(bigann_sample PUBLIC -fopenmp) + target_compile_options(bigann_sample PUBLIC) add_executable(mtree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/mtree_bench.cpp) target_link_libraries(mtree_bench PUBLIC gsl pthread atomic gomp) target_include_directories(mtree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(mtree_bench PUBLIC -mcx16) - target_compile_options(mtree_bench PUBLIC -fopenmp) + target_compile_options(mtree_bench PUBLIC) add_executable(thread_scaling_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/thread_scaling_bench.cpp) target_link_libraries(thread_scaling_bench PUBLIC gsl pthread atomic) target_include_directories(thread_scaling_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(thread_scaling_bench PUBLIC -mcx16) - target_compile_options(thread_scaling_bench PUBLIC -fopenmp) + target_compile_options(thread_scaling_bench PUBLIC) add_executable(btree_thread_scaling_bench @@ -279,7 +287,7 @@ if (vldb_bench) target_link_libraries(btree_thread_scaling_bench PUBLIC gsl pthread atomic) target_include_directories(btree_thread_scaling_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include) target_link_options(btree_thread_scaling_bench PUBLIC -mcx16) - target_compile_options(btree_thread_scaling_bench PUBLIC -fopenmp) + target_compile_options(btree_thread_scaling_bench PUBLIC) endif() diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h index 41eb18c..1a40a78 100644 --- a/benchmarks/include/file_util.h +++ b/benchmarks/include/file_util.h @@ -269,7 +269,7 @@ static std::vector<R> read_binary_vector_file(std::string &fname, size_t n) { return records; } -static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { +[[maybe_unused]] static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) { std::fstream file; file.open(fname, std::ios::in); diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h index b805c08..797b0c5 100644 --- a/benchmarks/include/standard_benchmarks.h +++ b/benchmarks/include/standard_benchmarks.h @@ -18,25 +18,15 @@ #include "psu-util/progress.h" #include "benchmark_types.h" #include "psu-util/bentley-saxe.h" +#include "shard/ISAMTree.h" static size_t g_deleted_records = 0; -static double delete_proportion = 0.05; +static double delete_proportion = 0.5; static volatile size_t total = 0; 
-template<typename DE, typename QP, typename R> -static void run_queries(DE *extension, DE *ghost, std::vector<QP> &queries) { - for (size_t i=0; i<queries.size(); i++) { - std::vector<R> res = extension->query(&queries[i]); - std::vector<R> negres = ghost->query(&queries[i]); - auto result = res[0].first - negres[0].first; - total = result; - } -} - - -template<typename DE, typename QP, bool BSM=false> -static void run_queries(DE *extension, std::vector<QP> &queries) { +template<typename DE, typename Q, bool BSM=false> +static void run_queries(DE *extension, std::vector<typename Q::Parameters> &queries) { for (size_t i=0; i<queries.size(); i++) { if constexpr (std::is_same_v<MTree, DE>) { std::vector<Word2VecRec> result; @@ -72,7 +62,8 @@ static void run_queries(DE *extension, std::vector<QP> &queries) { ++ptr; } } else { - auto res = extension->query(&queries[i]); + auto q = queries[i]; + auto res = extension->query(std::move(q)); if constexpr (!BSM) { auto result = res.get(); #ifdef BENCH_PRINT_RESULTS @@ -100,8 +91,8 @@ static void run_queries(DE *extension, std::vector<QP> &queries) { } } -template <typename R> -static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) { +template <typename R, typename Q> +static void run_btree_queries(BenchBTree *btree, std::vector<typename Q::Parameters> &queries) { std::vector<int64_t> sample_set; sample_set.reserve(queries[0].sample_size); @@ -111,18 +102,16 @@ static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> } -template<typename S, typename QP, typename Q> -static void run_static_queries(S *shard, std::vector<QP> &queries) { +template<typename S, typename Q> +static void run_static_queries(S *shard, std::vector<typename Q::Parameters> &queries) { for (size_t i=0; i<queries.size(); i++) { auto q = &queries[i]; - auto state = Q::get_query_state(shard, q); - - std::vector<void*> shards = {shard}; - std::vector<void*> states = {state}; + std::vector<S *> shards = {shard}; + std::vector<typename Q::LocalQuery*> local_queries = {Q::local_preproc(shard, q)}; - Q::process_query_states(q, states, nullptr); - auto res = Q::query(shard, state, q); + Q::distribute_query(q, local_queries, nullptr); + auto res = Q::local_query(shard, local_queries[0]); #ifdef BENCH_PRINT_RESULTS fprintf(stdout, "\n\n"); @@ -136,55 +125,12 @@ static void run_static_queries(S *shard, std::vector<QP> &queries) { } } - -/* - * Insert records into a standard Bentley-Saxe extension. Deletes are not - * supported. 
- */ -template<typename DS, typename R, bool MDSP=false> -static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, - size_t start, size_t stop, std::vector<R> &records) { - - psudb::progress_update(0, "Insert Progress"); - for (size_t i=start; i<stop; i++) { - extension->insert(records[i]); - } - - psudb::progress_update(1, "Insert Progress"); -} - - -template<typename DS, typename R, bool MDSP=false> -static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension, - psudb::bsm::BentleySaxe<R, DS, MDSP> *ghost, - size_t start, size_t stop, std::vector<R> &records, - std::vector<size_t> &to_delete, size_t &delete_idx, - gsl_rng *rng) { - - psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; - for (size_t i=start; i<stop; i++) { - - extension->insert(records[i]); - - if (gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - ghost->insert(records[to_delete[delete_idx]]); - delete_idx++; - g_deleted_records++; - } - - } - -} - - template<typename DE, typename R> static void insert_records(DE *structure, size_t start, size_t stop, std::vector<R> &records, std::vector<size_t> &to_delete, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; i<stop; i++) { if constexpr (std::is_same_v<BenchBTree, DE>) { @@ -302,8 +248,8 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn return continue_benchmark; } -template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true> -static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) { +template <typename DE, typename Q, bool PROGRESS=true> +static bool query_latency_bench(DE &de_index, std::vector<typename Q::Parameters> queries, size_t trial_cnt=1) { char progbuf[25]; if constexpr (PROGRESS) { sprintf(progbuf, "querying:"); @@ -339,8 +285,8 @@ static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t tr } -template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true> -static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) { +template <typename Shard, typename Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector<typename Q::Parameters> queries, size_t trial_cnt=100) { char progbuf[25]; if constexpr (PROGRESS) { sprintf(progbuf, "querying:"); @@ -354,15 +300,15 @@ static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t t psudb::progress_update((double) (i) / (double) trial_cnt, progbuf); } - std::vector<void *> states(1); + std::vector<typename Q::LocalQuery*> local_queries(1); auto start = std::chrono::high_resolution_clock::now(); for (size_t j=0; j<queries.size(); j++) { - states[0] = Q::get_query_state(shard, &queries[j]); - Q::process_query_states(&queries[j], states, nullptr); - auto res = Q::query(shard, states[0], &queries[j]); + local_queries[0] = Q::local_preproc(shard, &queries[j]); + Q::distribute_query(&queries[j], local_queries, nullptr); + auto res = Q::local_query(shard, local_queries[0]); total_results += res.size(); - Q::delete_query_state(states[0]); + delete local_queries[0]; } auto stop = std::chrono::high_resolution_clock::now(); diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp index ba687f3..636f576 100644 --- a/benchmarks/vldb/alex_bench.cpp +++ b/benchmarks/vldb/alex_bench.cpp @@ -33,7 +33,6 @@ static void 
insert_records(Alex *structure, size_t start, size_t stop, size_t &delete_idx, bool delete_records, gsl_rng *rng) { psudb::progress_update(0, "Insert Progress"); - size_t reccnt = 0; for (size_t i=start; i<stop; i++) { structure->insert(records[i].key, records[i].value); diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp index fa72831..dc5142a 100644 --- a/benchmarks/vldb/btree_bench.cpp +++ b/benchmarks/vldb/btree_bench.cpp @@ -19,8 +19,8 @@ typedef btree_record<int64_t, int64_t> Rec; typedef de::ISAMTree<Rec> Shard; -typedef de::irs::Query<Rec, Shard> Q; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -71,7 +71,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_btree_queries<Rec>(&btree, queries); + run_btree_queries<Rec, Q>(&btree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/btree_thread_scaling_bench.cpp b/benchmarks/vldb/btree_thread_scaling_bench.cpp index 557e966..d33a1f8 100644 --- a/benchmarks/vldb/btree_thread_scaling_bench.cpp +++ b/benchmarks/vldb/btree_thread_scaling_bench.cpp @@ -7,6 +7,7 @@ #include <thread> #include "query/irs.h" +#include "shard/ISAMTree.h" #include "benchmark_types.h" #include "file_util.h" #include <mutex> @@ -17,7 +18,10 @@ typedef btree_record<int64_t, int64_t> Rec; -typedef de::irs::Parms<Rec> QP; + +typedef de::ISAMTree<Rec> Shard; +typedef de::irs::Query<Shard> Q; +typedef Q::Parameters QP; std::atomic<bool> inserts_done = false; @@ -47,7 +51,6 @@ void query_thread(BenchBTree *tree, std::vector<QP> *queries) { } void insert_thread(BenchBTree *tree, size_t start, std::vector<Rec> *records) { - size_t reccnt = 0; for (size_t i=start; i<records->size(); i++) { btree_record<int64_t, int64_t> r; r.key = (*records)[i].key; diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp index 15b130f..9206e40 100644 --- a/benchmarks/vldb/dynamic_pgm_bench.cpp +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -14,9 +14,11 @@ #include "psu-util/timer.h" - typedef de::Record<uint64_t, uint64_t> Rec; -typedef de::rc::Parms<Rec> QP; + +typedef de::ISAMTree<Rec> Shard; +typedef de::rc::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -62,7 +64,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<PGM, QP>(&pgm, queries); + run_queries<PGM, Q>(&pgm, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp index 276a922..e4b5bf6 100644 --- a/benchmarks/vldb/fst_bench.cpp +++ b/benchmarks/vldb/fst_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<const char *, uint64_t> Rec; typedef de::FSTrie<Rec> Shard; -typedef de::pl::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::pl::Parms<Rec> QP; +typedef de::pl::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt 
datafile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp index 15a441a..b0be115 100644 --- a/benchmarks/vldb/fst_bsm_bench.cpp +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<const char *, uint64_t> Rec; typedef de::FSTrie<Rec> Shard; -typedef de::pl::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::pl::Parms<Rec> QP; +typedef de::pl::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -83,7 +83,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp index e062e80..a772326 100644 --- a/benchmarks/vldb/irs_bench.cpp +++ b/benchmarks/vldb/irs_bench.cpp @@ -18,9 +18,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::ISAMTree<Rec> Shard; -typedef de::irs::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -72,7 +72,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -80,7 +80,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp index cc2f41f..ed107b5 100644 --- a/benchmarks/vldb/mtree_bench.cpp +++ b/benchmarks/vldb/mtree_bench.cpp @@ -5,6 +5,7 @@ #define ENABLE_TIMER #include 
"query/knn.h" +#include "shard/VPTree.h" #include "file_util.h" #include "standard_benchmarks.h" @@ -14,7 +15,9 @@ typedef Word2VecRec Rec; -typedef de::knn::Parms<Rec> QP; +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -66,7 +69,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<MTree, QP>(mtree, queries); + run_queries<MTree, Q>(mtree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp index 50c6117..c5ab283 100644 --- a/benchmarks/vldb/mtree_bench_alt.cpp +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -5,6 +5,7 @@ #define ENABLE_TIMER #include "query/knn.h" +#include "shard/VPTree.h" #include "file_util.h" #include "standard_benchmarks.h" @@ -14,7 +15,9 @@ typedef ANNRec Rec; -typedef de::knn::Parms<Rec> QP; +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Shard> Q; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -66,7 +69,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<MTree_alt, QP>(mtree, queries); + run_queries<MTree_alt, Q>(mtree, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp index cec95df..3b4340b 100644 --- a/benchmarks/vldb/pgm_bench.cpp +++ b/benchmarks/vldb/pgm_bench.cpp @@ -20,9 +20,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::PGM<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -69,7 +69,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -77,7 +77,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/thread_scaling_bench.cpp b/benchmarks/vldb/thread_scaling_bench.cpp index b679e92..3b9311b 100644 --- a/benchmarks/vldb/thread_scaling_bench.cpp +++ b/benchmarks/vldb/thread_scaling_bench.cpp @@ -20,9 +20,9 @@ typedef de::Record<int64_t, int64_t> Rec; typedef de::ISAMTree<Rec> ISAM; -typedef de::irs::Query<Rec, ISAM> Q; -typedef de::DynamicExtension<Rec, ISAM, Q> Ext; -typedef de::irs::Parms<Rec> QP; +typedef de::irs::Query<ISAM> Q; +typedef de::DynamicExtension<ISAM, Q> Ext; +typedef Q::Parameters QP; std::atomic<bool> inserts_done = false; @@ -39,7 +39,7 @@ void query_thread(Ext *extension, std::vector<QP> *queries) { q.rng = rng; q.sample_size = 1000; - 
auto res = extension->query(&q); + auto res = extension->query(std::move(q)); auto r = res.get(); total += r.size(); nanosleep(&delay, nullptr); diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp index 81a430a..1bc75b6 100644 --- a/benchmarks/vldb/ts_bench.cpp +++ b/benchmarks/vldb/ts_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -70,7 +70,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp index 4511350..5bcfb5d 100644 --- a/benchmarks/vldb/ts_bsm_bench.cpp +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -21,9 +21,9 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -70,7 +70,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { auto shard = extension->create_static_structure(); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp deleted file mode 100644 index cc0cd99..0000000 --- a/benchmarks/vldb/ts_mdsp_bench.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * - */ - -#define ENABLE_TIMER - -#include <thread> - -#include "triespline_bsm.h" -#include "psu-util/bentley-saxe.h" -#include "framework/interface/Record.h" -#include "file_util.h" -#include "query/rangecount.h" -#include "psu-util/timer.h" -#include "standard_benchmarks.h" - -typedef std::pair<uint64_t, uint64_t> Rec; -typedef de::Record<uint64_t, uint64_t> FRec; - -typedef BSMTrieSpline<uint64_t, uint64_t> Shard; -typedef de::rc::Parms<FRec> QP; -typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext; 
- -void usage(char *progname) { - fprintf(stderr, "%s reccnt datafile queryfile\n", progname); -} - -int main(int argc, char **argv) { - - if (argc < 4) { - usage(argv[0]); - exit(EXIT_FAILURE); - } - - size_t n = atol(argv[1]); - std::string d_fname = std::string(argv[2]); - std::string q_fname = std::string(argv[3]); - - auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>(); - gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); - - auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n); - auto queries = read_range_queries<QP>(q_fname, .0001); - - /* warmup structure w/ 10% of records */ - size_t warmup = .1 * n; - insert_records<Shard, Rec, true>(extension, 0, warmup, data); - - TIMER_INIT(); - - TIMER_START(); - insert_records<Shard, Rec, true>(extension, warmup, data.size(), data); - TIMER_STOP(); - - auto insert_latency = TIMER_RESULT(); - size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); - - TIMER_START(); - run_queries<Ext, QP, true>(extension, queries); - TIMER_STOP(); - - auto query_latency = TIMER_RESULT() / queries.size(); - - fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); - - gsl_rng_free(rng); - delete extension; - fflush(stderr); -} - diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp index 2c9412a..a9203ab 100644 --- a/benchmarks/vldb/ts_parmsweep.cpp +++ b/benchmarks/vldb/ts_parmsweep.cpp @@ -18,10 +18,10 @@ typedef de::Record<uint64_t, uint64_t> Rec; typedef de::TrieSpline<Rec> Shard; -typedef de::rc::Query<Rec, Shard, true> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2; -typedef de::rc::Parms<Rec> QP; +typedef de::rc::Query<Shard, true> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -74,7 +74,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -106,7 +106,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext2, QP>(extension, queries); + run_queries<Ext2, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp index 0b98a52..417e3af 100644 --- a/benchmarks/vldb/vptree_bench.cpp +++ b/benchmarks/vldb/vptree_bench.cpp @@ -19,9 +19,9 @@ typedef Word2VecRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; 
void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +84,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp index b09ee7d..5279f68 100644 --- a/benchmarks/vldb/vptree_bench_alt.cpp +++ b/benchmarks/vldb/vptree_bench_alt.cpp @@ -19,9 +19,9 @@ typedef ANNRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +84,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard, Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp index 4a7fcb6..d0d963c 100644 --- a/benchmarks/vldb/vptree_bsm_bench.cpp +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -18,10 +18,11 @@ typedef Word2VecRec Rec; + typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +76,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -84,7 +85,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Running Static query tests\n\n"); TIMER_START(); - run_static_queries<Shard, QP, Q>(shard, queries); + run_static_queries<Shard,Q>(shard, queries); TIMER_STOP(); auto static_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp index 63baf8b..b4956a2 100644 --- a/benchmarks/vldb/vptree_bsm_bench_alt.cpp +++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp @@ -19,9 +19,9 @@ typedef ANNRec Rec; typedef 
de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -75,7 +75,7 @@ int main(int argc, char **argv) { fprintf(stderr, "[I] Running Query Benchmark\n"); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vldb/vptree_parmsweep.cpp b/benchmarks/vldb/vptree_parmsweep.cpp index 2cbd521..5e496d4 100644 --- a/benchmarks/vldb/vptree_parmsweep.cpp +++ b/benchmarks/vldb/vptree_parmsweep.cpp @@ -19,10 +19,10 @@ typedef Word2VecRec Rec; typedef de::VPTree<Rec, 100, true> Shard; -typedef de::knn::Query<Rec, Shard> Q; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; -typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2; -typedef de::knn::Parms<Rec> QP; +typedef de::knn::Query<Shard> Q; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2; +typedef Q::Parameters QP; void usage(char *progname) { fprintf(stderr, "%s reccnt datafile queryfile\n", progname); @@ -78,7 +78,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext, QP>(extension, queries); + run_queries<Ext, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); @@ -111,7 +111,7 @@ int main(int argc, char **argv) { size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries<Ext2, QP>(extension, queries); + run_queries<Ext2, Q>(extension, queries); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/external/psudb-common b/external/psudb-common -Subproject de975098c12a83e996923a11f6b525ddb1985ae +Subproject 3be9caf90a12b6ac3afd4437ddd62167ba6d28b diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h index e2e2784..16cbb0e 100644 --- a/include/framework/DynamicExtension.h +++ b/include/framework/DynamicExtension.h @@ -1,8 +1,8 @@ /* * include/framework/DynamicExtension.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -14,766 +14,782 @@ #include <vector> #include "framework/interface/Scheduler.h" -#include "framework/scheduling/FIFOScheduler.h" #include "framework/scheduling/SerialScheduler.h" -#include "framework/structure/MutableBuffer.h" -#include "framework/interface/Record.h" #include "framework/structure/ExtensionStructure.h" +#include "framework/structure/MutableBuffer.h" -#include "framework/util/Configuration.h" #include "framework/scheduling/Epoch.h" +#include "framework/util/Configuration.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING, - DeletePolicy D=DeletePolicy::TAGGING, SchedulerInterface SCHED=FIFOScheduler> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L = LayoutPolicy::TEIRING, + DeletePolicy D = DeletePolicy::TAGGING, + SchedulerInterface SchedType = SerialScheduler> class DynamicExtension { - typedef S Shard; - typedef MutableBuffer<R> Buffer; - typedef ExtensionStructure<R, S, Q, L> Structure; - typedef Epoch<R, S, Q, L> _Epoch; - typedef BufferView<R> BufView; - - static constexpr size_t QUERY = 1; - static constexpr size_t RECONSTRUCTION = 2; - - struct epoch_ptr { - _Epoch *epoch; - size_t refcnt; - }; - + /* for unit testing purposes */ public: - DynamicExtension(size_t buffer_lwm, size_t buffer_hwm, size_t scale_factor, size_t memory_budget=0, - size_t thread_cnt=16) - : m_scale_factor(scale_factor) - , m_max_delete_prop(1) - , m_sched(memory_budget, thread_cnt) - , m_buffer(new Buffer(buffer_lwm, buffer_hwm)) - , m_core_cnt(thread_cnt) - , m_next_core(0) - , m_epoch_cnt(0) - { - if constexpr (L == LayoutPolicy::BSM) { - assert(scale_factor == 2); - } - - auto vers = new Structure(buffer_hwm, m_scale_factor, m_max_delete_prop); - m_current_epoch.store({new _Epoch(0, vers, m_buffer, 0), 0}); - m_previous_epoch.store({nullptr, 0}); - m_next_epoch.store({nullptr, 0}); - } - - ~DynamicExtension() { - - /* let any in-flight epoch transition finish */ - await_next_epoch(); - - /* shutdown the scheduler */ - m_sched.shutdown(); - - /* delete all held resources */ - delete m_next_epoch.load().epoch; - delete m_current_epoch.load().epoch; - delete m_previous_epoch.load().epoch; - - delete m_buffer; - } - - /* - * Insert the record `rec` into the index. If the buffer is full and - * the framework is blocking on an epoch transition, this call may fail - * and return 0. In this case, retry the call again later. If - * successful, 1 will be returned. The record will be immediately - * visible in the buffer upon the successful return of this function. - */ - int insert(const R &rec) { - return internal_append(rec, false); - } - - /* - * Erase the record `rec` from the index. It is assumed that `rec` - * currently exists--no special checks are made for correctness here. - * The behavior if this function will differ depending on if tombstone - * or tagged deletes are used. - * - * Tombstone deletes - inserts a tombstone record for `rec`. This *may* - * return 0 and fail if the buffer is full and the framework is - * blocking on an epoch transition. In this case, repeat the call - * later. 1 will be returned when the tombstone is successfully - * inserted. - * - * Tagging deletes - Does a point lookup for the record across the - * entire structure, and sets its delete bit when found. Returns 1 if - * the record is found and marked, and 0 if it was not (i.e., if it - * isn't present in the index). 
- */ - int erase(const R &rec) { - // FIXME: delete tagging will require a lot of extra work to get - // operating "correctly" in a concurrent environment. - - /* - * Get a view on the buffer *first*. This will ensure a stronger - * ordering than simply accessing the buffer directly, but is - * not *strictly* necessary. - */ - if constexpr (D == DeletePolicy::TAGGING) { - static_assert(std::same_as<SCHED, SerialScheduler>, "Tagging is only supported in single-threaded operation"); - - auto view = m_buffer->get_buffer_view(); - - auto epoch = get_active_epoch(); - if (epoch->get_structure()->tagged_delete(rec)) { - end_job(epoch); - return 1; - } - - end_job(epoch); - - /* - * the buffer will take the longest amount of time, and - * probably has the lowest probability of having the record, - * so we'll check it last. - */ - return view.delete_record(rec); - } + LayoutPolicy Layout = L; - /* - * If tagging isn't used, then delete using a tombstone - */ - return internal_append(rec, true); - } - - /* - * Execute the query with parameters `parms` and return a future. This - * future can be used to access a vector containing the results of the - * query. - * - * The behavior of this function is undefined if `parms` is not a - * pointer to a valid query parameter object for the query type used as - * a template parameter to construct the framework. - */ - std::future<std::vector<R>> query(void *parms) { - return schedule_query(parms); - } - - /* - * Returns the number of records (included tagged records and - * tombstones) currently within the framework. - */ - size_t get_record_count() { - auto epoch = get_active_epoch(); - auto t = epoch->get_buffer().get_record_count() + epoch->get_structure()->get_record_count(); - end_job(epoch); +private: + /* convenience typedefs for commonly used types within the class */ + typedef typename ShardType::RECORD RecordType; + typedef MutableBuffer<RecordType> Buffer; + typedef ExtensionStructure<ShardType, QueryType, L> Structure; + typedef Epoch<ShardType, QueryType, L> _Epoch; + typedef BufferView<RecordType> BufView; + + typedef typename QueryType::Parameters Parameters; + typedef typename QueryType::LocalQuery LocalQuery; + typedef typename QueryType::LocalQueryBuffer BufferQuery; + typedef typename QueryType::LocalResultType LocalResult; + typedef typename QueryType::ResultType QueryResult; + + + static constexpr size_t QUERY = 1; + static constexpr size_t RECONSTRUCTION = 2; + + struct epoch_ptr { + _Epoch *epoch; + size_t refcnt; + }; - return t; +public: + /** + * Create a new Dynamized version of a data structure, supporting + * inserts and, possibly, deletes. The following parameters are used + * for configuration of the structure, + * @param buffer_low_watermark The number of records that can be + * inserted before a buffer flush is initiated + * + * @param buffer_high_watermark The maximum buffer capacity, inserts + * will begin to fail once this number is reached, until the + * buffer flush has completed. Has no effect in single-threaded + * operation + * + * @param scale_factor The rate at which the capacity of levels + * grows; should be at least 2 for reasonable performance + * + * @param memory_budget Unused at this time + * + * @param thread_cnt The maximum number of threads available to the + * framework's scheduler for use in answering queries and + * performing compactions and flushes, etc. 
+ */ + DynamicExtension(size_t buffer_low_watermark, size_t buffer_high_watermark, + size_t scale_factor, size_t memory_budget = 0, + size_t thread_cnt = 16) + : m_scale_factor(scale_factor), m_max_delete_prop(1), + m_sched(memory_budget, thread_cnt), + m_buffer(new Buffer(buffer_low_watermark, buffer_high_watermark)), + m_core_cnt(thread_cnt), m_next_core(0), m_epoch_cnt(0) { + if constexpr (L == LayoutPolicy::BSM) { + assert(scale_factor == 2); } - /* - * Returns the number of tombstone records currently within the - * framework. This function can be called when tagged deletes are used, - * but will always return 0 in that case. - */ - size_t get_tombstone_count() { - auto epoch = get_active_epoch(); - auto t = epoch->get_buffer().get_tombstone_count() + epoch->get_structure()->get_tombstone_count(); - end_job(epoch); - - return t; - } + auto vers = + new Structure(buffer_high_watermark, m_scale_factor, m_max_delete_prop); + m_current_epoch.store({new _Epoch(0, vers, m_buffer, 0), 0}); + m_previous_epoch.store({nullptr, 0}); + m_next_epoch.store({nullptr, 0}); + } + + /** + * Destructor for DynamicExtension. Will block until the completion of + * any outstanding epoch transition, shut down the scheduler, and free + * all currently allocated shards, buffers, etc., by calling their + * destructors. + */ + ~DynamicExtension() { + + /* let any in-flight epoch transition finish */ + await_next_epoch(); + + /* shutdown the scheduler */ + m_sched.shutdown(); + + /* delete all held resources */ + delete m_next_epoch.load().epoch; + delete m_current_epoch.load().epoch; + delete m_previous_epoch.load().epoch; + + delete m_buffer; + } + + /** + * Inserts a record into the index. Returns 1 if the insert succeeds, + * and 0 if it fails. Inserts may fail if the DynamicExtension's buffer + * has reached the high water mark; in this case, the insert should be + * retried when the buffer has flushed. The record will be immediately + * visible inside the index upon the return of this function. + * + * @param rec The record to be inserted + * + * @return 1 on success, 0 on failure (in which case the insert should + * be retried) + */ + int insert(const RecordType &rec) { return internal_append(rec, false); } + + /** + * Erases a record from the index, according to the DeletePolicy + * template parameter. Returns 1 on success and 0 on failure. The + * equality comparison operator of RecordType is used to identify + * the record to be deleted. + * + * Deletes behave differently, depending on the DeletionPolicy. For + * Tombstone deletes, a tombstone record will be inserted into the + * index. The presence of the deleted record is not checked first, so + * deleting a record that does not exist will result in an unnecessary + * tombstone record being written. + * + * Deletes using Tagging will perform a point lookup for the record to + * be removed, and mark it as deleted in its header. + * + * @param rec The record to be deleted. The argument to this function + * should compare equal to the record to be deleted. + * + * @return 1 on success, and 0 on failure. For tombstone deletes, a + * failure will occur if the insert fails due to the buffer + * being full, and can be retried. For tagging deletes, a + * failure means that hte record to be deleted could not be + * found in the index, and should *not* be retried. + */ + int erase(const RecordType &rec) { + // FIXME: delete tagging will require a lot of extra work to get + // operating "correctly" in a concurrent environment. 
/* - * Get the number of levels within the framework. This count will - * include any empty levels, but will not include the buffer. Note that - * this is *not* the same as the number of shards when tiering is used, - * as each level can contain multiple shards in that case. + * Get a view on the buffer *first*. This will ensure a stronger + * ordering than simply accessing the buffer directly, but is + * not *strictly* necessary. */ - size_t get_height() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->get_height(); - end_job(epoch); + if constexpr (D == DeletePolicy::TAGGING) { + static_assert(std::same_as<SchedType, SerialScheduler>, + "Tagging is only supported in single-threaded operation"); - return t; - } + auto view = m_buffer->get_buffer_view(); - /* - * Get the number of bytes of memory allocated across the framework for - * storing records and associated index information (i.e., internal - * ISAM tree nodes). This includes memory that is allocated but - * currently unused in the buffer, or in shards themselves - * (overallocation due to delete cancellation, etc.). - */ - size_t get_memory_usage() { - auto epoch = get_active_epoch(); - auto t = m_buffer->get_memory_usage() + epoch->get_structure()->get_memory_usage(); + auto epoch = get_active_epoch(); + if (epoch->get_structure()->tagged_delete(rec)) { end_job(epoch); + return 1; + } - return t; - } - - /* - * Get the number of bytes of memory allocated across the framework for - * auxiliary structures. This can include bloom filters, aux - * hashtables, etc. - */ - size_t get_aux_memory_usage() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->get_aux_memory_usage(); - end_job(epoch); + end_job(epoch); - return t; + /* + * the buffer will take the longest amount of time, and + * probably has the lowest probability of having the record, + * so we'll check it last. + */ + return view.delete_record(rec); } /* - * Returns the maximum physical capacity of the buffer, measured in - * records. + * If tagging isn't used, then delete using a tombstone */ - size_t get_buffer_capacity() { - return m_buffer->get_capacity(); + return internal_append(rec, true); + } + + /** + * Schedule the execution of a query with specified parameters and + * returns a future that can be used to access the results. The query + * is executed asynchronously. + * @param parms An rvalue reference to the query parameters. + * + * @return A future, from which the query results can be retrieved upon + * query completion + */ + std::future<std::vector<QueryResult>> + query(Parameters &&parms) { + return schedule_query(std::move(parms)); + } + + /** + * Determine the number of records (including tagged records and + * tombstones) currently within the framework. This number is used for + * determining when and how reconstructions occur. + * + * @return The number of records within the index + */ + size_t get_record_count() { + auto epoch = get_active_epoch(); + auto t = epoch->get_buffer().get_record_count() + + epoch->get_structure()->get_record_count(); + end_job(epoch); + + return t; + } + + /** + * Returns the number of tombstone records currently within the + * index. This function can be called when tagged deletes are used, + * but will always return 0 in that case. 
+ *
+ * @return The number of tombstone records within the index
+ */
+ size_t get_tombstone_count() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_buffer().get_tombstone_count() +
+ epoch->get_structure()->get_tombstone_count();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of levels within the framework. This count will
+ * include any empty levels, but will not include the buffer. Note that
+ * this is *not* the same as the number of shards when tiering is used,
+ * as each level can contain multiple shards in that case.
+ *
+ * @return The number of levels within the index
+ */
+ size_t get_height() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_structure()->get_height();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of bytes of memory allocated across the framework for
+ * storing records and associated index information (i.e., internal
+ * ISAM tree nodes). This includes memory that is allocated but
+ * currently unused in the buffer, or in shards themselves
+ * (overallocation due to delete cancellation, etc.).
+ *
+ * @return The number of bytes of memory used for shards (as reported by
+ * ShardType::get_memory_usage) and the buffer by the index.
+ */
+ size_t get_memory_usage() {
+ auto epoch = get_active_epoch();
+ auto t = m_buffer->get_memory_usage() +
+ epoch->get_structure()->get_memory_usage();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Get the number of bytes of memory allocated across the framework for
+ * auxiliary structures. This can include bloom filters, aux
+ * hashtables, etc.
+ *
+ * @return The number of bytes of memory used for auxiliary structures
+ * (as reported by ShardType::get_aux_memory_usage) by the index.
+ */
+ size_t get_aux_memory_usage() {
+ auto epoch = get_active_epoch();
+ auto t = epoch->get_structure()->get_aux_memory_usage();
+ end_job(epoch);
+
+ return t;
+ }
+
+ /**
+ * Create a new single Shard object containing all of the records
+ * within the framework (buffer and shards).
+ *
+ * @param await_reconstruction_completion Specifies whether the currently
+ * active state of the index should be used to create the shard
+ * (false), or if shard construction should wait for any active
+ * reconstructions to finish first (true). Default value of false.
+ *
+ * @return A new shard object, containing a copy of all records within
+ * the index. Ownership of this object is transferred to the
+ * caller.
+ */
+ ShardType *
+ create_static_structure(bool await_reconstruction_completion = false) {
+ if (await_reconstruction_completion) {
+ await_next_epoch();
}
-
- /*
- * Create a new single Shard object containing all of the records
- * within the framework (buffer and shards). The optional parameter can
- * be used to specify whether the Shard should be constructed with the
- * currently active state of the framework (false), or if shard
- * construction should wait until any ongoing reconstructions have
- * finished and use that new version (true).
- */ - Shard *create_static_structure(bool await_reconstruction_completion=false) { - if (await_reconstruction_completion) { - await_next_epoch(); - } - auto epoch = get_active_epoch(); - auto vers = epoch->get_structure(); - std::vector<Shard *> shards; + auto epoch = get_active_epoch(); + auto vers = epoch->get_structure(); + std::vector<ShardType *> shards; - - if (vers->get_levels().size() > 0) { - for (int i=vers->get_levels().size() - 1; i>= 0; i--) { - if (vers->get_levels()[i] && vers->get_levels()[i]->get_record_count() > 0) { - shards.emplace_back(vers->get_levels()[i]->get_combined_shard()); - } - } - } - - /* - * construct a shard from the buffer view. We'll hold the view - * for as short a time as possible: once the records are exfiltrated - * from the buffer, there's no reason to retain a hold on the view's - * head pointer any longer - */ - { - auto bv = epoch->get_buffer(); - if (bv.get_record_count() > 0) { - shards.emplace_back(new S(std::move(bv))); - } - } - - Shard *flattened = new S(shards); - - for (auto shard : shards) { - delete shard; + if (vers->get_levels().size() > 0) { + for (int i = vers->get_levels().size() - 1; i >= 0; i--) { + if (vers->get_levels()[i] && + vers->get_levels()[i]->get_record_count() > 0) { + shards.emplace_back(vers->get_levels()[i]->get_combined_shard()); } - - end_job(epoch); - return flattened; + } } /* - * If the current epoch is *not* the newest one, then wait for - * the newest one to become available. Otherwise, returns immediately. + * construct a shard from the buffer view. We'll hold the view + * for as short a time as possible: once the records are exfiltrated + * from the buffer, there's no reason to retain a hold on the view's + * head pointer any longer */ - void await_next_epoch() { - while (m_next_epoch.load().epoch != nullptr) { - std::unique_lock<std::mutex> lk(m_epoch_cv_lk); - m_epoch_cv.wait(lk); - } + { + auto bv = epoch->get_buffer(); + if (bv.get_record_count() > 0) { + shards.emplace_back(new ShardType(std::move(bv))); + } } - /* - * Mostly exposed for unit-testing purposes. Verifies that the current - * active version of the ExtensionStructure doesn't violate the maximum - * tombstone proportion invariant. - */ - bool validate_tombstone_proportion() { - auto epoch = get_active_epoch(); - auto t = epoch->get_structure()->validate_tombstone_proportion(); - end_job(epoch); - return t; - } + ShardType *flattened = new ShardType(shards); + for (auto shard : shards) { + delete shard; + } - void print_scheduler_statistics() { - m_sched.print_statistics(); + end_job(epoch); + return flattened; + } + + /* + * If the current epoch is *not* the newest one, then wait for + * the newest one to become available. Otherwise, returns immediately. + */ + void await_next_epoch() { + while (m_next_epoch.load().epoch != nullptr) { + std::unique_lock<std::mutex> lk(m_epoch_cv_lk); + m_epoch_cv.wait(lk); } + } + + /** + * Verify that the currently active version of the index does not + * violate tombstone proportion invariants. Exposed for unit-testing + * purposes. + * + * @return Returns true if the tombstone proportion invariant is + * satisfied, and false if it is not. + */ + bool validate_tombstone_proportion() { + auto epoch = get_active_epoch(); + auto t = epoch->get_structure()->validate_tombstone_proportion(); + end_job(epoch); + return t; + } + + /** + * Calls SchedType::print_statistics, which should write a report of + * scheduler performance statistics to stdout. 
+ */ + void print_scheduler_statistics() const { m_sched.print_statistics(); } private: - SCHED m_sched; - - Buffer *m_buffer; + size_t m_scale_factor; + double m_max_delete_prop; - //std::mutex m_struct_lock; - //std::set<Structure *> m_versions; + SchedType m_sched; + Buffer *m_buffer; - alignas(64) std::atomic<bool> m_reconstruction_scheduled; + size_t m_core_cnt; + std::atomic<int> m_next_core; + std::atomic<size_t> m_epoch_cnt; + + alignas(64) std::atomic<bool> m_reconstruction_scheduled; - std::atomic<epoch_ptr> m_next_epoch; - std::atomic<epoch_ptr> m_current_epoch; - std::atomic<epoch_ptr> m_previous_epoch; + std::atomic<epoch_ptr> m_next_epoch; + std::atomic<epoch_ptr> m_current_epoch; + std::atomic<epoch_ptr> m_previous_epoch; - std::condition_variable m_epoch_cv; - std::mutex m_epoch_cv_lk; + std::condition_variable m_epoch_cv; + std::mutex m_epoch_cv_lk; - std::atomic<size_t> m_epoch_cnt; - size_t m_scale_factor; - double m_max_delete_prop; - std::atomic<int> m_next_core; - size_t m_core_cnt; - void enforce_delete_invariant(_Epoch *epoch) { - auto structure = epoch->get_structure(); - auto compactions = structure->get_compaction_tasks(); + void enforce_delete_invariant(_Epoch *epoch) { + auto structure = epoch->get_structure(); + auto compactions = structure->get_compaction_tasks(); - while (compactions.size() > 0) { + while (compactions.size() > 0) { - /* schedule a compaction */ - ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>(); - args->epoch = epoch; - args->merges = compactions; - args->extension = this; - args->compaction = true; - /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed here */ + /* schedule a compaction */ + ReconstructionArgs<ShardType, QueryType, L> *args = + new ReconstructionArgs<ShardType, QueryType, L>(); + args->epoch = epoch; + args->merges = compactions; + args->extension = this; + args->compaction = true; + /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed + * here */ - auto wait = args->result.get_future(); + auto wait = args->result.get_future(); - m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); - /* wait for compaction completion */ - wait.get(); - - /* get a new batch of compactions to perform, if needed */ - compactions = structure->get_compaction_tasks(); - } - } + /* wait for compaction completion */ + wait.get(); - _Epoch *get_active_epoch() { - epoch_ptr old, new_ptr; - - do { - /* - * during an epoch transition, a nullptr will installed in the - * current_epoch. At this moment, the "new" current epoch will - * soon be installed, but the "current" current epoch has been - * moved back to m_previous_epoch. - */ - if (m_current_epoch.load().epoch == nullptr) { - old = m_previous_epoch; - new_ptr = {old.epoch, old.refcnt+1}; - if (old.epoch != nullptr && m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } else { - old = m_current_epoch; - new_ptr = {old.epoch, old.refcnt+1}; - if (old.epoch != nullptr && m_current_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } - } while (true); - - assert(new_ptr.refcnt > 0); - - return new_ptr.epoch; + /* get a new batch of compactions to perform, if needed */ + compactions = structure->get_compaction_tasks(); } + } + + _Epoch *get_active_epoch() { + epoch_ptr old, new_ptr; + + do { + /* + * during an epoch transition, a nullptr will installed in the + * current_epoch. 
At this moment, the "new" current epoch will + * soon be installed, but the "current" current epoch has been + * moved back to m_previous_epoch. + */ + if (m_current_epoch.load().epoch == nullptr) { + old = m_previous_epoch; + new_ptr = {old.epoch, old.refcnt + 1}; + if (old.epoch != nullptr && + m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + } else { + old = m_current_epoch; + new_ptr = {old.epoch, old.refcnt + 1}; + if (old.epoch != nullptr && + m_current_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + } + } while (true); - void advance_epoch(size_t buffer_head) { + assert(new_ptr.refcnt > 0); - retire_epoch(m_previous_epoch.load().epoch); + return new_ptr.epoch; + } - epoch_ptr tmp = {nullptr, 0}; - epoch_ptr cur; - do { - cur = m_current_epoch; - } while(!m_current_epoch.compare_exchange_strong(cur, tmp)); + void advance_epoch(size_t buffer_head) { - m_previous_epoch.store(cur); + retire_epoch(m_previous_epoch.load().epoch); - // FIXME: this may currently block because there isn't any - // query preemption yet. At this point, we'd need to either - // 1) wait for all queries on the old_head to finish - // 2) kill all queries on the old_head - // 3) somehow migrate all queries on the old_head to the new - // version - while (!m_next_epoch.load().epoch->advance_buffer_head(buffer_head)) { - _mm_pause(); - } + epoch_ptr tmp = {nullptr, 0}; + epoch_ptr cur; + do { + cur = m_current_epoch; + } while (!m_current_epoch.compare_exchange_strong(cur, tmp)); + m_previous_epoch.store(cur); - m_current_epoch.store(m_next_epoch); - m_next_epoch.store({nullptr, 0}); + // FIXME: this may currently block because there isn't any + // query preemption yet. At this point, we'd need to either + // 1) wait for all queries on the old_head to finish + // 2) kill all queries on the old_head + // 3) somehow migrate all queries on the old_head to the new + // version + while (!m_next_epoch.load().epoch->advance_buffer_head(buffer_head)) { + _mm_pause(); + } + m_current_epoch.store(m_next_epoch); + m_next_epoch.store({nullptr, 0}); - /* notify any blocking threads that the new epoch is available */ - m_epoch_cv_lk.lock(); - m_epoch_cv.notify_all(); - m_epoch_cv_lk.unlock(); - } + /* notify any blocking threads that the new epoch is available */ + m_epoch_cv_lk.lock(); + m_epoch_cv.notify_all(); + m_epoch_cv_lk.unlock(); + } + /* + * Creates a new epoch by copying the currently active one. The new epoch's + * structure will be a shallow copy of the old one's. + */ + _Epoch *create_new_epoch() { /* - * Creates a new epoch by copying the currently active one. The new epoch's - * structure will be a shallow copy of the old one's. + * This epoch access is _not_ protected under the assumption that + * only one reconstruction will be able to trigger at a time. If that + * condition is violated, it is possible that this code will clone a retired + * epoch. */ - _Epoch *create_new_epoch() { - /* - * This epoch access is _not_ protected under the assumption that - * only one reconstruction will be able to trigger at a time. If that condition - * is violated, it is possible that this code will clone a retired - * epoch. 
- */ - assert(m_next_epoch.load().epoch == nullptr); - auto current_epoch = get_active_epoch(); + assert(m_next_epoch.load().epoch == nullptr); + auto current_epoch = get_active_epoch(); - m_epoch_cnt.fetch_add(1); - m_next_epoch.store({current_epoch->clone(m_epoch_cnt.load()), 0}); + m_epoch_cnt.fetch_add(1); + m_next_epoch.store({current_epoch->clone(m_epoch_cnt.load()), 0}); - end_job(current_epoch); + end_job(current_epoch); - return m_next_epoch.load().epoch; - } + return m_next_epoch.load().epoch; + } - void retire_epoch(_Epoch *epoch) { - /* - * Epochs with currently active jobs cannot - * be retired. By the time retire_epoch is called, - * it is assumed that a new epoch is active, meaning - * that the epoch to be retired should no longer - * accumulate new active jobs. Eventually, this - * number will hit zero and the function will - * proceed. - */ - - if (epoch == nullptr) { - return; - } + void retire_epoch(_Epoch *epoch) { + /* + * Epochs with currently active jobs cannot + * be retired. By the time retire_epoch is called, + * it is assumed that a new epoch is active, meaning + * that the epoch to be retired should no longer + * accumulate new active jobs. Eventually, this + * number will hit zero and the function will + * proceed. + */ - epoch_ptr old, new_ptr; - new_ptr = {nullptr, 0}; - do { - old = m_previous_epoch.load(); - - /* - * If running in single threaded mode, the failure to retire - * an Epoch will result in the thread of execution blocking - * indefinitely. - */ - if constexpr (std::same_as<SCHED, SerialScheduler>) { - if (old.epoch == epoch) assert(old.refcnt == 0); - } - - if (old.epoch == epoch && old.refcnt == 0 && - m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - usleep(1); - - } while(true); - - delete epoch; + if (epoch == nullptr) { + return; } - static void reconstruction(void *arguments) { - auto args = (ReconstructionArgs<R, S, Q, L> *) arguments; + epoch_ptr old, new_ptr; + new_ptr = {nullptr, 0}; + do { + old = m_previous_epoch.load(); + + /* + * If running in single threaded mode, the failure to retire + * an Epoch will result in the thread of execution blocking + * indefinitely. + */ + if constexpr (std::same_as<SchedType, SerialScheduler>) { + if (old.epoch == epoch) + assert(old.refcnt == 0); + } + + if (old.epoch == epoch && old.refcnt == 0 && + m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; + } + usleep(1); + + } while (true); + + delete epoch; + } + + static void reconstruction(void *arguments) { + auto args = (ReconstructionArgs<ShardType, QueryType, L> *)arguments; + + ((DynamicExtension *)args->extension)->SetThreadAffinity(); + Structure *vers = args->epoch->get_structure(); + + if constexpr (L == LayoutPolicy::BSM) { + if (args->merges.size() > 0) { + vers->reconstruction(args->merges[0]); + } + } else { + for (ssize_t i = 0; i < args->merges.size(); i++) { + vers->reconstruction(args->merges[i].target, + args->merges[i].sources[0]); + } + } - ((DynamicExtension *) args->extension)->SetThreadAffinity(); - Structure *vers = args->epoch->get_structure(); + /* + * we'll grab the buffer AFTER doing the internal reconstruction, so we + * can flush as many records as possible in one go. The reconstruction + * was done so as to make room for the full buffer anyway, so there's + * no real benefit to doing this first. 
+ */ + auto buffer_view = args->epoch->get_buffer(); + size_t new_head = buffer_view.get_tail(); - if constexpr (L == LayoutPolicy::BSM) { - if (args->merges.size() > 0) { - vers->reconstruction(args->merges[0]); - } - } else { - for (ssize_t i=0; i<args->merges.size(); i++) { - vers->reconstruction(args->merges[i].target, args->merges[i].sources[0]); - } - } + /* + * if performing a compaction, don't flush the buffer, as + * there is no guarantee that any necessary reconstructions + * will free sufficient space in L0 to support a flush + */ + if (!args->compaction) { + vers->flush_buffer(std::move(buffer_view)); + } + args->result.set_value(true); - /* - * we'll grab the buffer AFTER doing the internal reconstruction, so we - * can flush as many records as possible in one go. The reconstruction - * was done so as to make room for the full buffer anyway, so there's - * no real benefit to doing this first. - */ - auto buffer_view = args->epoch->get_buffer(); - size_t new_head = buffer_view.get_tail(); + /* + * Compactions occur on an epoch _before_ it becomes active, + * and as a result the active epoch should _not_ be advanced as + * part of a compaction + */ + if (!args->compaction) { + ((DynamicExtension *)args->extension)->advance_epoch(new_head); + } - /* - * if performing a compaction, don't flush the buffer, as - * there is no guarantee that any necessary reconstructions - * will free sufficient space in L0 to support a flush - */ - if (!args->compaction) { - vers->flush_buffer(std::move(buffer_view)); + ((DynamicExtension *)args->extension) + ->m_reconstruction_scheduled.store(false); + + delete args; + } + + static void async_query(void *arguments) { + auto *args = + (QueryArgs<ShardType, QueryType, DynamicExtension> *) arguments; + + auto epoch = args->extension->get_active_epoch(); + + auto buffer = epoch->get_buffer(); + auto vers = epoch->get_structure(); + auto *parms = &(args->query_parms); + + /* create initial buffer query */ + auto buffer_query = QueryType::local_preproc_buffer(&buffer, parms); + + /* create initial local queries */ + std::vector<std::pair<ShardID, ShardType *>> shards; + std::vector<LocalQuery *> local_queries = + vers->get_local_queries(shards, parms); + + /* process local/buffer queries to create the final version */ + QueryType::distribute_query(parms, local_queries, buffer_query); + + /* execute the local/buffer queries and combine the results into output */ + std::vector<QueryResult> output; + do { + std::vector<std::vector<LocalResult>> + query_results(shards.size() + 1); + for (size_t i = 0; i < query_results.size(); i++) { + std::vector<LocalResult> local_results; + ShardID shid; + + if (i == 0) { /* execute buffer query */ + local_results = QueryType::local_query_buffer(buffer_query); + shid = INVALID_SHID; + } else { /*execute local queries */ + local_results = QueryType::local_query(shards[i - 1].second, + local_queries[i - 1]); + shid = shards[i - 1].first; } - args->result.set_value(true); + /* framework-level, automatic delete filtering */ + query_results[i] = std::move(local_results); - /* - * Compactions occur on an epoch _before_ it becomes active, - * and as a result the active epoch should _not_ be advanced as - * part of a compaction - */ - if (!args->compaction) { - ((DynamicExtension *) args->extension)->advance_epoch(new_head); + /* end query early if EARLY_ABORT is set and a result exists */ + if constexpr (QueryType::EARLY_ABORT) { + if (query_results[i].size() > 0) + break; } + } - ((DynamicExtension *) 
args->extension)->m_reconstruction_scheduled.store(false); - - delete args; - } - - static void async_query(void *arguments) { - QueryArgs<R, S, Q, L> *args = (QueryArgs<R, S, Q, L> *) arguments; - - auto epoch = ((DynamicExtension *) args->extension)->get_active_epoch(); - - auto ptr1 = ((DynamicExtension *) args->extension)->m_previous_epoch.load().epoch; - auto ptr2 = ((DynamicExtension *) args->extension)->m_current_epoch.load().epoch; - auto ptr3 = ((DynamicExtension *) args->extension)->m_next_epoch.load().epoch; - - - auto buffer = epoch->get_buffer(); - auto vers = epoch->get_structure(); - void *parms = args->query_parms; - - /* Get the buffer query states */ - void *buffer_state = Q::get_buffer_query_state(&buffer, parms); - - /* Get the shard query states */ - std::vector<std::pair<ShardID, Shard*>> shards; - std::vector<void *> states = vers->get_query_states(shards, parms); + /* + * combine the results of the local queries, also translating + * from LocalResultType to ResultType + */ + QueryType::combine(query_results, parms, output); - std::vector<R> results; - Q::process_query_states(parms, states, buffer_state); + /* optionally repeat the local queries if necessary */ + } while (QueryType::repeat(parms, output, local_queries, buffer_query)); - do { - std::vector<std::vector<Wrapped<R>>> query_results(shards.size() + 1); - for (size_t i=0; i<query_results.size(); i++) { - std::vector<Wrapped<R>> local_results; - ShardID shid; + /* return the output vector to caller via the future */ + args->result_set.set_value(std::move(output)); - if (i == 0) { /* process the buffer first */ - local_results = Q::buffer_query(buffer_state, parms); - shid = INVALID_SHID; - } else { - local_results = Q::query(shards[i - 1].second, states[i - 1], parms); - shid = shards[i - 1].first; - } + /* officially end the query job, releasing the pin on the epoch */ + args->extension->end_job(epoch); - query_results[i] = std::move(filter_deletes(local_results, shid, vers, &buffer)); - - if constexpr (Q::EARLY_ABORT) { - if (query_results[i].size() > 0) break; - } - } - Q::merge(query_results, parms, results); - - } while (Q::repeat(parms, results, states, buffer_state)); - - args->result_set.set_value(std::move(results)); - - ((DynamicExtension *) args->extension)->end_job(epoch); - - Q::delete_buffer_query_state(buffer_state); - for (size_t i=0; i<states.size(); i++) { - Q::delete_query_state(states[i]); - } - - delete args; + /* clean up memory allocated for temporary query objects */ + delete buffer_query; + for (size_t i = 0; i < local_queries.size(); i++) { + delete local_queries[i]; } - void schedule_reconstruction() { - auto epoch = create_new_epoch(); - /* - * the reconstruction process calls end_job(), - * so we must start one before calling it - */ - - ReconstructionArgs<R, S, Q, L> *args = new ReconstructionArgs<R, S, Q, L>(); - args->epoch = epoch; - args->merges = epoch->get_structure()->get_reconstruction_tasks(m_buffer->get_high_watermark()); - args->extension = this; - args->compaction = false; - /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed here */ - - m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + delete args; + } + + void schedule_reconstruction() { + auto epoch = create_new_epoch(); + + ReconstructionArgs<ShardType, QueryType, L> *args = + new ReconstructionArgs<ShardType, QueryType, L>(); + args->epoch = epoch; + args->merges = epoch->get_structure()->get_reconstruction_tasks( + m_buffer->get_high_watermark()); + args->extension = this; + 
args->compaction = false; + /* NOTE: args is deleted by the reconstruction job, so shouldn't be freed + * here */ + + m_sched.schedule_job(reconstruction, 0, args, RECONSTRUCTION); + } + + std::future<std::vector<QueryResult>> + schedule_query(Parameters &&query_parms) { + auto args = + new QueryArgs<ShardType, QueryType, DynamicExtension>(); + args->extension = this; + args->query_parms = std::move(query_parms); + auto result = args->result_set.get_future(); + + m_sched.schedule_job(async_query, 0, (void *)args, QUERY); + + return result; + } + + int internal_append(const RecordType &rec, bool ts) { + if (m_buffer->is_at_low_watermark()) { + auto old = false; + + if (m_reconstruction_scheduled.compare_exchange_strong(old, true)) { + schedule_reconstruction(); + } } - std::future<std::vector<R>> schedule_query(void *query_parms) { - QueryArgs<R, S, Q, L> *args = new QueryArgs<R, S, Q, L>(); - args->extension = this; - args->query_parms = query_parms; - auto result = args->result_set.get_future(); + /* this will fail if the HWM is reached and return 0 */ + return m_buffer->append(rec, ts); + } - m_sched.schedule_job(async_query, 0, args, QUERY); - - return result; +#ifdef _GNU_SOURCE + void SetThreadAffinity() { + if constexpr (std::same_as<SchedType, SerialScheduler>) { + return; } - int internal_append(const R &rec, bool ts) { - if (m_buffer->is_at_low_watermark()) { - auto old = false; - - if (m_reconstruction_scheduled.compare_exchange_strong(old, true)) { - schedule_reconstruction(); - } - } - - /* this will fail if the HWM is reached and return 0 */ - return m_buffer->append(rec, ts); + int core = m_next_core.fetch_add(1) % m_core_cnt; + cpu_set_t mask; + CPU_ZERO(&mask); + + switch (core % 2) { + case 0: + // 0 |-> 0 + // 2 |-> 2 + // 4 |-> 4 + core = core + 0; + break; + case 1: + // 1 |-> 28 + // 3 |-> 30 + // 5 |-> 32 + core = (core - 1) + m_core_cnt; + break; } + CPU_SET(core, &mask); + ::sched_setaffinity(0, sizeof(mask), &mask); + } +#else + void SetThreadAffinity() {} +#endif - static std::vector<Wrapped<R>> filter_deletes(std::vector<Wrapped<R>> &records, ShardID shid, Structure *vers, BufView *bview) { - if constexpr (Q::SKIP_DELETE_FILTER) { - return std::move(records); - } - - std::vector<Wrapped<R>> processed_records; - processed_records.reserve(records.size()); + void end_job(_Epoch *epoch) { + epoch_ptr old, new_ptr; - /* - * For delete tagging, we just need to check the delete bit - * on each record. + do { + if (m_previous_epoch.load().epoch == epoch) { + old = m_previous_epoch; + /* + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry */ - if constexpr (D == DeletePolicy::TAGGING) { - for (auto &rec : records) { - if (rec.is_deleted()) { - continue; - } + if (old.epoch == nullptr) { + continue; + } - processed_records.emplace_back(rec); - } + assert(old.refcnt > 0); - return processed_records; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_previous_epoch.compare_exchange_strong(old, new_ptr)) { + break; } - + } else { + old = m_current_epoch; /* - * For tombstone deletes, we need to search for the corresponding - * tombstone for each record. + * This could happen if we get into the system during a + * transition. In this case, we can just back out and retry */ - for (auto &rec : records) { - if (rec.is_tombstone()) { - continue; - } - - // FIXME: need to figure out how best to re-enable the buffer tombstone - // check in the correct manner. 
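/*
 * Illustration (editor's sketch, not part of this patch): the asynchronous
 * query path above -- query() forwards to schedule_query(), which enqueues
 * async_query() with the scheduler -- is consumed through the returned
 * std::future. SomeQuery and ext are the same placeholders used in the
 * earlier sketch; SomeQuery::Parameters stands in for the query's parameter
 * type.
 *
 *   auto fut = ext.query(SomeQuery::Parameters{});
 *
 *   // get() blocks until async_query() has run local_query() /
 *   // local_query_buffer() against the shards and the buffer and has
 *   // combined the local results via QueryType::combine()
 *   std::vector<SomeQuery::ResultType> results = fut.get();
 *
 *   for (const auto &res : results) {
 *     // each element is a SomeQuery::ResultType produced by combine()
 *   }
 */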
- //if (buffview.check_tombstone(rec.rec)) { - //continue; - //} - - for (size_t i=0; i<bview->get_record_count(); i++) { - if (bview->get(i)->is_tombstone() && bview->get(i)->rec == rec.rec) { - continue; - } - } - - if (shid != INVALID_SHID) { - for (size_t lvl=0; lvl<=shid.level_idx; lvl++) { - if (vers->get_levels()[lvl]->check_tombstone(0, rec.rec)) { - continue; - } - } - - if (vers->get_levels()[shid.level_idx]->check_tombstone(shid.shard_idx + 1, rec.rec)) { - continue; - } - } - - processed_records.emplace_back(rec); - } - - return processed_records; - } - -#ifdef _GNU_SOURCE - void SetThreadAffinity() { - if constexpr (std::same_as<SCHED, SerialScheduler>) { - return; + if (old.epoch == nullptr) { + continue; } - int core = m_next_core.fetch_add(1) % m_core_cnt; - cpu_set_t mask; - CPU_ZERO(&mask); + assert(old.refcnt > 0); - switch (core % 2) { - case 0: - // 0 |-> 0 - // 2 |-> 2 - // 4 |-> 4 - core = core; - break; - case 1: - // 1 |-> 28 - // 3 |-> 30 - // 5 |-> 32 - core = (core - 1) + m_core_cnt; + new_ptr = {old.epoch, old.refcnt - 1}; + if (m_current_epoch.compare_exchange_strong(old, new_ptr)) { break; } - CPU_SET(core, &mask); - ::sched_setaffinity(0, sizeof(mask), &mask); - } -#else - void SetThreadAffinity() { - - } -#endif - - - void end_job(_Epoch *epoch) { - epoch_ptr old, new_ptr; - - do { - if (m_previous_epoch.load().epoch == epoch) { - old = m_previous_epoch; - /* - * This could happen if we get into the system during a - * transition. In this case, we can just back out and retry - */ - if (old.epoch == nullptr) { - continue; - } - - assert(old.refcnt > 0); - - new_ptr = {old.epoch, old.refcnt - 1}; - if (m_previous_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } else { - old = m_current_epoch; - /* - * This could happen if we get into the system during a - * transition. In this case, we can just back out and retry - */ - if (old.epoch == nullptr) { - continue; - } - - assert(old.refcnt > 0); - - new_ptr = {old.epoch, old.refcnt - 1}; - if (m_current_epoch.compare_exchange_strong(old, new_ptr)) { - break; - } - } - } while (true); - } - + } + } while (true); + } }; -} - +} // namespace de diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h index 577d6cd..1b64646 100644 --- a/include/framework/interface/Query.h +++ b/include/framework/interface/Query.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Query.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,23 +10,127 @@ #include "framework/QueryRequirements.h" -namespace de{ +namespace de { -template <typename Q, typename R, typename S> -concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv, std::vector<R> &resv) { - {Q::get_query_state(sh, p)} -> std::convertible_to<void*>; - {Q::get_buffer_query_state(bv, p)} -> std::convertible_to<void *>; - {Q::process_query_states(p, s, p)}; - {Q::query(sh, p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::buffer_query(p, p)} -> std::convertible_to<std::vector<Wrapped<R>>>; - {Q::merge(rv, p, resv)}; +/* + * FIXME: It would probably be best to absorb the std::vector into + * this type too; this would allow user-defined collections for + * intermediate results, which could allow for more merging + * optimizations. 
However, this would require an alternative + * approach to doing delete checks, so we'll leave it for now. + */ +template <typename R> +concept LocalResultInterface = requires(R res) { + { res.is_deleted() } -> std::convertible_to<bool>; + { res.is_tombstone() } -> std::convertible_to<bool>; +}; + +/* + * + * + */ +template <typename QUERY, typename SHARD, + typename RESULT = typename QUERY::ResultType, + typename LOCAL_RESULT = typename QUERY::LocalResultType, + typename PARAMETERS = typename QUERY::Parameters, + typename LOCAL = typename QUERY::LocalQuery, + typename LOCAL_BUFFER = typename QUERY::LocalQueryBuffer> +concept QueryInterface = LocalResultInterface<LOCAL_RESULT> && + requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query, + SHARD *shard, std::vector<LOCAL *> &local_queries, + std::vector<std::vector<LOCAL_RESULT>> &local_results, + std::vector<RESULT> &result, + BufferView<typename SHARD::RECORD> *bv) { + + /* + * Given a set of query parameters and a shard, return a local query + * object for that shard. + */ + { QUERY::local_preproc(shard, parameters) } -> std::convertible_to<LOCAL *>; + + /* + * Given a set of query parameters and a buffer view, return a local + * query object for the buffer. + * NOTE: for interface reasons, the pointer to the buffer view MUST be + * stored inside of the local query object. The future buffer + * query routine will access the buffer by way of this pointer. + */ + { + QUERY::local_preproc_buffer(bv, parameters) + } -> std::convertible_to<LOCAL_BUFFER *>; + + /* + * Given a full set of local queries, and the buffer query, make any + * necessary adjustments to the local queries in-place, to account for + * global information. If no additional processing is required, this + * function can be left empty. + */ + {QUERY::distribute_query(parameters, local_queries, buffer_query)}; + + /* + * Answer the local query, defined by `local` against `shard` and return + * a vector of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query(shard, local) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Answer the local query defined by `local` against the buffer (which + * should be accessed by a pointer inside of `local`) and return a vector + * of LOCAL_RESULT objects defining the query result. + */ + { + QUERY::local_query_buffer(buffer_query) + } -> std::convertible_to<std::vector<LOCAL_RESULT>>; + + /* + * Process the local results from the buffer and all of the shards, + * stored in `local_results`, and insert the associated ResultType + * objects into the `result` vector, which represents the final result + * of the query. Updates to this vector are done in-place. + */ + {QUERY::combine(local_results, parameters, result)}; - {Q::delete_query_state(p)} -> std::same_as<void>; - {Q::delete_buffer_query_state(p)} -> std::same_as<void>; + /* + * Process the post-combine `result` vector of ResultType objects, + * in the context of the global and local query parameters, to determine + * if the query should be repeated. If so, make any necessary adjustments + * to the local query objects and return True. Otherwise, return False. + * + * If no repetition is needed for a given problem type, simply return + * False immediately and the query will end. 
+ */ + { + QUERY::repeat(parameters, result, local_queries, buffer_query) + } -> std::same_as<bool>; - {Q::repeat(p, resv, s, p)} -> std::same_as<bool>; + /* + * If this flag is True, then the query will immediately stop and return + * a result as soon as the first non-deleted LocalRecordType is found. + * Otherwise, every Shard and the buffer will be queried and the results + * merged, like normal. + * + * This is largely an optimization flag for use with point-lookup, or + * other single-record result queries + */ + { QUERY::EARLY_ABORT } -> std::convertible_to<bool>; - {Q::EARLY_ABORT} -> std::convertible_to<bool>; - {Q::SKIP_DELETE_FILTER} -> std::convertible_to<bool>; + /* + * If false, the built-in delete filtering that the framework can + * apply to the local results, prior to calling combine, will be skipped. + * This general filtering can be inefficient, particularly for tombstone + * -based deletes, and so if a more efficient manual filtering can be + * performed, it is worth setting this to True and doing that filtering + * in the combine step. + * + * If deletes are not a consideration for your problem, it's also best + * to turn this off, as it'll avoid the framework making an extra pass + * over the local results prior to combining them. + * + * TODO: Temporarily disabling this, as we've dropped framework-level + * delete filtering for the time being. + */ + /* { QUERY::SKIP_DELETE_FILTER } -> std::convertible_to<bool>; */ }; -} +} // namespace de diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h index 19ccadd..d3e77d8 100644 --- a/include/framework/interface/Record.h +++ b/include/framework/interface/Record.h @@ -1,272 +1,247 @@ /* * include/framework/interface/Record.h * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* - * FIXME: the record implementations could probably be broken out into + * FIXME: the record implementations could probably be broken out into * different files, leaving only the interface here */ #pragma once -#include <cstring> -#include <concepts> #include <cmath> +#include <concepts> +#include <cstring> #include "psu-util/hash.h" namespace de { -template<typename R> +template <typename R> concept RecordInterface = requires(R r, R s) { - { r < s } ->std::convertible_to<bool>; - { r == s } ->std::convertible_to<bool>; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<typename R> +template <typename R> concept WeightedRecordInterface = requires(R r) { - {r.weight} -> std::convertible_to<double>; + { r.weight } -> std::convertible_to<double>; }; -template<typename R> +template <typename R> concept NDRecordInterface = RecordInterface<R> && requires(R r, R s) { - {r.calc_distance(s)} -> std::convertible_to<double>; + { r.calc_distance(s) } -> std::convertible_to<double>; }; template <typename R> concept KVPInterface = RecordInterface<R> && requires(R r) { - r.key; - r.value; + r.key; + r.value; }; -template<typename R> +template <typename R> concept AlexInterface = KVPInterface<R> && requires(R r) { - {r.key} -> std::convertible_to<size_t>; - {r.value} -> std::convertible_to<size_t>; + { r.key } -> std::convertible_to<size_t>; + { r.value } -> std::convertible_to<size_t>; }; -template<typename R> -concept WrappedInterface = RecordInterface<R> && requires(R r, R s, bool b, int i) { - {r.header} -> std::convertible_to<uint32_t>; - r.rec; - {r.set_delete()}; - {r.is_deleted()} -> std::convertible_to<bool>; - {r.set_tombstone(b)}; - {r.is_tombstone()} -> std::convertible_to<bool>; - {r.set_timestamp(i)}; - {r.get_timestamp()} -> std::convertible_to<uint32_t>; - {r.clear_timestamp()}; - {r.is_visible()} -> std::convertible_to<bool>; - {r.set_visible()}; - {r < s} -> std::convertible_to<bool>; - {r == s} ->std::convertible_to<bool>; +template <typename R> +concept WrappedInterface = RecordInterface<R> && + requires(R r, R s, bool b, int i) { + { r.header } -> std::convertible_to<uint32_t>; + r.rec; + {r.set_delete()}; + { r.is_deleted() } -> std::convertible_to<bool>; + {r.set_tombstone(b)}; + { r.is_tombstone() } -> std::convertible_to<bool>; + {r.set_timestamp(i)}; + { r.get_timestamp() } -> std::convertible_to<uint32_t>; + {r.clear_timestamp()}; + { r.is_visible() } -> std::convertible_to<bool>; + {r.set_visible()}; + { r < s } -> std::convertible_to<bool>; + { r == s } -> std::convertible_to<bool>; }; -template<RecordInterface R> -struct Wrapped { - uint32_t header; - R rec; +template <RecordInterface R> struct Wrapped { + uint32_t header; + R rec; - inline void set_delete() { - header |= 2; - } + inline void set_delete() { header |= 2; } - inline bool is_deleted() const { - return header & 2; - } + inline bool is_deleted() const { return header & 2; } - inline void set_visible() { - header |= 4; - } + inline void set_visible() { header |= 4; } - inline bool is_visible() const { - return header & 4; - } + inline bool is_visible() const { return header & 4; } - inline void set_timestamp(int ts) { - header |= (ts << 3); - } - - inline int get_timestamp() const { - return header >> 3; - } + inline void set_timestamp(int ts) { header |= (ts << 3); } - inline void clear_timestamp() { - header &= 7; - } + inline int get_timestamp() const { return header >> 3; } - inline void set_tombstone(bool val=true) { - if (val) { - header |= 1; - } else { - 
header &= 0; - } - } + inline void clear_timestamp() { header &= 7; } - inline bool is_tombstone() const { - return header & 1; + inline void set_tombstone(bool val = true) { + if (val) { + header |= 1; + } else { + header &= 0; } + } - inline bool operator<(const Wrapped& other) const { - return rec < other.rec || (rec == other.rec && header < other.header); - } + inline bool is_tombstone() const { return header & 1; } - inline bool operator==(const Wrapped& other) const { - return rec == other.rec; - } + inline bool operator<(const Wrapped &other) const { + return rec < other.rec || (rec == other.rec && header < other.header); + } + inline bool operator==(const Wrapped &other) const { + return rec == other.rec; + } }; -template <typename K, typename V> -struct Record { - K key; - V value; +template <typename K, typename V> struct Record { + K key; + V value; - inline bool operator<(const Record& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const Record &other) const { + return key < other.key || (key == other.key && value < other.value); + } - inline bool operator==(const Record& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const Record &other) const { + return key == other.key && value == other.value; + } }; -template<typename V> -struct Record<const char*, V> { - const char* key; - V value; - size_t len; +template <typename V> struct Record<const char *, V> { + const char *key; + V value; + size_t len; - inline bool operator<(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) < 0; - } + inline bool operator<(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) < 0; + } - inline bool operator==(const Record& other) const { - size_t n = std::min(len, other.len) + 1; - return strncmp(key, other.key, n) == 0; - } + inline bool operator==(const Record &other) const { + size_t n = std::min(len, other.len) + 1; + return strncmp(key, other.key, n) == 0; + } }; -template <typename K, typename V, typename W> -struct WeightedRecord { - K key; - V value; - W weight = 1; +template <typename K, typename V, typename W> struct WeightedRecord { + K key; + V value; + W weight = 1; - inline bool operator==(const WeightedRecord& other) const { - return key == other.key && value == other.value; - } + inline bool operator==(const WeightedRecord &other) const { + return key == other.key && value == other.value; + } - inline bool operator<(const WeightedRecord& other) const { - return key < other.key || (key == other.key && value < other.value); - } + inline bool operator<(const WeightedRecord &other) const { + return key < other.key || (key == other.key && value < other.value); + } }; +template <typename V, size_t D = 2> struct CosinePoint { + V data[D]; -template <typename V, size_t D=2> -struct CosinePoint{ - V data[D]; - - inline bool operator==(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } - - return true; + inline bool operator==(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const CosinePoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return true; + } + /* 
lexicographic order */ + inline bool operator<(const CosinePoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { + return true; + } else if (data[i] > other.data[i]) { return false; + } } - inline double calc_distance(const CosinePoint& other) const { + return false; + } - double prod = 0; - double asquared = 0; - double bsquared = 0; + inline double calc_distance(const CosinePoint &other) const { - for (size_t i=0; i<D; i++) { - prod += data[i] * other.data[i]; - asquared += data[i]*data[i]; - bsquared += other.data[i]*other.data[i]; - } + double prod = 0; + double asquared = 0; + double bsquared = 0; - return prod / std::sqrt(asquared * bsquared); + for (size_t i = 0; i < D; i++) { + prod += data[i] * other.data[i]; + asquared += data[i] * data[i]; + bsquared += other.data[i] * other.data[i]; } + + return prod / std::sqrt(asquared * bsquared); + } }; +template <typename V, size_t D = 2> struct EuclidPoint { + V data[D]; -template <typename V, size_t D=2> -struct EuclidPoint{ - V data[D]; + inline bool operator==(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] != other.data[i]) { + return false; + } + } - inline bool operator==(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] != other.data[i]) { - return false; - } - } + return true; + } + /* lexicographic order */ + inline bool operator<(const EuclidPoint &other) const { + for (size_t i = 0; i < D; i++) { + if (data[i] < other.data[i]) { return true; + } else if (data[i] > other.data[i]) { + return false; + } } - /* lexicographic order */ - inline bool operator<(const EuclidPoint& other) const { - for (size_t i=0; i<D; i++) { - if (data[i] < other.data[i]) { - return true; - } else if (data[i] > other.data[i]) { - return false; - } - } + return false; + } - return false; + inline double calc_distance(const EuclidPoint &other) const { + double dist = 0; + for (size_t i = 0; i < D; i++) { + dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); } - inline double calc_distance(const EuclidPoint& other) const { - double dist = 0; - for (size_t i=0; i<D; i++) { - dist += (data[i] - other.data[i]) * (data[i] - other.data[i]); - } - - return std::sqrt(dist); - } + return std::sqrt(dist); + } }; -template<RecordInterface R> -struct RecordHash { - size_t operator()(R const &rec) const { - return psudb::hash_bytes((std::byte *) &rec, sizeof(R)); - } +template <RecordInterface R> struct RecordHash { + size_t operator()(R const &rec) const { + return psudb::hash_bytes((std::byte *)&rec, sizeof(R)); + } }; -template <typename R> -class DistCmpMax { +template <typename R> class DistCmpMax { public: - DistCmpMax(R *baseline) : P(baseline) {} + DistCmpMax(R *baseline) : P(baseline) {} - inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } + inline bool operator()(const R *a, const R *b) requires WrappedInterface<R> { + return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); + } - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface<R>){ - return a->calc_distance(*P) > b->calc_distance(*P); - } + inline bool operator()(const R *a, + const R *b) requires(!WrappedInterface<R>) { + return a->calc_distance(*P) > b->calc_distance(*P); + } private: - R *P; + R *P; }; -} +} // namespace de diff --git a/include/framework/interface/Scheduler.h b/include/framework/interface/Scheduler.h index 451ddd2..d76a6c8 100644 --- 
a/include/framework/interface/Scheduler.h +++ b/include/framework/interface/Scheduler.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Scheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -10,10 +10,11 @@ #include "framework/scheduling/Task.h" -template <typename S> -concept SchedulerInterface = requires(S s, size_t i, void *vp, de::Job j) { - {S(i, i)}; - {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; - {s.shutdown()}; - {s.print_statistics()}; +template <typename SchedType> +concept SchedulerInterface = requires(SchedType s, size_t i, void *vp, + de::Job j) { + {SchedType(i, i)}; + {s.schedule_job(j, i, vp, i)} -> std::convertible_to<void>; + {s.shutdown()}; + {s.print_statistics()}; }; diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h index c4a9180..bd980c0 100644 --- a/include/framework/interface/Shard.h +++ b/include/framework/interface/Shard.h @@ -1,7 +1,7 @@ /* * include/framework/interface/Shard.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -12,25 +12,57 @@ namespace de { -template <typename S, typename R> -concept ShardInterface = RecordInterface<R> && requires(S s, std::vector<S*> spp, void *p, bool b, size_t i, BufferView<R> bv, R r) { - {S(spp)}; - {S(std::move(bv))}; +template <typename SHARD> +concept ShardInterface = RecordInterface<typename SHARD::RECORD> && + requires(SHARD shard, const std::vector<SHARD *> &shard_vector, bool b, + BufferView<typename SHARD::RECORD> bv, + typename SHARD::RECORD rec) { + /* construct a shard from a vector of shards of the same type */ + {SHARD(shard_vector)}; - {s.point_lookup(r, b) } -> std::same_as<Wrapped<R>*>; - {s.get_data()} -> std::same_as<Wrapped<R>*>; + /* construct a shard from a buffer view (i.e., unsorted array of records) */ + {SHARD(std::move(bv))}; + + /* perform a lookup for a record matching rec and return a pointer to it */ + { + shard.point_lookup(rec, b) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; + + /* + * return the number of records in the shard -- used to determine when + * reconstructions occur + */ + { shard.get_record_count() } -> std::convertible_to<size_t>; + + /* + * return the number of tombstones in the shard -- can simply return + * 0 if tombstones are not in use. + */ + { shard.get_tombstone_count() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by the main data structure + * within the shard -- informational use only at the moment + */ + { shard.get_memory_usage() } -> std::convertible_to<size_t>; + + /* + * return the number of bytes of memory used by auxilliary data + * structures (bloom filters, etc.) 
within the shard -- informational + * use only at the moment + */ + { shard.get_aux_memory_usage() } -> std::convertible_to<size_t>; - {s.get_record_count()} -> std::convertible_to<size_t>; - {s.get_tombstone_count()} -> std::convertible_to<size_t>; - {s.get_memory_usage()} -> std::convertible_to<size_t>; - {s.get_aux_memory_usage()} -> std::convertible_to<size_t>; }; -template <typename S, typename R> -concept SortedShardInterface = ShardInterface<S, R> && requires(S s, R r, R *rp, size_t i) { - {s.lower_bound(r)} -> std::convertible_to<size_t>; - {s.upper_bound(r)} -> std::convertible_to<size_t>; - {s.get_record_at(i)} -> std::same_as<Wrapped<R>*>; +template <typename SHARD> +concept SortedShardInterface = ShardInterface<SHARD> && + requires(SHARD shard, typename SHARD::RECORD rec, size_t index) { + { shard.lower_bound(rec) } -> std::convertible_to<size_t>; + { shard.upper_bound(rec) } -> std::convertible_to<size_t>; + { + shard.get_record_at(index) + } -> std::same_as<Wrapped<typename SHARD::RECORD> *>; }; -} +} // namespace de diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h index 9377fb0..03675b1 100644 --- a/include/framework/scheduling/Epoch.h +++ b/include/framework/scheduling/Epoch.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/Epoch.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -11,133 +11,120 @@ #include <condition_variable> #include <mutex> -#include "framework/structure/MutableBuffer.h" -#include "framework/structure/ExtensionStructure.h" #include "framework/structure/BufferView.h" +#include "framework/structure/ExtensionStructure.h" +#include "framework/structure/MutableBuffer.h" namespace de { - -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L> class Epoch { private: - typedef MutableBuffer<R> Buffer; - typedef ExtensionStructure<R, S, Q, L> Structure; - typedef BufferView<R> BufView; -public: - Epoch(size_t number=0) - : m_buffer(nullptr) - , m_structure(nullptr) - , m_active_merge(false) - , m_epoch_number(number) - , m_buffer_head(0) - {} - - Epoch(size_t number, Structure *structure, Buffer *buff, size_t head) - : m_buffer(buff) - , m_structure(structure) - , m_active_merge(false) - , m_epoch_number(number) - , m_buffer_head(head) - { - structure->take_reference(); - } - - ~Epoch() { - if (m_structure) { - m_structure->release_reference(); - } - - if (m_structure->get_reference_count() == 0) { - delete m_structure; - } + typedef typename ShardType::RECORD RecordType; + typedef MutableBuffer<RecordType> Buffer; + typedef ExtensionStructure<ShardType, QueryType, L> Structure; + typedef BufferView<RecordType> BufView; +public: + Epoch(size_t number = 0) + : m_buffer(nullptr), m_structure(nullptr), m_active_merge(false), + m_epoch_number(number), m_buffer_head(0) {} + + Epoch(size_t number, Structure *structure, Buffer *buff, size_t head) + : m_buffer(buff), m_structure(structure), m_active_merge(false), + m_epoch_number(number), m_buffer_head(head) { + structure->take_reference(); + } + + ~Epoch() { + if (m_structure) { + m_structure->release_reference(); } - /* - * Epochs are *not* copyable or movable. 
Only one can exist, and all users - * of it work with pointers - */ - Epoch(const Epoch&) = delete; - Epoch(Epoch&&) = delete; - Epoch &operator=(const Epoch&) = delete; - Epoch &operator=(Epoch&&) = delete; - - size_t get_epoch_number() { - return m_epoch_number; + if (m_structure->get_reference_count() == 0) { + delete m_structure; } - - Structure *get_structure() { - return m_structure; + } + + /* + * Epochs are *not* copyable or movable. Only one can exist, and all users + * of it work with pointers + */ + Epoch(const Epoch &) = delete; + Epoch(Epoch &&) = delete; + Epoch &operator=(const Epoch &) = delete; + Epoch &operator=(Epoch &&) = delete; + + size_t get_epoch_number() { return m_epoch_number; } + + Structure *get_structure() { return m_structure; } + + BufView get_buffer() { return m_buffer->get_buffer_view(m_buffer_head); } + + /* + * Returns a new Epoch object that is a copy of this one. The new object + * will also contain a copy of the m_structure, rather than a reference to + * the same one. The epoch number of the new epoch will be set to the + * provided argument. + */ + Epoch *clone(size_t number) { + std::unique_lock<std::mutex> m_buffer_lock; + auto epoch = new Epoch(number); + epoch->m_buffer = m_buffer; + epoch->m_buffer_head = m_buffer_head; + + if (m_structure) { + epoch->m_structure = m_structure->copy(); + /* the copy routine returns a structure with 0 references */ + epoch->m_structure->take_reference(); } - BufView get_buffer() { - return m_buffer->get_buffer_view(m_buffer_head); + return epoch; + } + + /* + * Check if a merge can be started from this Epoch. At present, without + * concurrent merging, this simply checks if there is currently a scheduled + * merge based on this Epoch. If there is, returns false. If there isn't, + * return true and set a flag indicating that there is an active merge. + */ + bool prepare_reconstruction() { + auto old = m_active_merge.load(); + if (old) { + return false; } - /* - * Returns a new Epoch object that is a copy of this one. The new object - * will also contain a copy of the m_structure, rather than a reference to - * the same one. The epoch number of the new epoch will be set to the - * provided argument. - */ - Epoch *clone(size_t number) { - std::unique_lock<std::mutex> m_buffer_lock; - auto epoch = new Epoch(number); - epoch->m_buffer = m_buffer; - epoch->m_buffer_head = m_buffer_head; - - if (m_structure) { - epoch->m_structure = m_structure->copy(); - /* the copy routine returns a structure with 0 references */ - epoch->m_structure->take_reference(); - } - - return epoch; + // FIXME: this needs cleaned up + while (!m_active_merge.compare_exchange_strong(old, true)) { + old = m_active_merge.load(); + if (old) { + return false; + } } - /* - * Check if a merge can be started from this Epoch. At present, without - * concurrent merging, this simply checks if there is currently a scheduled - * merge based on this Epoch. If there is, returns false. If there isn't, - * return true and set a flag indicating that there is an active merge. 
- */ - bool prepare_reconstruction() { - auto old = m_active_merge.load(); - if (old) { - return false; - } - - // FIXME: this needs cleaned up - while (!m_active_merge.compare_exchange_strong(old, true)) { - old = m_active_merge.load(); - if (old) { - return false; - } - } - - return true; - } + return true; + } - bool advance_buffer_head(size_t head) { - m_buffer_head = head; - return m_buffer->advance_head(m_buffer_head); - } + bool advance_buffer_head(size_t head) { + m_buffer_head = head; + return m_buffer->advance_head(m_buffer_head); + } private: - Structure *m_structure; - Buffer *m_buffer; - - std::mutex m_buffer_lock; - std::atomic<bool> m_active_merge; - - /* - * The number of currently active jobs - * (queries/merges) operating on this - * epoch. An epoch can only be retired - * when this number is 0. - */ - size_t m_epoch_number; - size_t m_buffer_head; + Buffer *m_buffer; + Structure *m_structure; + + std::mutex m_buffer_lock; + std::atomic<bool> m_active_merge; + + /* + * The number of currently active jobs + * (queries/merges) operating on this + * epoch. An epoch can only be retired + * when this number is 0. + */ + size_t m_epoch_number; + size_t m_buffer_head; }; -} +} // namespace de diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h index 3ed4f49..7cb6d20 100644 --- a/include/framework/scheduling/FIFOScheduler.h +++ b/include/framework/scheduling/FIFOScheduler.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/FIFOScheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -17,11 +17,11 @@ */ #pragma once -#include <thread> -#include <condition_variable> -#include <chrono> #include "framework/scheduling/Task.h" #include "framework/scheduling/statistics.h" +#include <chrono> +#include <condition_variable> +#include <thread> #include "ctpl/ctpl.h" #include "psu-ds/LockedPriorityQueue.h" @@ -30,100 +30,95 @@ namespace de { using namespace std::literals::chrono_literals; - class FIFOScheduler { private: - static const size_t DEFAULT_MAX_THREADS = 8; + static const size_t DEFAULT_MAX_THREADS = 8; public: - FIFOScheduler(size_t memory_budget, size_t thread_cnt) - : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) - , m_thrd_cnt((thread_cnt) ? thread_cnt: DEFAULT_MAX_THREADS) - , m_used_memory(0) - , m_used_thrds(0) - , m_shutdown(false) - { - m_sched_thrd = std::thread(&FIFOScheduler::run, this); - m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this); - m_thrd_pool.resize(m_thrd_cnt); + FIFOScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX), + m_thrd_cnt((thread_cnt) ? 
thread_cnt : DEFAULT_MAX_THREADS), + m_used_memory(0), m_used_thrds(0), m_shutdown(false) { + m_sched_thrd = std::thread(&FIFOScheduler::run, this); + m_sched_wakeup_thrd = std::thread(&FIFOScheduler::periodic_wakeup, this); + m_thrd_pool.resize(m_thrd_cnt); + } + + ~FIFOScheduler() { + if (!m_shutdown.load()) { + shutdown(); } - ~FIFOScheduler() { - if (!m_shutdown.load()) { - shutdown(); - } + m_sched_thrd.join(); + m_sched_wakeup_thrd.join(); + } - m_sched_thrd.join(); - m_sched_wakeup_thrd.join(); - } + void schedule_job(std::function<void(void *)> job, size_t size, void *args, + size_t type = 0) { + std::unique_lock<std::mutex> lk(m_cv_lock); + size_t ts = m_counter.fetch_add(1); - void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { - std::unique_lock<std::mutex> lk(m_cv_lock); - size_t ts = m_counter.fetch_add(1); + m_stats.job_queued(ts, type, size); + m_task_queue.push(Task(size, ts, job, args, type, &m_stats)); - m_stats.job_queued(ts, type, size); - m_task_queue.push(Task(size, ts, job, args, type, &m_stats)); + m_cv.notify_all(); + } - m_cv.notify_all(); - } - - void shutdown() { - m_shutdown.store(true); - m_thrd_pool.stop(true); - m_cv.notify_all(); - } + void shutdown() { + m_shutdown.store(true); + m_thrd_pool.stop(true); + m_cv.notify_all(); + } - void print_statistics() { - m_stats.print_statistics(); - } + void print_statistics() { m_stats.print_statistics(); } private: - psudb::LockedPriorityQueue<Task> m_task_queue; + psudb::LockedPriorityQueue<Task> m_task_queue; - size_t m_memory_budget; - size_t m_thrd_cnt; + [[maybe_unused]] size_t m_memory_budget; + size_t m_thrd_cnt; - std::atomic<bool> m_shutdown; - std::atomic<size_t> m_counter; - std::mutex m_cv_lock; - std::condition_variable m_cv; + std::atomic<size_t> m_counter; + std::mutex m_cv_lock; + std::condition_variable m_cv; - std::thread m_sched_thrd; - std::thread m_sched_wakeup_thrd; - ctpl::thread_pool m_thrd_pool; + std::thread m_sched_thrd; + std::thread m_sched_wakeup_thrd; + ctpl::thread_pool m_thrd_pool; - std::atomic<size_t> m_used_thrds; - std::atomic<size_t> m_used_memory; + std::atomic<size_t> m_used_memory; + std::atomic<size_t> m_used_thrds; - SchedulerStatistics m_stats; + std::atomic<bool> m_shutdown; - void periodic_wakeup() { - do { - std::this_thread::sleep_for(10us); - m_cv.notify_all(); - } while (!m_shutdown.load()); - } + SchedulerStatistics m_stats; - void schedule_next() { - assert(m_task_queue.size() > 0); - auto t = m_task_queue.pop(); - m_stats.job_scheduled(t.m_timestamp); + void periodic_wakeup() { + do { + std::this_thread::sleep_for(10us); + m_cv.notify_all(); + } while (!m_shutdown.load()); + } - m_thrd_pool.push(t); - } + void schedule_next() { + assert(m_task_queue.size() > 0); + auto t = m_task_queue.pop(); + m_stats.job_scheduled(t.m_timestamp); - void run() { - do { - std::unique_lock<std::mutex> cv_lock(m_cv_lock); - m_cv.wait(cv_lock); + m_thrd_pool.push(t); + } - while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) { - schedule_next(); - } - } while(!m_shutdown.load()); - } + void run() { + do { + std::unique_lock<std::mutex> cv_lock(m_cv_lock); + m_cv.wait(cv_lock); + while (m_task_queue.size() > 0 && m_thrd_pool.n_idle() > 0) { + schedule_next(); + } + } while (!m_shutdown.load()); + } }; -} +} // namespace de diff --git a/include/framework/scheduling/SerialScheduler.h b/include/framework/scheduling/SerialScheduler.h index ac59301..7cd9cfc 100644 --- a/include/framework/scheduling/SerialScheduler.h +++ 
b/include/framework/scheduling/SerialScheduler.h @@ -1,13 +1,13 @@ /* * include/framework/scheduling/SerialScheduler.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * * IMPORTANT: This "scheduler" is a shim implementation for allowing - * strictly serial, single-threaded operation of the framework. It should - * never be used in multi-threaded contexts. A call to the schedule_job + * strictly serial, single-threaded operation of the framework. It should + * never be used in multi-threaded contexts. A call to the schedule_job * function will immediately run the job and block on its completion before * returning. * @@ -21,42 +21,36 @@ namespace de { class SerialScheduler { public: - SerialScheduler(size_t memory_budget, size_t thread_cnt) - : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX) - , m_thrd_cnt((thread_cnt) ? thread_cnt: UINT64_MAX) - , m_used_memory(0) - , m_used_thrds(0) - , m_counter(0) - {} - - ~SerialScheduler() = default; - - void schedule_job(std::function<void(void*)> job, size_t size, void *args, size_t type=0) { - size_t ts = m_counter++; - m_stats.job_queued(ts, type, size); - m_stats.job_scheduled(ts); - auto t = Task(size, ts, job, args, type, &m_stats); - t(0); - } - - void shutdown() { - /* intentionally left blank */ - } - - void print_statistics() { - m_stats.print_statistics(); - } + SerialScheduler(size_t memory_budget, size_t thread_cnt) + : m_memory_budget((memory_budget) ? memory_budget : UINT64_MAX), + m_thrd_cnt((thread_cnt) ? thread_cnt : UINT64_MAX), m_used_memory(0), + m_used_thrds(0), m_counter(0) {} + + ~SerialScheduler() = default; + + void schedule_job(std::function<void(void *)> job, size_t size, void *args, + size_t type = 0) { + size_t ts = m_counter++; + m_stats.job_queued(ts, type, size); + m_stats.job_scheduled(ts); + auto t = Task(size, ts, job, args, type, &m_stats); + t(0); + } + + void shutdown() { /* intentionally left blank */ } + + void print_statistics() { m_stats.print_statistics(); } private: - size_t m_memory_budget; - size_t m_thrd_cnt; + [[maybe_unused]] size_t m_memory_budget; + [[maybe_unused]] size_t m_thrd_cnt; - size_t m_used_thrds; - size_t m_used_memory; + [[maybe_unused]] size_t m_used_memory; + [[maybe_unused]] size_t m_used_thrds; - size_t m_counter; + size_t m_counter; - SchedulerStatistics m_stats; + SchedulerStatistics m_stats; }; -} +} // namespace de diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index bd53090..6b6f040 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/Task.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -13,77 +13,76 @@ */ #pragma once -#include <future> -#include <functional> #include <chrono> +#include <functional> +#include <future> -#include "framework/util/Configuration.h" #include "framework/scheduling/Epoch.h" #include "framework/scheduling/statistics.h" +#include "framework/util/Configuration.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L> struct ReconstructionArgs { - Epoch<R, S, Q, L> *epoch; - ReconstructionVector merges; - std::promise<bool> result; - bool compaction; - void *extension; + typedef typename ShardType::RECORD RecordType; + Epoch<ShardType, QueryType, L> *epoch; + ReconstructionVector merges; + std::promise<bool> result; + bool compaction; + void *extension; }; -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L> -struct QueryArgs { - std::promise<std::vector<R>> result_set; - void *query_parms; - void *extension; +template <ShardInterface S, QueryInterface<S> Q, typename DE> struct QueryArgs { + std::promise<std::vector<typename Q::ResultType>> result_set; + typename Q::Parameters query_parms; + DE *extension; }; -typedef std::function<void(void*)> Job; +typedef std::function<void(void *)> Job; struct Task { - Task(size_t size, size_t ts, Job job, void *args, size_t type=0, SchedulerStatistics *stats=nullptr) - : m_job(job) - , m_size(size) - , m_timestamp(ts) - , m_args(args) - , m_type(type) - , m_stats(stats) - {} + Task(size_t size, size_t ts, Job job, void *args, size_t type = 0, + SchedulerStatistics *stats = nullptr) + : m_job(job), m_size(size), m_timestamp(ts), m_args(args), m_type(type), + m_stats(stats) {} - Job m_job; - size_t m_size; - size_t m_timestamp; - void *m_args; - size_t m_type; - SchedulerStatistics *m_stats; + Job m_job; + size_t m_size; + size_t m_timestamp; + void *m_args; + size_t m_type; + SchedulerStatistics *m_stats; - friend bool operator<(const Task &self, const Task &other) { - return self.m_timestamp < other.m_timestamp; - } + friend bool operator<(const Task &self, const Task &other) { + return self.m_timestamp < other.m_timestamp; + } - friend bool operator>(const Task &self, const Task &other) { - return self.m_timestamp > other.m_timestamp; - } + friend bool operator>(const Task &self, const Task &other) { + return self.m_timestamp > other.m_timestamp; + } - void operator()(size_t thrd_id) { - auto start = std::chrono::high_resolution_clock::now(); - if (m_stats) { - m_stats->job_begin(m_timestamp); - } + void operator()(size_t thrd_id) { + auto start = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_begin(m_timestamp); + } - m_job(m_args); + m_job(m_args); - if (m_stats) { - m_stats->job_complete(m_timestamp); - } - auto stop = std::chrono::high_resolution_clock::now(); + if (m_stats) { + m_stats->job_complete(m_timestamp); + } + auto stop = std::chrono::high_resolution_clock::now(); - if (m_stats) { - auto time = std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count(); - m_stats->log_time_data(time, m_type); - } + if (m_stats) { + auto time = + std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start) + .count(); + m_stats->log_time_data(time, m_type); } + } }; -} +} // namespace de diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h index 6c479cd..48c186f 100644 --- a/include/framework/scheduling/statistics.h +++ 
b/include/framework/scheduling/statistics.h @@ -1,7 +1,7 @@ /* * include/framework/scheduling/statistics.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -13,106 +13,94 @@ */ #pragma once -#include <cstdlib> +#include <atomic> #include <cassert> +#include <chrono> +#include <cstdlib> +#include <mutex> #include <unordered_map> #include <vector> -#include <mutex> -#include <chrono> -#include <atomic> namespace de { class SchedulerStatistics { private: - enum class EventType { - QUEUED, - SCHEDULED, - STARTED, - FINISHED - }; + enum class EventType { QUEUED, SCHEDULED, STARTED, FINISHED }; - struct Event { - size_t id; - EventType type; - }; - - struct JobInfo { - size_t id; - size_t size; - size_t type; - }; + struct Event { + size_t id; + EventType type; + }; + struct JobInfo { + size_t id; + size_t size; + size_t type; + }; public: - SchedulerStatistics() = default; - ~SchedulerStatistics() = default; + SchedulerStatistics() = default; + ~SchedulerStatistics() = default; - void job_queued(size_t id, size_t type, size_t size) { - auto time = std::chrono::high_resolution_clock::now(); - } + void job_queued(size_t id, size_t type, size_t size) { } - void job_scheduled(size_t id) { - std::unique_lock<std::mutex> lk(m_mutex); + void job_scheduled(size_t id) { std::unique_lock<std::mutex> lk(m_mutex); } - } + void job_begin(size_t id) {} - void job_begin(size_t id) { + void job_complete(size_t id) {} - } + /* FIXME: This is just a temporary approach */ + void log_time_data(size_t length, size_t type) { + assert(type == 1 || type == 2); - void job_complete(size_t id) { + if (type == 1) { + m_type_1_cnt.fetch_add(1); + m_type_1_total_time.fetch_add(length); - } + if (length > m_type_1_largest_time) { + m_type_1_largest_time.store(length); + } + } else { + m_type_2_cnt.fetch_add(1); + m_type_2_total_time.fetch_add(length); - /* FIXME: This is just a temporary approach */ - void log_time_data(size_t length, size_t type) { - assert(type == 1 || type == 2); - - if (type == 1) { - m_type_1_cnt.fetch_add(1); - m_type_1_total_time.fetch_add(length); - - if (length > m_type_1_largest_time) { - m_type_1_largest_time.store(length); - } - } else { - m_type_2_cnt.fetch_add(1); - m_type_2_total_time.fetch_add(length); - - if (length > m_type_2_largest_time) { - m_type_2_largest_time.store(length); - } - } + if (length > m_type_2_largest_time) { + m_type_2_largest_time.store(length); + } } - - void print_statistics() { - if (m_type_1_cnt > 0) { - fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", - m_type_1_cnt.load(), - m_type_1_total_time.load() / m_type_1_cnt.load(), - m_type_1_largest_time.load()); - } - if (m_type_2_cnt > 0) { - fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\tMax Recon. Latency:%ld\n", - m_type_2_cnt.load(), - m_type_2_total_time.load() / m_type_2_cnt.load(), - m_type_2_largest_time.load()); - } + } + + void print_statistics() { + if (m_type_1_cnt > 0) { + fprintf( + stdout, + "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", + m_type_1_cnt.load(), m_type_1_total_time.load() / m_type_1_cnt.load(), + m_type_1_largest_time.load()); + } + if (m_type_2_cnt > 0) { + fprintf(stdout, + "Reconstruction Count: %ld\tReconstruction Avg. Latency: " + "%ld\tMax Recon. 
Latency:%ld\n", + m_type_2_cnt.load(), + m_type_2_total_time.load() / m_type_2_cnt.load(), + m_type_2_largest_time.load()); } + } private: - std::mutex m_mutex; - std::unordered_map<size_t, JobInfo> m_jobs; - std::vector<Event> m_event_log; + std::mutex m_mutex; + std::unordered_map<size_t, JobInfo> m_jobs; + std::vector<Event> m_event_log; - std::atomic<size_t> m_type_1_cnt; - std::atomic<size_t> m_type_1_total_time; + std::atomic<size_t> m_type_1_cnt; + std::atomic<size_t> m_type_1_total_time; - std::atomic<size_t> m_type_2_cnt; - std::atomic<size_t> m_type_2_total_time; + std::atomic<size_t> m_type_2_cnt; + std::atomic<size_t> m_type_2_total_time; - std::atomic<size_t> m_type_1_largest_time; - std::atomic<size_t> m_type_2_largest_time; + std::atomic<size_t> m_type_1_largest_time; + std::atomic<size_t> m_type_2_largest_time; }; -} +} // namespace de diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index e95a799..acf1201 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -1,7 +1,7 @@ /* * include/framework/structure/BufferView.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -9,166 +9,150 @@ */ #pragma once -#include <cstdlib> #include <cassert> +#include <cstdlib> #include <functional> #include <utility> -#include "psu-util/alignment.h" -#include "psu-ds/BloomFilter.h" #include "framework/interface/Record.h" +#include "psu-ds/BloomFilter.h" +#include "psu-util/alignment.h" namespace de { -typedef std::function<void(void)> ReleaseFunction; +typedef std::function<void(void)> ReleaseFunction; -template <RecordInterface R> -class BufferView { +template <RecordInterface R> class BufferView { public: - BufferView() = default; - - /* - * the BufferView's lifetime is tightly linked to buffer versioning, and so - * copying and assignment are disabled. - */ - BufferView(const BufferView&) = delete; - BufferView &operator=(BufferView &) = delete; - - BufferView(BufferView &&other) - : m_data(std::exchange(other.m_data, nullptr)) - , m_release(std::move(other.m_release)) - , m_head(std::exchange(other.m_head, 0)) - , m_tail(std::exchange(other.m_tail, 0)) - , m_start(std::exchange(other.m_start, 0)) - , m_stop(std::exchange(other.m_stop, 0)) - , m_cap(std::exchange(other.m_cap, 0)) - , m_approx_ts_cnt(std::exchange(other.m_approx_ts_cnt, 0)) - , m_tombstone_filter(std::exchange(other.m_tombstone_filter, nullptr)) - , m_active(std::exchange(other.m_active, false)) {} - - BufferView &operator=(BufferView &&other) = delete; - - - BufferView(Wrapped<R> *buffer, size_t cap, size_t head, size_t tail, size_t tombstone_cnt, psudb::BloomFilter<R> *filter, - ReleaseFunction release) - : m_data(buffer) - , m_release(release) - , m_head(head) - , m_tail(tail) - , m_start(m_head % cap) - , m_stop(m_tail % cap) - , m_cap(cap) - , m_approx_ts_cnt(tombstone_cnt) - , m_tombstone_filter(filter) - , m_active(true) {} - - ~BufferView() { - if (m_active) { - m_release(); - } + BufferView() = default; + + /* + * the BufferView's lifetime is tightly linked to buffer versioning, so + * copying and assignment are disabled. 
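+ * Move construction is still permitted; the moved-from view is
+ * deactivated, so the release function is invoked at most once.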
+ */ + BufferView(const BufferView &) = delete; + BufferView &operator=(BufferView &) = delete; + + BufferView(BufferView &&other) + : m_data(std::exchange(other.m_data, nullptr)), + m_release(std::move(other.m_release)), + m_head(std::exchange(other.m_head, 0)), + m_tail(std::exchange(other.m_tail, 0)), + m_start(std::exchange(other.m_start, 0)), + m_stop(std::exchange(other.m_stop, 0)), + m_cap(std::exchange(other.m_cap, 0)), + m_approx_ts_cnt(std::exchange(other.m_approx_ts_cnt, 0)), + m_tombstone_filter(std::exchange(other.m_tombstone_filter, nullptr)), + m_active(std::exchange(other.m_active, false)) {} + + BufferView &operator=(BufferView &&other) = delete; + + BufferView(Wrapped<R> *buffer, size_t cap, size_t head, size_t tail, + size_t tombstone_cnt, psudb::BloomFilter<R> *filter, + ReleaseFunction release) + : m_data(buffer), m_release(release), m_head(head), m_tail(tail), + m_start(m_head % cap), m_stop(m_tail % cap), m_cap(cap), + m_approx_ts_cnt(tombstone_cnt), m_tombstone_filter(filter), + m_active(true) {} + + ~BufferView() { + if (m_active) { + m_release(); } + } - bool check_tombstone(const R& rec) { - if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) return false; - - for (size_t i=0; i<get_record_count(); i++) { - if (m_data[to_idx(i)].rec == rec && m_data[to_idx(i)].is_tombstone()) { - return true; - } - } + bool check_tombstone(const R &rec) { + if (m_tombstone_filter && !m_tombstone_filter->lookup(rec)) + return false; - return false; + for (size_t i = 0; i < get_record_count(); i++) { + if (m_data[to_idx(i)].rec == rec && m_data[to_idx(i)].is_tombstone()) { + return true; + } } - bool delete_record(const R& rec) { - if (m_start < m_stop) { - for (size_t i=m_start; i<m_stop; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - } - } else { - for (size_t i=m_start; i<m_cap; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - } - - for (size_t i=0; i<m_stop; i++) { - if (m_data[i].rec == rec) { - m_data[i].set_delete(); - return true; - } - - } + return false; + } + bool delete_record(const R &rec) { + if (m_start < m_stop) { + for (size_t i = m_start; i < m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; } + } + } else { + for (size_t i = m_start; i < m_cap; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; + } + } - return false; - } - - size_t get_record_count() { - return m_tail - m_head; - } - - size_t get_capacity() { - return m_cap; - } - - /* - * NOTE: This function returns an upper bound on the number - * of tombstones within the view. There may be less than - * this, due to synchronization issues during view creation. - */ - size_t get_tombstone_count() { - return m_approx_ts_cnt; - } - - Wrapped<R> *get(size_t i) { - //assert(i < get_record_count()); - return m_data + to_idx(i); - } - - void copy_to_buffer(psudb::byte *buffer) { - /* check if the region to be copied circles back to start. 
If so, do it in two steps */ - if (m_start > m_stop) { - size_t split_idx = m_cap - m_start; - - memcpy(buffer, (std::byte*) (m_data + m_start), split_idx* sizeof(Wrapped<R>)); - memcpy(buffer + (split_idx * sizeof(Wrapped<R>)), (std::byte*) m_data, m_stop * sizeof(Wrapped<R>)); - } else { - memcpy(buffer, (std::byte*) (m_data + m_start), get_record_count() * sizeof(Wrapped<R>)); + for (size_t i = 0; i < m_stop; i++) { + if (m_data[i].rec == rec) { + m_data[i].set_delete(); + return true; } + } } - size_t get_tail() { - return m_tail; + return false; + } + + size_t get_record_count() { return m_tail - m_head; } + + size_t get_capacity() { return m_cap; } + + /* + * NOTE: This function returns an upper bound on the number + * of tombstones within the view. There may be less than + * this, due to synchronization issues during view creation. + */ + size_t get_tombstone_count() { return m_approx_ts_cnt; } + + Wrapped<R> *get(size_t i) { + return m_data + to_idx(i); + } + + void copy_to_buffer(psudb::byte *buffer) { + /* check if the region to be copied circles back to start. If so, do it in + * two steps */ + if (m_start > m_stop) { + size_t split_idx = m_cap - m_start; + + memcpy(buffer, (std::byte *)(m_data + m_start), + split_idx * sizeof(Wrapped<R>)); + memcpy(buffer + (split_idx * sizeof(Wrapped<R>)), (std::byte *)m_data, + m_stop * sizeof(Wrapped<R>)); + } else { + memcpy(buffer, (std::byte *)(m_data + m_start), + get_record_count() * sizeof(Wrapped<R>)); } + } - size_t get_head() { - return m_head; - } + size_t get_tail() { return m_tail; } + + size_t get_head() { return m_head; } private: - Wrapped<R>* m_data; - ReleaseFunction m_release; - size_t m_head; - size_t m_tail; - size_t m_start; - size_t m_stop; - size_t m_cap; - size_t m_approx_ts_cnt; - psudb::BloomFilter<R> *m_tombstone_filter; - bool m_active; - - size_t to_idx(size_t i) { - size_t idx = (m_start + i >= m_cap) ? i - (m_cap - m_start) - : m_start + i; - assert(idx < m_cap); - return idx; - } + Wrapped<R> *m_data; + ReleaseFunction m_release; + size_t m_head; + size_t m_tail; + size_t m_start; + size_t m_stop; + size_t m_cap; + size_t m_approx_ts_cnt; + psudb::BloomFilter<R> *m_tombstone_filter; + bool m_active; + + size_t to_idx(size_t i) { + size_t idx = (m_start + i >= m_cap) ? i - (m_cap - m_start) : m_start + i; + assert(idx < m_cap); + return idx; + } }; -} +} // namespace de diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index b83674b..2728246 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -1,8 +1,8 @@ /* * include/framework/structure/ExtensionStructure.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -22,622 +22,660 @@ namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q, LayoutPolicy L=LayoutPolicy::TEIRING> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType, + LayoutPolicy L = LayoutPolicy::TEIRING> class ExtensionStructure { - typedef S Shard; - typedef BufferView<R> BuffView; + typedef typename ShardType::RECORD RecordType; + typedef BufferView<RecordType> BuffView; - typedef struct { - size_t reccnt; - size_t reccap; + typedef struct { + size_t reccnt; + size_t reccap; - size_t shardcnt; - size_t shardcap; - } level_state; + size_t shardcnt; + size_t shardcap; + } level_state; - typedef std::vector<level_state> state_vector; + typedef std::vector<level_state> state_vector; public: - ExtensionStructure(size_t buffer_size, size_t scale_factor, double max_delete_prop) - : m_scale_factor(scale_factor) - , m_max_delete_prop(max_delete_prop) - , m_buffer_size(buffer_size) - {} - - ~ExtensionStructure() = default; - - /* - * Create a shallow copy of this extension structure. The copy will share - * references to the same levels/shards as the original, but will have its - * own lists. As all of the shards are immutable (with the exception of - * deletes), the copy can be restructured with reconstructions and flushes - * without affecting the original. The copied structure will be returned - * with a reference count of 0; generally you will want to immediately call - * take_reference() on it. - * - * NOTE: When using tagged deletes, a delete of a record in the original - * structure will affect the copy, so long as the copy retains a reference - * to the same shard as the original. This could cause synchronization - * problems under tagging with concurrency. Any deletes in this context will - * need to be forwarded to the appropriate structures manually. - */ - ExtensionStructure<R, S, Q, L> *copy() { - auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, - m_max_delete_prop); - for (size_t i=0; i<m_levels.size(); i++) { - new_struct->m_levels.push_back(m_levels[i]->clone()); - } - - new_struct->m_refcnt = 0; - new_struct->m_current_state = m_current_state; + ExtensionStructure(size_t buffer_size, size_t scale_factor, + double max_delete_prop) + : m_scale_factor(scale_factor), m_max_delete_prop(max_delete_prop), + m_buffer_size(buffer_size) {} + + ~ExtensionStructure() = default; + + /* + * Create a shallow copy of this extension structure. The copy will share + * references to the same levels/shards as the original, but will have its + * own lists. As all of the shards are immutable (with the exception of + * deletes), the copy can be restructured with reconstructions and flushes + * without affecting the original. The copied structure will be returned + * with a reference count of 0; generally you will want to immediately call + * take_reference() on it. + * + * NOTE: When using tagged deletes, a delete of a record in the original + * structure will affect the copy, so long as the copy retains a reference + * to the same shard as the original. This could cause synchronization + * problems under tagging with concurrency. Any deletes in this context will + * need to be forwarded to the appropriate structures manually. 
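+ * A typical call sequence is therefore:
+ *   auto next = structure->copy(); next->take_reference();
+ * after which the copy can be restructured freely.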
+ */ + ExtensionStructure<ShardType, QueryType, L> *copy() { + auto new_struct = new ExtensionStructure<ShardType, QueryType, L>( + m_buffer_size, m_scale_factor, m_max_delete_prop); + for (size_t i = 0; i < m_levels.size(); i++) { + new_struct->m_levels.push_back(m_levels[i]->clone()); + } - return new_struct; + new_struct->m_refcnt = 0; + new_struct->m_current_state = m_current_state; + + return new_struct; + } + + /* + * Search for a record matching the argument and mark it deleted by + * setting the delete bit in its wrapped header. Returns 1 if a matching + * record was found and deleted, and 0 if a matching record was not found. + * + * This function will stop after finding the first matching record. It is + * assumed that no duplicate records exist. In the case of duplicates, this + * function will still "work", but in the sense of "delete first match". + */ + int tagged_delete(const RecordType &rec) { + for (auto level : m_levels) { + if (level && level->delete_record(rec)) { + return 1; + } } /* - * Search for a record matching the argument and mark it deleted by - * setting the delete bit in its wrapped header. Returns 1 if a matching - * record was found and deleted, and 0 if a matching record was not found. - * - * This function will stop after finding the first matching record. It is - * assumed that no duplicate records exist. In the case of duplicates, this - * function will still "work", but in the sense of "delete first match". + * If the record to be erased wasn't found, return 0. The + * DynamicExtension itself will then search the active + * Buffers. */ - int tagged_delete(const R &rec) { - for (auto level : m_levels) { - if (level && level->delete_record(rec)) { - return 1; - } - } - - /* - * If the record to be erased wasn't found, return 0. The - * DynamicExtension itself will then search the active - * Buffers. - */ - return 0; + return 0; + } + + /* + * Flush a buffer into the extension structure, performing any necessary + * reconstructions to free up room in L0. + * + * FIXME: arguably, this should be a method attached to the buffer that + * takes a structure as input. + */ + inline bool flush_buffer(BuffView buffer) { + state_vector tmp = m_current_state; + + if (tmp.size() == 0) { + grow(tmp); } - /* - * Flush a buffer into the extension structure, performing any necessary - * reconstructions to free up room in L0. - * - * FIXME: arguably, this should be a method attached to the buffer that - * takes a structure as input. - */ - inline bool flush_buffer(BuffView buffer) { - state_vector tmp = m_current_state; + assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); + flush_buffer_into_l0(std::move(buffer)); - if (tmp.size() == 0) { - grow(tmp); - } + return true; + } - assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); - flush_buffer_into_l0(std::move(buffer)); + /* + * Return the total number of records (including tombstones) within all + * of the levels of the structure. + */ + size_t get_record_count() { + size_t cnt = 0; - return true; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_record_count(); } - /* - * Return the total number of records (including tombstones) within all - * of the levels of the structure. 
- */ - size_t get_record_count() { - size_t cnt = 0; + return cnt; + } - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_record_count(); - } + /* + * Return the total number of tombstones contained within all of the + * levels of the structure. + */ + size_t get_tombstone_count() { + size_t cnt = 0; - return cnt; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_tombstone_count(); } - /* - * Return the total number of tombstones contained within all of the - * levels of the structure. - */ - size_t get_tombstone_count() { - size_t cnt = 0; - - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_tombstone_count(); - } - - return cnt; + return cnt; + } + + /* + * Return the number of levels within the structure. Note that not + * all of these levels are necessarily populated. + */ + size_t get_height() { return m_levels.size(); } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing the primary data structure and raw data. + */ + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) + cnt += m_levels[i]->get_memory_usage(); } - /* - * Return the number of levels within the structure. Note that not - * all of these levels are necessarily populated. - */ - size_t get_height() { - return m_levels.size(); + return cnt; + } + + /* + * Return the amount of memory (in bytes) used by the shards within the + * structure for storing auxiliary data structures. This total does not + * include memory used for the main data structure, or raw data. + */ + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + cnt += m_levels[i]->get_aux_memory_usage(); + } } - /* - * Return the amount of memory (in bytes) used by the shards within the - * structure for storing the primary data structure and raw data. - */ - size_t get_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) cnt += m_levels[i]->get_memory_usage(); + return cnt; + } + + /* + * Validate that no level in the structure exceeds its maximum tombstone + * capacity. This is used to trigger preemptive compactions at the end of + * the reconstruction process. + */ + bool validate_tombstone_proportion() { + long double ts_prop; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + ts_prop = (long double)m_levels[i]->get_tombstone_count() / + (long double)calc_level_record_capacity(i); + if (ts_prop > (long double)m_max_delete_prop) { + return false; } - - return cnt; + } } - /* - * Return the amount of memory (in bytes) used by the shards within the - * structure for storing auxiliary data structures. This total does not - * include memory used for the main data structure, or raw data. + return true; + } + + bool validate_tombstone_proportion(level_index level) { + long double ts_prop = (long double)m_levels[level]->get_tombstone_count() / + (long double)calc_level_record_capacity(level); + return ts_prop <= (long double)m_max_delete_prop; + } + + /* + * Return a reference to the underlying vector of levels within the + * structure. + */ + std::vector<std::shared_ptr<InternalLevel<ShardType, QueryType>>> & + get_levels() { + return m_levels; + } + + /* + * NOTE: This cannot be simulated, because tombstone cancellation is not + * cheaply predictable. 
It is possible that the worst case number could + * be used instead, to allow for prediction, but compaction isn't a + * major concern outside of sampling; at least for now. So I'm not + * going to focus too much time on it at the moment. + */ + ReconstructionVector get_compaction_tasks() { + ReconstructionVector tasks; + state_vector scratch_state = m_current_state; + + /* if the tombstone/delete invariant is satisfied, no need for compactions */ - size_t get_aux_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) { - cnt += m_levels[i]->get_aux_memory_usage(); - } - } - - return cnt; + if (validate_tombstone_proportion()) { + return tasks; } - /* - * Validate that no level in the structure exceeds its maximum tombstone - * capacity. This is used to trigger preemptive compactions at the end of - * the reconstruction process. - */ - bool validate_tombstone_proportion() { - long double ts_prop; - for (size_t i = 0; i < m_levels.size(); i++) { - if (m_levels[i]) { - ts_prop = (long double)m_levels[i]->get_tombstone_count() / - (long double)calc_level_record_capacity(i); - if (ts_prop > (long double)m_max_delete_prop) { - return false; - } - } + /* locate the first level to violate the invariant */ + level_index violation_idx = -1; + for (level_index i = 0; i < m_levels.size(); i++) { + if (!validate_tombstone_proportion(i)) { + violation_idx = i; + break; } - - return true; } - bool validate_tombstone_proportion(level_index level) { - long double ts_prop = (long double) m_levels[level]->get_tombstone_count() / (long double) calc_level_record_capacity(level); - return ts_prop <= (long double) m_max_delete_prop; - } + assert(violation_idx != -1); - /* - * Return a reference to the underlying vector of levels within the - * structure. - */ - std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> &get_levels() { - return m_levels; + level_index base_level = + find_reconstruction_target(violation_idx, scratch_state); + if (base_level == -1) { + base_level = grow(scratch_state); } - /* - * NOTE: This cannot be simulated, because tombstone cancellation is not - * cheaply predictable. It is possible that the worst case number could - * be used instead, to allow for prediction, but compaction isn't a - * major concern outside of sampling; at least for now. So I'm not - * going to focus too much time on it at the moment. - */ - ReconstructionVector get_compaction_tasks() { - ReconstructionVector tasks; - state_vector scratch_state = m_current_state; - - /* if the tombstone/delete invariant is satisfied, no need for compactions */ - if (validate_tombstone_proportion()) { - return tasks; - } - - /* locate the first level to violate the invariant */ - level_index violation_idx = -1; - for (level_index i=0; i<m_levels.size(); i++) { - if (!validate_tombstone_proportion(i)) { - violation_idx = i; - break; - } - } - - assert(violation_idx != -1); - - level_index base_level = find_reconstruction_target(violation_idx, scratch_state); - if (base_level == -1) { - base_level = grow(scratch_state); - } - - for (level_index i=base_level; i>0; i--) { - /* - * The amount of storage required for the reconstruction accounts - * for the cost of storing the new records, along with the - * cost of retaining the old records during the process - * (hence the 2x multiplier). - * - * FIXME: currently does not account for the *actual* size - * of the shards, only the storage for the records - * themselves. 
- */ - size_t reccnt = m_levels[i - 1]->get_record_count(); - if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt, scratch_state)) { - reccnt += m_levels[i]->get_record_count(); - } - } - tasks.add_reconstruction(i-i, i, reccnt); + for (level_index i = base_level; i > 0; i--) { + /* + * The amount of storage required for the reconstruction accounts + * for the cost of storing the new records, along with the + * cost of retaining the old records during the process + * (hence the 2x multiplier). + * + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records + * themselves. + */ + size_t reccnt = m_levels[i - 1]->get_record_count(); + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, reccnt, scratch_state)) { + reccnt += m_levels[i]->get_record_count(); } - - return tasks; + } + tasks.add_reconstruction(i - i, i, reccnt); } + return tasks; + } + + /* + * + */ + ReconstructionVector + get_reconstruction_tasks(size_t buffer_reccnt, + state_vector scratch_state = {}) { /* - * + * If no scratch state vector is provided, use a copy of the + * current one. The only time an empty vector could be used as + * *real* input to this function is when the current state is also + * empty, so this should would even in that case. */ - ReconstructionVector get_reconstruction_tasks(size_t buffer_reccnt, - state_vector scratch_state={}) { - /* - * If no scratch state vector is provided, use a copy of the - * current one. The only time an empty vector could be used as - * *real* input to this function is when the current state is also - * empty, so this should would even in that case. - */ - if (scratch_state.size() == 0) { - scratch_state = m_current_state; - } - - ReconstructionVector reconstructions; - size_t LOOKAHEAD = 1; - for (size_t i=0; i<LOOKAHEAD; i++) { - /* - * If L0 cannot support a direct buffer flush, figure out what - * work must be done to free up space first. Otherwise, the - * reconstruction vector will be initially empty. - */ - if (!can_reconstruct_with(0, buffer_reccnt, scratch_state)) { - auto local_recon = get_reconstruction_tasks_from_level(0, scratch_state); - - /* - * for the first iteration, we need to do all of the - * reconstructions, so use these to initially the returned - * reconstruction list - */ - if (i == 0) { - reconstructions = local_recon; - /* - * Quick sanity test of idea: if the next reconstruction - * would be larger than this one, steal the largest - * task from it and run it now instead. - */ - } else if (local_recon.get_total_reccnt() > reconstructions.get_total_reccnt()) { - auto t = local_recon.remove_reconstruction(0); - reconstructions.add_reconstruction(t); - } - } - - /* simulate the buffer flush in the scratch state */ - scratch_state[0].reccnt += buffer_reccnt; - if (L == LayoutPolicy::TEIRING || scratch_state[0].shardcnt == 0) { - scratch_state[0].shardcnt += 1; - } - - } - - return std::move(reconstructions); + if (scratch_state.size() == 0) { + scratch_state = m_current_state; } - - /* - * - */ - ReconstructionVector get_reconstruction_tasks_from_level(level_index source_level, state_vector &scratch_state) { - ReconstructionVector reconstructions; + ReconstructionVector reconstructions; + size_t LOOKAHEAD = 1; + for (size_t i = 0; i < LOOKAHEAD; i++) { + /* + * If L0 cannot support a direct buffer flush, figure out what + * work must be done to free up space first. Otherwise, the + * reconstruction vector will be initially empty. 
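+ * (With LOOKAHEAD fixed at 1 above, only this first iteration of the
+ * lookahead loop currently runs.)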
+ */ + if (!can_reconstruct_with(0, buffer_reccnt, scratch_state)) { + auto local_recon = + get_reconstruction_tasks_from_level(0, scratch_state); /* - * Find the first level capable of sustaining a reconstruction from - * the level above it. If no such level exists, add a new one at - * the bottom of the structure. + * for the first iteration, we need to do all of the + * reconstructions, so use these to initially the returned + * reconstruction list */ - level_index base_level = find_reconstruction_target(source_level, scratch_state); - if (base_level == -1) { - base_level = grow(scratch_state); + if (i == 0) { + reconstructions = local_recon; + /* + * Quick sanity test of idea: if the next reconstruction + * would be larger than this one, steal the largest + * task from it and run it now instead. + */ + } else if (local_recon.get_total_reccnt() > + reconstructions.get_total_reccnt()) { + auto t = local_recon.remove_reconstruction(0); + reconstructions.add_reconstruction(t); } + } - if constexpr (L == LayoutPolicy::BSM) { - if (base_level == 0) { - return std::move(reconstructions); - } - - ReconstructionTask task; - task.target = base_level; - - size_t base_reccnt = 0; - for (level_index i=base_level; i>source_level; i--) { - auto recon_reccnt = scratch_state[i-1].reccnt; - base_reccnt += recon_reccnt; - scratch_state[i-1].reccnt = 0; - scratch_state[i-1].shardcnt = 0; - task.add_source(i-1, recon_reccnt); - } + /* simulate the buffer flush in the scratch state */ + scratch_state[0].reccnt += buffer_reccnt; + if (L == LayoutPolicy::TEIRING || scratch_state[0].shardcnt == 0) { + scratch_state[0].shardcnt += 1; + } + } - reconstructions.add_reconstruction(task); - scratch_state[base_level].reccnt = base_reccnt; - scratch_state[base_level].shardcnt = 1; + return reconstructions; + } - return std::move(reconstructions); - } + /* + * + */ + ReconstructionVector + get_reconstruction_tasks_from_level(level_index source_level, + state_vector &scratch_state) { + ReconstructionVector reconstructions; - /* - * Determine the full set of reconstructions necessary to open up - * space in the source level. - */ - for (level_index i=base_level; i>source_level; i--) { - size_t recon_reccnt = scratch_state[i-1].reccnt; - size_t base_reccnt = recon_reccnt; - - /* - * If using Leveling, the total reconstruction size will be the - * records in *both* base and target, because they will need to - * be merged (assuming that target isn't empty). - */ - if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, base_reccnt, scratch_state)) { - recon_reccnt += scratch_state[i].reccnt; - } - } - reconstructions.add_reconstruction(i-1, i, recon_reccnt); - - /* - * The base level will be emptied and its records moved to - * the target. - */ - scratch_state[i-1].reccnt = 0; - scratch_state[i-1].shardcnt = 0; - - /* - * The target level will have the records from the base level - * added to it, and potentially gain a shard if the LayoutPolicy - * is tiering or the level currently lacks any shards at all. - */ - scratch_state[i].reccnt += base_reccnt; - if (L == LayoutPolicy::TEIRING || scratch_state[i].shardcnt == 0) { - scratch_state[i].shardcnt += 1; - } - } - - return std::move(reconstructions); + /* + * Find the first level capable of sustaining a reconstruction from + * the level above it. If no such level exists, add a new one at + * the bottom of the structure. 
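+ * Note that grow() only extends the scratch state vector; the actual
+ * level is added later by the reconstruction or flush that executes
+ * this plan.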
+ */ + level_index base_level = + find_reconstruction_target(source_level, scratch_state); + if (base_level == -1) { + base_level = grow(scratch_state); } - inline void reconstruction(ReconstructionTask task) { - static_assert(L == LayoutPolicy::BSM); - std::vector<InternalLevel<R, Shard, Q>*> levels(task.sources.size()); - for (size_t i=0; i<task.sources.size(); i++) { - levels[i] = m_levels[task.sources[i]].get(); - } + if constexpr (L == LayoutPolicy::BSM) { + if (base_level == 0) { + return reconstructions; + } - auto new_level = InternalLevel<R, Shard, Q>::reconstruction(levels, task.target); - if (task.target >= m_levels.size()) { - m_current_state.push_back({new_level->get_record_count(), calc_level_record_capacity(task.target), - 1, 1}); - m_levels.emplace_back(new_level); - } else { - m_current_state[task.target] = {new_level->get_record_count(), calc_level_record_capacity(task.target), - 1, 1}; - m_levels[task.target] = new_level; - } + ReconstructionTask task; + task.target = base_level; - /* remove all of the levels that have been flattened */ - for (size_t i=0; i<task.sources.size(); i++) { - m_levels[task.sources[i]] = std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(task.sources[i], 1)); - m_current_state[task.sources[i]] = {0, calc_level_record_capacity(task.target), 0, 1}; - } + size_t base_reccnt = 0; + for (level_index i = base_level; i > source_level; i--) { + auto recon_reccnt = scratch_state[i - 1].reccnt; + base_reccnt += recon_reccnt; + scratch_state[i - 1].reccnt = 0; + scratch_state[i - 1].shardcnt = 0; + task.add_source(i - 1, recon_reccnt); + } + + reconstructions.add_reconstruction(task); + scratch_state[base_level].reccnt = base_reccnt; + scratch_state[base_level].shardcnt = 1; - return; + return reconstructions; } /* - * Combine incoming_level with base_level and reconstruct the shard, - * placing it in base_level. The two levels should be sequential--i.e. no - * levels are skipped in the reconstruction process--otherwise the - * tombstone ordering invariant may be violated. + * Determine the full set of reconstructions necessary to open up + * space in the source level. */ - inline void reconstruction(level_index base_level, level_index incoming_level) { - size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - if (base_level >= m_levels.size()) { - m_levels.emplace_back(std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(base_level, shard_capacity))); - m_current_state.push_back({0, calc_level_record_capacity(base_level), - 0, shard_capacity}); - } - - if constexpr (L == LayoutPolicy::LEVELING) { - /* if the base level has a shard, merge the base and incoming together to make a new one */ - if (m_levels[base_level]->get_shard_count() > 0) { - m_levels[base_level] = InternalLevel<R, Shard, Q>::reconstruction(m_levels[base_level].get(), m_levels[incoming_level].get()); - /* otherwise, we can just move the incoming to the base */ - } else { - m_levels[base_level] = m_levels[incoming_level]; - } - - } else { - m_levels[base_level]->append_level(m_levels[incoming_level].get()); - m_levels[base_level]->finalize(); + for (level_index i = base_level; i > source_level; i--) { + size_t recon_reccnt = scratch_state[i - 1].reccnt; + size_t base_reccnt = recon_reccnt; + + /* + * If using Leveling, the total reconstruction size will be the + * records in *both* base and target, because they will need to + * be merged (assuming that target isn't empty). 
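+ * Under tiering, the base level's records are instead appended to the
+ * target as a new shard, so only the base's record count is charged.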
+ */ + if constexpr (L == LayoutPolicy::LEVELING) { + if (can_reconstruct_with(i, base_reccnt, scratch_state)) { + recon_reccnt += scratch_state[i].reccnt; } + } + reconstructions.add_reconstruction(i - 1, i, recon_reccnt); + + /* + * The base level will be emptied and its records moved to + * the target. + */ + scratch_state[i - 1].reccnt = 0; + scratch_state[i - 1].shardcnt = 0; + + /* + * The target level will have the records from the base level + * added to it, and potentially gain a shard if the LayoutPolicy + * is tiering or the level currently lacks any shards at all. + */ + scratch_state[i].reccnt += base_reccnt; + if (L == LayoutPolicy::TEIRING || scratch_state[i].shardcnt == 0) { + scratch_state[i].shardcnt += 1; + } + } - /* place a new, empty level where the incoming level used to be */ - m_levels[incoming_level] = std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); + return reconstructions; + } - /* - * Update the state vector to match the *real* state following - * the reconstruction - */ - m_current_state[base_level] = {m_levels[base_level]->get_record_count(), - calc_level_record_capacity(base_level), m_levels[base_level]->get_shard_count(), shard_capacity}; - m_current_state[incoming_level] = {0, calc_level_record_capacity(incoming_level), 0, shard_capacity}; + inline void reconstruction(ReconstructionTask task) { + static_assert(L == LayoutPolicy::BSM); + std::vector<InternalLevel<ShardType, QueryType> *> levels( + task.sources.size()); + for (size_t i = 0; i < task.sources.size(); i++) { + levels[i] = m_levels[task.sources[i]].get(); } - bool take_reference() { - m_refcnt.fetch_add(1); - return true; + auto new_level = InternalLevel<ShardType, QueryType>::reconstruction( + levels, task.target); + if (task.target >= m_levels.size()) { + m_current_state.push_back({new_level->get_record_count(), + calc_level_record_capacity(task.target), 1, + 1}); + m_levels.emplace_back(new_level); + } else { + m_current_state[task.target] = {new_level->get_record_count(), + calc_level_record_capacity(task.target), + 1, 1}; + m_levels[task.target] = new_level; } - bool release_reference() { - assert(m_refcnt.load() > 0); - m_refcnt.fetch_add(-1); - return true; + /* remove all of the levels that have been flattened */ + for (size_t i = 0; i < task.sources.size(); i++) { + m_levels[task.sources[i]] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(task.sources[i], 1)); + m_current_state[task.sources[i]] = { + 0, calc_level_record_capacity(task.target), 0, 1}; } - size_t get_reference_count() { - return m_refcnt.load(); + return; + } + + /* + * Combine incoming_level with base_level and reconstruct the shard, + * placing it in base_level. The two levels should be sequential--i.e. no + * levels are skipped in the reconstruction process--otherwise the + * tombstone ordering invariant may be violated. + */ + inline void reconstruction(level_index base_level, + level_index incoming_level) { + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 
1 : m_scale_factor; + + if (base_level >= m_levels.size()) { + m_levels.emplace_back( + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(base_level, + shard_capacity))); + m_current_state.push_back( + {0, calc_level_record_capacity(base_level), 0, shard_capacity}); } - std::vector<void *> get_query_states(std::vector<std::pair<ShardID, Shard*>> &shards, void *parms) { - std::vector<void*> states; - - for (auto &level : m_levels) { - level->get_query_states(shards, states, parms); - } + if constexpr (L == LayoutPolicy::LEVELING) { + /* if the base level has a shard, merge the base and incoming together to + * make a new one */ + if (m_levels[base_level]->get_shard_count() > 0) { + m_levels[base_level] = + InternalLevel<ShardType, QueryType>::reconstruction( + m_levels[base_level].get(), m_levels[incoming_level].get()); + /* otherwise, we can just move the incoming to the base */ + } else { + m_levels[base_level] = m_levels[incoming_level]; + } - return states; + } else { + m_levels[base_level]->append_level(m_levels[incoming_level].get()); + m_levels[base_level]->finalize(); } -private: - size_t m_scale_factor; - double m_max_delete_prop; - size_t m_buffer_size; - - std::atomic<size_t> m_refcnt; - - std::vector<std::shared_ptr<InternalLevel<R, S, Q>>> m_levels; - - /* - * A pair of <record_count, shard_count> for each level in the - * structure. Record counts may be slightly inaccurate due to - * deletes. - */ - state_vector m_current_state; + /* place a new, empty level where the incoming level used to be */ + m_levels[incoming_level] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>( + incoming_level, + (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); /* - * Add a new level to the scratch state and return its index. - * - * IMPORTANT: This does _not_ add a level to the extension structure - * anymore. This is handled by the appropriate reconstruction and flush - * methods as needed. This function is for use in "simulated" - * reconstructions. + * Update the state vector to match the *real* state following + * the reconstruction */ - inline level_index grow(state_vector &scratch_state) { - level_index new_idx = m_levels.size(); - size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - scratch_state.push_back({0, calc_level_record_capacity(new_idx), - 0, new_shard_cap}); - return new_idx; + m_current_state[base_level] = {m_levels[base_level]->get_record_count(), + calc_level_record_capacity(base_level), + m_levels[base_level]->get_shard_count(), + shard_capacity}; + m_current_state[incoming_level] = { + 0, calc_level_record_capacity(incoming_level), 0, shard_capacity}; + } + + bool take_reference() { + m_refcnt.fetch_add(1); + return true; + } + + bool release_reference() { + assert(m_refcnt.load() > 0); + m_refcnt.fetch_add(-1); + return true; + } + + size_t get_reference_count() { return m_refcnt.load(); } + + std::vector<typename QueryType::LocalQuery *> + get_local_queries(std::vector<std::pair<ShardID, ShardType *>> &shards, + typename QueryType::Parameters *parms) { + + std::vector<typename QueryType::LocalQuery *> queries; + + for (auto &level : m_levels) { + level->get_local_queries(shards, queries, parms); } - /* - * Find the first level below the level indicated by idx that - * is capable of sustaining a reconstruction and return its - * level index. If no such level exists, returns -1. 
Also - * returns -1 if idx==0, and no such level exists, to simplify - * the logic of the first buffer flush. - */ - inline level_index find_reconstruction_target(level_index idx, state_vector &state) { + return queries; + } - /* - * this handles the very first buffer flush, when the state vector - * is empty. - */ - if (idx == 0 && state.size() == 0) return -1; +private: + size_t m_scale_factor; + double m_max_delete_prop; + size_t m_buffer_size; + + std::atomic<size_t> m_refcnt; + + std::vector<std::shared_ptr<InternalLevel<ShardType, QueryType>>> m_levels; + + /* + * A pair of <record_count, shard_count> for each level in the + * structure. Record counts may be slightly inaccurate due to + * deletes. + */ + state_vector m_current_state; + + /* + * Add a new level to the scratch state and return its index. + * + * IMPORTANT: This does _not_ add a level to the extension structure + * anymore. This is handled by the appropriate reconstruction and flush + * methods as needed. This function is for use in "simulated" + * reconstructions. + */ + inline level_index grow(state_vector &scratch_state) { + level_index new_idx = m_levels.size(); + size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + scratch_state.push_back( + {0, calc_level_record_capacity(new_idx), 0, new_shard_cap}); + return new_idx; + } + + /* + * Find the first level below the level indicated by idx that + * is capable of sustaining a reconstruction and return its + * level index. If no such level exists, returns -1. Also + * returns -1 if idx==0, and no such level exists, to simplify + * the logic of the first buffer flush. + */ + inline level_index find_reconstruction_target(level_index idx, + state_vector &state) { - size_t incoming_rec_cnt = state[idx].reccnt; - for (level_index i=idx+1; i<state.size(); i++) { - if (can_reconstruct_with(i, incoming_rec_cnt, state)) { - return i; - } + /* + * this handles the very first buffer flush, when the state vector + * is empty. + */ + if (idx == 0 && state.size() == 0) + return -1; - incoming_rec_cnt = state[idx].reccnt; - } + size_t incoming_rec_cnt = state[idx].reccnt; + for (level_index i = idx + 1; i < state.size(); i++) { + if (can_reconstruct_with(i, incoming_rec_cnt, state)) { + return i; + } - return -1; + incoming_rec_cnt = state[idx].reccnt; } - inline void flush_buffer_into_l0(BuffView buffer) { - size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - - if (m_levels.size() == 0) { - m_levels.emplace_back(std::shared_ptr<InternalLevel<R, Shard, Q>>(new InternalLevel<R, Shard, Q>(0, shard_capacity))); + return -1; + } - m_current_state.push_back({0, calc_level_record_capacity(0), - 0, shard_capacity}); - } + inline void flush_buffer_into_l0(BuffView buffer) { + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - if constexpr (L == LayoutPolicy::LEVELING) { - // FIXME: Kludgey implementation due to interface constraints. 
- auto old_level = m_levels[0].get(); - auto temp_level = new InternalLevel<R, Shard, Q>(0, 1); - temp_level->append_buffer(std::move(buffer)); - - if (old_level->get_shard_count() > 0) { - m_levels[0] = InternalLevel<R, Shard, Q>::reconstruction(old_level, temp_level); - delete temp_level; - } else { - m_levels[0] = std::shared_ptr<InternalLevel<R, Shard, Q>>(temp_level); - } - } else { - m_levels[0]->append_buffer(std::move(buffer)); - } + if (m_levels.size() == 0) { + m_levels.emplace_back( + std::shared_ptr<InternalLevel<ShardType, QueryType>>( + new InternalLevel<ShardType, QueryType>(0, shard_capacity))); - /* update the state vector */ - m_current_state[0].reccnt = m_levels[0]->get_record_count(); - m_current_state[0].shardcnt = m_levels[0]->get_shard_count(); + m_current_state.push_back( + {0, calc_level_record_capacity(0), 0, shard_capacity}); } - /* - * Mark a given memory level as no-longer in use by the tree. For now this - * will just free the level. In future, this will be more complex as the - * level may not be able to immediately be deleted, depending upon who - * else is using it. - */ - inline void mark_as_unused(std::shared_ptr<InternalLevel<R, Shard, Q>> level) { - level.reset(); + if constexpr (L == LayoutPolicy::LEVELING) { + // FIXME: Kludgey implementation due to interface constraints. + auto old_level = m_levels[0].get(); + auto temp_level = new InternalLevel<ShardType, QueryType>(0, 1); + temp_level->append_buffer(std::move(buffer)); + + if (old_level->get_shard_count() > 0) { + m_levels[0] = InternalLevel<ShardType, QueryType>::reconstruction( + old_level, temp_level); + delete temp_level; + } else { + m_levels[0] = + std::shared_ptr<InternalLevel<ShardType, QueryType>>(temp_level); + } + } else { + m_levels[0]->append_buffer(std::move(buffer)); } - /* - * Assume that level "0" should be larger than the buffer. The buffer - * itself is index -1, which should return simply the buffer capacity. - */ - inline size_t calc_level_record_capacity(level_index idx) { - return m_buffer_size * pow(m_scale_factor, idx+1); + /* update the state vector */ + m_current_state[0].reccnt = m_levels[0]->get_record_count(); + m_current_state[0].shardcnt = m_levels[0]->get_shard_count(); + } + + /* + * Mark a given memory level as no-longer in use by the tree. For now this + * will just free the level. In future, this will be more complex as the + * level may not be able to immediately be deleted, depending upon who + * else is using it. + */ + inline void + mark_as_unused(std::shared_ptr<InternalLevel<ShardType, QueryType>> level) { + level.reset(); + } + + /* + * Assume that level "0" should be larger than the buffer. The buffer + * itself is index -1, which should return simply the buffer capacity. + */ + inline size_t calc_level_record_capacity(level_index idx) { + return m_buffer_size * pow(m_scale_factor, idx + 1); + } + + /* + * Returns the number of records present on a specified level. + */ + inline size_t get_level_record_count(level_index idx) { + return (m_levels[idx]) ? m_levels[idx]->get_record_count() : 0; + } + + /* + * Determines if a level can sustain a reconstruction with incoming_rec_cnt + * additional records without exceeding its capacity. + */ + inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt, + state_vector &state) { + if (idx >= state.size()) { + return false; } - /* - * Returns the number of records present on a specified level. - */ - inline size_t get_level_record_count(level_index idx) { - return (m_levels[idx]) ? 
m_levels[idx]->get_record_count() : 0; + if constexpr (L == LayoutPolicy::LEVELING) { + return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; + } else if constexpr (L == LayoutPolicy::BSM) { + return state[idx].reccnt == 0; + } else { + return state[idx].shardcnt < state[idx].shardcap; } - /* - * Determines if a level can sustain a reconstruction with incoming_rec_cnt - * additional records without exceeding its capacity. - */ - inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt, state_vector &state) { - if (idx >= state.size()) { - return false; - } - - if constexpr (L == LayoutPolicy::LEVELING) { - return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; - } else if constexpr (L == LayoutPolicy::BSM) { - return state[idx].reccnt == 0; - } else { - return state[idx].shardcnt < state[idx].shardcap; - } - - /* unreachable */ - assert(true); - } + /* unreachable */ + assert(true); + } }; -} - +} // namespace de diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h index b962dcc..a4cf94d 100644 --- a/include/framework/structure/InternalLevel.h +++ b/include/framework/structure/InternalLevel.h @@ -1,8 +1,8 @@ /* * include/framework/structure/InternalLevel.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -15,276 +15,281 @@ */ #pragma once -#include <vector> #include <memory> +#include <vector> -#include "util/types.h" -#include "framework/interface/Shard.h" #include "framework/interface/Query.h" #include "framework/interface/Record.h" +#include "framework/interface/Shard.h" #include "framework/structure/BufferView.h" +#include "util/types.h" namespace de { -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType> class InternalLevel; - - -template <RecordInterface R, ShardInterface<R> S, QueryInterface<R, S> Q> +template <ShardInterface ShardType, QueryInterface<ShardType> QueryType> class InternalLevel { - typedef S Shard; - typedef BufferView<R> BuffView; -public: - InternalLevel(ssize_t level_no, size_t shard_cap) - : m_level_no(level_no) - , m_shard_cnt(0) - , m_shards(shard_cap, nullptr) - , m_pending_shard(nullptr) - {} - - ~InternalLevel() { - delete m_pending_shard; - } + typedef typename ShardType::RECORD RecordType; + typedef BufferView<RecordType> BuffView; - /* - * Create a new shard combining the records from base_level and new_level, - * and return a shared_ptr to a new level containing this shard. This is used - * for reconstructions under the leveling layout policy. - * - * No changes are made to the levels provided as arguments. 
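/*
 * The reconstruction helpers here build a brand-new level and leave their
 * inputs untouched, so a reader that still holds a shared_ptr to the old
 * levels keeps a consistent view while the writer swaps in the replacement.
 * A compressed sketch of that copy-on-write pattern; the Level struct below
 * is a placeholder, not the framework's InternalLevel.
 */
#include <cassert>
#include <memory>
#include <vector>

struct Level {
  std::vector<int> records;
};

int main() {
  auto current = std::make_shared<Level>(Level{{1, 2, 3}});

  std::shared_ptr<Level> readers_view = current;    /* a reader pins this version */

  auto rebuilt = std::make_shared<Level>(*current); /* writer copies, never mutates */
  rebuilt->records.push_back(4);
  current = rebuilt;                                /* swap the new version in */

  assert(readers_view->records.size() == 3); /* pinned view is unchanged */
  assert(current->records.size() == 4);
  return 0;
}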
- */ - static std::shared_ptr<InternalLevel> reconstruction(InternalLevel* base_level, InternalLevel* new_level) { - assert(base_level->m_level_no > new_level->m_level_no || (base_level->m_level_no == 0 && new_level->m_level_no == 0)); - auto res = new InternalLevel(base_level->m_level_no, 1); - res->m_shard_cnt = 1; - std::vector<Shard *> shards = {base_level->m_shards[0].get(), +public: + InternalLevel(ssize_t level_no, size_t shard_cap) + : m_level_no(level_no), m_shard_cnt(0), m_shards(shard_cap, nullptr), + m_pending_shard(nullptr) {} + + ~InternalLevel() { delete m_pending_shard; } + + /* + * Create a new shard combining the records from base_level and new_level, + * and return a shared_ptr to a new level containing this shard. This is used + * for reconstructions under the leveling layout policy. + * + * No changes are made to the levels provided as arguments. + */ + static std::shared_ptr<InternalLevel> + reconstruction(InternalLevel *base_level, InternalLevel *new_level) { + assert(base_level->m_level_no > new_level->m_level_no || + (base_level->m_level_no == 0 && new_level->m_level_no == 0)); + auto res = new InternalLevel(base_level->m_level_no, 1); + res->m_shard_cnt = 1; + std::vector<ShardType *> shards = {base_level->m_shards[0].get(), new_level->m_shards[0].get()}; - res->m_shards[0] = std::make_shared<S>(shards); - return std::shared_ptr<InternalLevel>(res); + res->m_shards[0] = std::make_shared<ShardType>(shards); + return std::shared_ptr<InternalLevel>(res); + } + + static std::shared_ptr<InternalLevel> + reconstruction(std::vector<InternalLevel *> levels, size_t level_idx) { + std::vector<ShardType *> shards; + for (auto level : levels) { + for (auto shard : level->m_shards) { + if (shard) + shards.emplace_back(shard.get()); + } } - static std::shared_ptr<InternalLevel> reconstruction(std::vector<InternalLevel*> levels, size_t level_idx) { - std::vector<Shard *> shards; - for (auto level : levels) { - for (auto shard : level->m_shards) { - if (shard) shards.emplace_back(shard.get()); - } - } - - auto res = new InternalLevel(level_idx, 1); - res->m_shard_cnt = 1; - res->m_shards[0] = std::make_shared<S>(shards); + auto res = new InternalLevel(level_idx, 1); + res->m_shard_cnt = 1; + res->m_shards[0] = std::make_shared<ShardType>(shards); + + return std::shared_ptr<InternalLevel>(res); + } + + /* + * Create a new shard combining the records from all of + * the shards in level, and append this new shard into + * this level. This is used for reconstructions under + * the tiering layout policy. + * + * No changes are made to the level provided as an argument. + */ + void append_level(InternalLevel *level) { + // FIXME: that this is happening probably means that + // something is going terribly wrong earlier in the + // reconstruction logic. + if (level->get_shard_count() == 0) { + return; + } - return std::shared_ptr<InternalLevel>(res); + std::vector<ShardType *> shards; + for (auto shard : level->m_shards) { + if (shard) + shards.emplace_back(shard.get()); } - /* - * Create a new shard combining the records from all of - * the shards in level, and append this new shard into - * this level. This is used for reconstructions under - * the tiering layout policy. - * - * No changes are made to the level provided as an argument. - */ - void append_level(InternalLevel* level) { - // FIXME: that this is happening probably means that - // something is going terribly wrong earlier in the - // reconstruction logic. 
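/*
 * append_level is the tiering-side primitive: all shards of the source level
 * are combined into one new shard that is appended here, so each level fills
 * up to its shard capacity before spilling downward. A toy simulation of that
 * cascade which tracks shard counts only; the scale factor and flush count
 * are made-up values, not configuration taken from this patch.
 */
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t scale_factor = 4; /* hypothetical shards-per-level capacity */
  std::vector<std::size_t> shards_per_level = {0};

  for (std::size_t flush = 1; flush <= 20; flush++) {
    shards_per_level[0]++; /* a buffer flush adds one shard to level 0 */

    /* cascade: a full level is merged into a single shard on the level below */
    for (std::size_t lvl = 0; lvl < shards_per_level.size(); lvl++) {
      if (shards_per_level[lvl] == scale_factor) {
        if (lvl + 1 == shards_per_level.size())
          shards_per_level.push_back(0);
        shards_per_level[lvl + 1]++; /* combined shard lands on the next level */
        shards_per_level[lvl] = 0;   /* the source level is emptied */
      }
    }
  }

  for (std::size_t lvl = 0; lvl < shards_per_level.size(); lvl++)
    std::printf("level %zu: %zu shards\n", lvl, shards_per_level[lvl]);
  return 0;
}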
- if (level->get_shard_count() == 0) { - return; - } + if (m_shard_cnt == m_shards.size()) { + m_pending_shard = new ShardType(shards); + return; + } - std::vector<S*> shards; - for (auto shard : level->m_shards) { - if (shard) shards.emplace_back(shard.get()); - } + auto tmp = new ShardType(shards); + m_shards[m_shard_cnt] = std::shared_ptr<ShardType>(tmp); + + ++m_shard_cnt; + } + + /* + * Create a new shard using the records in the + * provided buffer, and append this new shard + * into this level. This is used for buffer + * flushes under the tiering layout policy. + */ + void append_buffer(BuffView buffer) { + if (m_shard_cnt == m_shards.size()) { + assert(m_pending_shard == nullptr); + m_pending_shard = new ShardType(std::move(buffer)); + return; + } - if (m_shard_cnt == m_shards.size()) { - m_pending_shard = new S(shards); - return; - } + m_shards[m_shard_cnt] = std::make_shared<ShardType>(std::move(buffer)); + ++m_shard_cnt; + } - auto tmp = new S(shards); - m_shards[m_shard_cnt] = std::shared_ptr<S>(tmp); + void finalize() { + if (m_pending_shard) { + for (size_t i = 0; i < m_shards.size(); i++) { + m_shards[i] = nullptr; + } - ++m_shard_cnt; + m_shards[0] = std::shared_ptr<ShardType>(m_pending_shard); + m_pending_shard = nullptr; + m_shard_cnt = 1; } - - /* - * Create a new shard using the records in the - * provided buffer, and append this new shard - * into this level. This is used for buffer - * flushes under the tiering layout policy. - */ - void append_buffer(BuffView buffer) { - if (m_shard_cnt == m_shards.size()) { - assert(m_pending_shard == nullptr); - m_pending_shard = new S(std::move(buffer)); - return; - } - - m_shards[m_shard_cnt] = std::make_shared<S>(std::move(buffer)); - ++m_shard_cnt; + } + + /* + * Create a new shard containing the combined records + * from all shards on this level and return it. + * + * No changes are made to this level. + */ + ShardType *get_combined_shard() { + if (m_shard_cnt == 0) { + return nullptr; } - void finalize() { - if (m_pending_shard) { - for (size_t i=0; i<m_shards.size(); i++) { - m_shards[i] = nullptr; - } - - m_shards[0] = std::shared_ptr<S>(m_pending_shard); - m_pending_shard = nullptr; - m_shard_cnt = 1; - } + std::vector<ShardType *> shards; + for (auto shard : m_shards) { + if (shard) + shards.emplace_back(shard.get()); } - /* - * Create a new shard containing the combined records - * from all shards on this level and return it. - * - * No changes are made to this level. 
- */ - Shard *get_combined_shard() { - if (m_shard_cnt == 0) { - return nullptr; - } - - std::vector<Shard *> shards; - for (auto shard : m_shards) { - if (shard) shards.emplace_back(shard.get()); - } - - return new S(shards); + return new ShardType(shards); + } + + void get_local_queries( + std::vector<std::pair<ShardID, ShardType *>> &shards, + std::vector<typename QueryType::LocalQuery *> &local_queries, + typename QueryType::Parameters *query_parms) { + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + auto local_query = + QueryType::local_preproc(m_shards[i].get(), query_parms); + shards.push_back({{m_level_no, (ssize_t)i}, m_shards[i].get()}); + local_queries.emplace_back(local_query); + } } + } - void get_query_states(std::vector<std::pair<ShardID, Shard *>> &shards, std::vector<void*>& shard_states, void *query_parms) { - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - auto shard_state = Q::get_query_state(m_shards[i].get(), query_parms); - shards.push_back({{m_level_no, (ssize_t) i}, m_shards[i].get()}); - shard_states.emplace_back(shard_state); - } + bool check_tombstone(size_t shard_stop, const RecordType &rec) { + if (m_shard_cnt == 0) + return false; + + for (int i = m_shard_cnt - 1; i >= (ssize_t)shard_stop; i--) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec, true); + if (res && res->is_tombstone()) { + return true; } + } } - - bool check_tombstone(size_t shard_stop, const R& rec) { - if (m_shard_cnt == 0) return false; - - for (int i = m_shard_cnt - 1; i >= (ssize_t) shard_stop; i--) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec, true); - if (res && res->is_tombstone()) { - return true; - } - } + return false; + } + + bool delete_record(const RecordType &rec) { + if (m_shard_cnt == 0) + return false; + + for (size_t i = 0; i < m_shards.size(); ++i) { + if (m_shards[i]) { + auto res = m_shards[i]->point_lookup(rec); + if (res) { + res->set_delete(); + return true; } - return false; + } } - bool delete_record(const R &rec) { - if (m_shard_cnt == 0) return false; - - for (size_t i = 0; i < m_shards.size(); ++i) { - if (m_shards[i]) { - auto res = m_shards[i]->point_lookup(rec); - if (res) { - res->set_delete(); - return true; - } - } - } + return false; + } - return false; + ShardType *get_shard(size_t idx) { + if (idx >= m_shard_cnt) { + return nullptr; } - Shard* get_shard(size_t idx) { - if (idx >= m_shard_cnt) { - return nullptr; - } + return m_shards[idx].get(); + } - return m_shards[idx].get(); - } + size_t get_shard_count() { return m_shard_cnt; } - size_t get_shard_count() { - return m_shard_cnt; + size_t get_record_count() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_record_count(); + } } - size_t get_record_count() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - cnt += m_shards[i]->get_record_count(); - } - } + return cnt; + } - return cnt; + size_t get_tombstone_count() { + size_t res = 0; + for (size_t i = 0; i < m_shard_cnt; ++i) { + if (m_shards[i]) { + res += m_shards[i]->get_tombstone_count(); + } } - - size_t get_tombstone_count() { - size_t res = 0; - for (size_t i = 0; i < m_shard_cnt; ++i) { - if (m_shards[i]) { - res += m_shards[i]->get_tombstone_count(); - } - } - return res; + return res; + } + + size_t get_aux_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_aux_memory_usage(); + } } - size_t 
get_aux_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]){ - cnt += m_shards[i]->get_aux_memory_usage(); - } - } + return cnt; + } - return cnt; + size_t get_memory_usage() { + size_t cnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + cnt += m_shards[i]->get_memory_usage(); + } } - size_t get_memory_usage() { - size_t cnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - cnt += m_shards[i]->get_memory_usage(); - } - } - - return cnt; + return cnt; + } + + double get_tombstone_prop() { + size_t tscnt = 0; + size_t reccnt = 0; + for (size_t i = 0; i < m_shard_cnt; i++) { + if (m_shards[i]) { + tscnt += m_shards[i]->get_tombstone_count(); + reccnt += m_shards[i]->get_record_count(); + } } - double get_tombstone_prop() { - size_t tscnt = 0; - size_t reccnt = 0; - for (size_t i=0; i<m_shard_cnt; i++) { - if (m_shards[i]) { - tscnt += m_shards[i]->get_tombstone_count(); - reccnt += m_shards[i]->get_record_count(); - } - } + return (double)tscnt / (double)(tscnt + reccnt); + } - return (double) tscnt / (double) (tscnt + reccnt); + std::shared_ptr<InternalLevel> clone() { + auto new_level = + std::make_shared<InternalLevel>(m_level_no, m_shards.size()); + for (size_t i = 0; i < m_shard_cnt; i++) { + new_level->m_shards[i] = m_shards[i]; } + new_level->m_shard_cnt = m_shard_cnt; - std::shared_ptr<InternalLevel> clone() { - auto new_level = std::make_shared<InternalLevel>(m_level_no, m_shards.size()); - for (size_t i=0; i<m_shard_cnt; i++) { - new_level->m_shards[i] = m_shards[i]; - } - new_level->m_shard_cnt = m_shard_cnt; - - return new_level; - } + return new_level; + } private: - ssize_t m_level_no; - - size_t m_shard_cnt; - size_t m_shard_size_cap; + ssize_t m_level_no; + + size_t m_shard_cnt; + size_t m_shard_size_cap; - std::vector<std::shared_ptr<Shard>> m_shards; - Shard *m_pending_shard; + std::vector<std::shared_ptr<ShardType>> m_shards; + ShardType *m_pending_shard; }; -} +} // namespace de diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h index 7db3980..625b04b 100644 --- a/include/framework/structure/MutableBuffer.h +++ b/include/framework/structure/MutableBuffer.h @@ -1,8 +1,8 @@ /* * include/framework/structure/MutableBuffer.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -18,301 +18,281 @@ */ #pragma once -#include <cstdlib> #include <atomic> #include <cassert> +#include <cstdlib> #include <immintrin.h> -#include "psu-util/alignment.h" -#include "util/bf_config.h" -#include "psu-ds/BloomFilter.h" #include "framework/interface/Record.h" #include "framework/structure/BufferView.h" - -using psudb::CACHELINE_SIZE; +#include "psu-ds/BloomFilter.h" +#include "psu-util/alignment.h" +#include "util/bf_config.h" namespace de { -template <RecordInterface R> -class MutableBuffer { - friend class BufferView<R>; +template <RecordInterface R> class MutableBuffer { + friend class BufferView<R>; - struct buffer_head { - size_t head_idx; - size_t refcnt; - }; - -public: - MutableBuffer(size_t low_watermark, size_t high_watermark, size_t capacity=0) - : m_lwm(low_watermark) - , m_hwm(high_watermark) - , m_cap((capacity == 0) ? 
2 * high_watermark : capacity) - , m_tail(0) - , m_head({0, 0}) - , m_old_head({high_watermark, 0}) - //, m_data((Wrapped<R> *) psudb::sf_aligned_alloc(CACHELINE_SIZE, m_cap * sizeof(Wrapped<R>))) - , m_data(new Wrapped<R>[m_cap]()) - , m_tombstone_filter(new psudb::BloomFilter<R>(BF_FPR, m_hwm, BF_HASH_FUNCS)) - , m_tscnt(0) - , m_old_tscnt(0) - , m_active_head_advance(false) - { - assert(m_cap > m_hwm); - assert(m_hwm >= m_lwm); - } + struct buffer_head { + size_t head_idx; + size_t refcnt; + }; - ~MutableBuffer() { - delete[] m_data; - delete m_tombstone_filter; +public: + MutableBuffer(size_t low_watermark, size_t high_watermark, + size_t capacity = 0) + : m_lwm(low_watermark), m_hwm(high_watermark), + m_cap((capacity == 0) ? 2 * high_watermark : capacity), m_tail(0), + m_head({0, 0}), m_old_head({high_watermark, 0}), + m_data(new Wrapped<R>[m_cap]()), + m_tombstone_filter( + new psudb::BloomFilter<R>(BF_FPR, m_hwm, BF_HASH_FUNCS)), + m_tscnt(0), m_old_tscnt(0), m_active_head_advance(false) { + assert(m_cap > m_hwm); + assert(m_hwm >= m_lwm); + } + + ~MutableBuffer() { + delete[] m_data; + delete m_tombstone_filter; + } + + int append(const R &rec, bool tombstone = false) { + int32_t tail = 0; + if ((tail = try_advance_tail()) == -1) { + return 0; } - int append(const R &rec, bool tombstone=false) { - int32_t tail = 0; - if ((tail = try_advance_tail()) == -1) { - return 0; - } - - Wrapped<R> wrec; - wrec.rec = rec; - wrec.header = 0; - if (tombstone) wrec.set_tombstone(); + Wrapped<R> wrec; + wrec.rec = rec; + wrec.header = 0; + if (tombstone) + wrec.set_tombstone(); - // FIXME: because of the mod, it isn't correct to use `pos` - // as the ordering timestamp in the header anymore. - size_t pos = tail % m_cap; - - m_data[pos] = wrec; - m_data[pos].set_timestamp(pos); - - if (tombstone) { - m_tscnt.fetch_add(1); - if (m_tombstone_filter) m_tombstone_filter->insert(rec); - } + // FIXME: because of the mod, it isn't correct to use `pos` + // as the ordering timestamp in the header anymore. 
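/*
 * A small stand-alone model of the circular indexing used in append(): the
 * logical tail only ever grows, the physical slot is tail % capacity, and once
 * the buffer wraps the slot number can no longer double as an ordering
 * timestamp (which is what the FIXME above points at). The capacity and record
 * values are invented for the example.
 */
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::size_t cap = 4;
  std::vector<int> slots(cap, 0);
  std::size_t head = 0, tail = 0; /* logical indices; they never wrap */

  for (int rec = 1; rec <= 6; rec++) {
    assert(tail - head < cap);    /* physical space must be available */
    std::size_t pos = tail % cap; /* the physical slot wraps around */
    slots[pos] = rec;
    tail++;
    if (tail - head > 3)
      head++;                     /* pretend a flush retired the oldest record */
  }

  assert(slots[0] == 5);          /* slot 0 now holds record 5, not record 1 */
  assert(tail == 6 && head == 3);
  return 0;
}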
+ size_t pos = tail % m_cap; - m_data[pos].set_visible(); + m_data[pos] = wrec; + m_data[pos].set_timestamp(pos); - return 1; + if (tombstone) { + m_tscnt.fetch_add(1); + if (m_tombstone_filter) + m_tombstone_filter->insert(rec); } - bool truncate() { - m_tscnt.store(0); - m_tail.store(0); - if (m_tombstone_filter) m_tombstone_filter->clear(); + m_data[pos].set_visible(); - return true; - } + return 1; + } - size_t get_record_count() { - return m_tail.load() - m_head.load().head_idx; - } - - size_t get_capacity() { - return m_cap; - } + bool truncate() { + m_tscnt.store(0); + m_tail.store(0); + if (m_tombstone_filter) + m_tombstone_filter->clear(); - bool is_full() { - return get_record_count() >= m_hwm; - } + return true; + } - bool is_at_low_watermark() { - return get_record_count() >= m_lwm; - } + size_t get_record_count() { return m_tail.load() - m_head.load().head_idx; } - size_t get_tombstone_count() { - return m_tscnt.load(); - } + size_t get_capacity() { return m_cap; } - bool delete_record(const R& rec) { - return get_buffer_view().delete_record(rec); - } + bool is_full() { return get_record_count() >= m_hwm; } - bool check_tombstone(const R& rec) { - return get_buffer_view().check_tombstone(rec); - } + bool is_at_low_watermark() { return get_record_count() >= m_lwm; } - size_t get_memory_usage() { - return m_cap * sizeof(Wrapped<R>); - } + size_t get_tombstone_count() { return m_tscnt.load(); } - size_t get_aux_memory_usage() { - return m_tombstone_filter->get_memory_usage(); - } + bool delete_record(const R &rec) { + return get_buffer_view().delete_record(rec); + } - BufferView<R> get_buffer_view(size_t target_head) { - size_t head = get_head(target_head); - auto f = std::bind(release_head_reference, (void *) this, head); + bool check_tombstone(const R &rec) { + return get_buffer_view().check_tombstone(rec); + } - return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); - } + size_t get_memory_usage() { return m_cap * sizeof(Wrapped<R>); } - BufferView<R> get_buffer_view() { - size_t head = get_head(m_head.load().head_idx); - auto f = std::bind(release_head_reference, (void *) this, head); + size_t get_aux_memory_usage() { + return m_tombstone_filter->get_memory_usage(); + } - return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), m_tombstone_filter, f); - } + BufferView<R> get_buffer_view(size_t target_head) { + size_t head = get_head(target_head); + auto f = std::bind(release_head_reference, (void *)this, head); - /* - * Advance the buffer following a reconstruction. Move current - * head and head_refcnt into old_head and old_head_refcnt, then - * assign new_head to old_head. 
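/*
 * advance_head retires the current head into the old_head slot so readers that
 * pinned the previous region can finish before its storage is reused, and the
 * advance is refused while the old head still holds references. A compressed,
 * single-threaded model of that handoff; plain structs stand in for the atomic
 * members of MutableBuffer.
 */
#include <cassert>
#include <cstddef>

struct BufferHead {
  std::size_t head_idx;
  std::size_t refcnt;
};

struct Heads {
  BufferHead head = {0, 0};
  BufferHead old_head = {0, 0};

  bool advance(std::size_t new_head) {
    if (old_head.refcnt > 0)
      return false;      /* refuse while the previous region is pinned */
    old_head = head;     /* the current head (and its readers) ages out */
    head = {new_head, 0};
    return true;
  }
};

int main() {
  Heads h;
  h.head.refcnt = 2;              /* two readers pinned the current region */
  assert(h.advance(100));         /* ok: old_head was unreferenced */
  assert(h.old_head.refcnt == 2); /* those readers now pin the old region */
  assert(!h.advance(200));        /* refused until they release */
  h.old_head.refcnt = 0;
  assert(h.advance(200));
  return 0;
}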
- */ - bool advance_head(size_t new_head) { - assert(new_head > m_head.load().head_idx); - assert(new_head <= m_tail.load()); - - /* refuse to advance head while there is an old with one references */ - if (m_old_head.load().refcnt > 0) { - //fprintf(stderr, "[W]: Refusing to advance head due to remaining reference counts\n"); - return false; - } - - m_active_head_advance.store(true); + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), + m_tombstone_filter, f); + } - buffer_head new_hd = {new_head, 0}; - buffer_head cur_hd; + BufferView<R> get_buffer_view() { + size_t head = get_head(m_head.load().head_idx); + auto f = std::bind(release_head_reference, (void *)this, head); - /* replace current head with new head */ - do { - cur_hd = m_head.load(); - } while(!m_head.compare_exchange_strong(cur_hd, new_hd)); + return BufferView<R>(m_data, m_cap, head, m_tail.load(), m_tscnt.load(), + m_tombstone_filter, f); + } - /* move the current head into the old head */ - m_old_head.store(cur_hd); + /* + * Advance the buffer following a reconstruction. Move current + * head and head_refcnt into old_head and old_head_refcnt, then + * assign new_head to old_head. + */ + bool advance_head(size_t new_head) { + assert(new_head > m_head.load().head_idx); + assert(new_head <= m_tail.load()); - m_active_head_advance.store(false); - return true; + /* refuse to advance head while there is an old with one references */ + if (m_old_head.load().refcnt > 0) { + // fprintf(stderr, "[W]: Refusing to advance head due to remaining + // reference counts\n"); + return false; } - /* - * FIXME: If target_head does not match *either* the old_head or the - * current_head, this routine will loop infinitely. - */ - size_t get_head(size_t target_head) { - buffer_head cur_hd, new_hd; - bool head_acquired = false; - - do { - if (m_old_head.load().head_idx == target_head) { - cur_hd = m_old_head.load(); - cur_hd.head_idx = target_head; - new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; - head_acquired = m_old_head.compare_exchange_strong(cur_hd, new_hd); - } else if (m_head.load().head_idx == target_head){ - cur_hd = m_head.load(); - cur_hd.head_idx = target_head; - new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; - head_acquired = m_head.compare_exchange_strong(cur_hd, new_hd); - } - } while(!head_acquired); - - return new_hd.head_idx; + m_active_head_advance.store(true); + + buffer_head new_hd = {new_head, 0}; + buffer_head cur_hd; + + /* replace current head with new head */ + do { + cur_hd = m_head.load(); + } while (!m_head.compare_exchange_strong(cur_hd, new_hd)); + + /* move the current head into the old head */ + m_old_head.store(cur_hd); + + m_active_head_advance.store(false); + return true; + } + + /* + * FIXME: If target_head does not match *either* the old_head or the + * current_head, this routine will loop infinitely. 
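/*
 * get_head acquires a reference by retrying compare_exchange_strong until the
 * refcount bump lands on whichever head currently matches target_head. A
 * self-contained sketch of that acquire loop on a two-word head struct; note
 * that std::atomic over a 16-byte struct is not guaranteed to be lock-free on
 * every platform, and the helper name below is invented for the example.
 */
#include <atomic>
#include <cassert>
#include <cstddef>

struct BufferHead {
  std::size_t head_idx;
  std::size_t refcnt;
};

/* bump the refcount of `head`, but only if it still points at target_idx */
bool try_acquire(std::atomic<BufferHead> &head, std::size_t target_idx) {
  BufferHead cur = head.load();
  if (cur.head_idx != target_idx)
    return false;
  BufferHead next = {cur.head_idx, cur.refcnt + 1};
  return head.compare_exchange_strong(cur, next); /* fails if another thread raced us */
}

int main() {
  std::atomic<BufferHead> head{{42, 0}};

  while (!try_acquire(head, 42)) { /* retry loop, as in get_head() */
  }

  assert(head.load().refcnt == 1);
  assert(!try_acquire(head, 7));   /* wrong head index: acquisition refused */
  return 0;
}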
+ */ + size_t get_head(size_t target_head) { + buffer_head cur_hd, new_hd; + bool head_acquired = false; + + do { + if (m_old_head.load().head_idx == target_head) { + cur_hd = m_old_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_old_head.compare_exchange_strong(cur_hd, new_hd); + } else if (m_head.load().head_idx == target_head) { + cur_hd = m_head.load(); + cur_hd.head_idx = target_head; + new_hd = {cur_hd.head_idx, cur_hd.refcnt + 1}; + head_acquired = m_head.compare_exchange_strong(cur_hd, new_hd); + } + } while (!head_acquired); + + return new_hd.head_idx; + } + + void set_low_watermark(size_t lwm) { + assert(lwm < m_hwm); + m_lwm = lwm; + } + + size_t get_low_watermark() { return m_lwm; } + + void set_high_watermark(size_t hwm) { + assert(hwm > m_lwm); + assert(hwm < m_cap); + m_hwm = hwm; + } + + size_t get_high_watermark() { return m_hwm; } + + size_t get_tail() { return m_tail.load(); } + + /* + * Note: this returns the available physical storage capacity, + * *not* now many more records can be inserted before the + * HWM is reached. It considers the old_head to be "free" + * when it has no remaining references. This should be true, + * but a buggy framework implementation may violate the + * assumption. + */ + size_t get_available_capacity() { + if (m_old_head.load().refcnt == 0) { + return m_cap - (m_tail.load() - m_head.load().head_idx); } - void set_low_watermark(size_t lwm) { - assert(lwm < m_hwm); - m_lwm = lwm; - } + return m_cap - (m_tail.load() - m_old_head.load().head_idx); + } - size_t get_low_watermark() { - return m_lwm; - } +private: + int64_t try_advance_tail() { + size_t old_value = m_tail.load(); - void set_high_watermark(size_t hwm) { - assert(hwm > m_lwm); - assert(hwm < m_cap); - m_hwm = hwm; + /* if full, fail to advance the tail */ + if (old_value - m_head.load().head_idx >= m_hwm) { + return -1; } - size_t get_high_watermark() { - return m_hwm; - } + while (!m_tail.compare_exchange_strong(old_value, old_value + 1)) { + /* if full, stop trying and fail to advance the tail */ + if (m_tail.load() >= m_hwm) { + return -1; + } - size_t get_tail() { - return m_tail.load(); + _mm_pause(); } - /* - * Note: this returns the available physical storage capacity, - * *not* now many more records can be inserted before the - * HWM is reached. It considers the old_head to be "free" - * when it has no remaining references. This should be true, - * but a buggy framework implementation may violate the - * assumption. 
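/*
 * A worked example (with made-up numbers) of the distinction this note draws:
 * get_available_capacity reports physical headroom measured from the oldest
 * still-live head, which is not the same as the number of inserts left before
 * the high watermark.
 */
#include <cassert>
#include <cstddef>

int main() {
  const std::size_t cap = 2000, hwm = 1000;
  std::size_t tail = 1400, head = 900, old_head = 600;
  std::size_t old_head_refcnt = 1; /* a reader still pins the old region */

  /* physical headroom must include the old region while it is pinned */
  std::size_t available = (old_head_refcnt == 0) ? cap - (tail - head)
                                                 : cap - (tail - old_head);
  assert(available == 1200);

  /* logical headroom before the high watermark is a different quantity */
  std::size_t before_hwm = hwm - (tail - head);
  assert(before_hwm == 500);
  return 0;
}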
- */ - size_t get_available_capacity() { - if (m_old_head.load().refcnt == 0) { - return m_cap - (m_tail.load() - m_head.load().head_idx); - } + return old_value; + } - return m_cap - (m_tail.load() - m_old_head.load().head_idx); - } + size_t to_idx(size_t i, size_t head) { return (head + i) % m_cap; } -private: - int64_t try_advance_tail() { - size_t old_value = m_tail.load(); + static void release_head_reference(void *buff, size_t head) { + MutableBuffer<R> *buffer = (MutableBuffer<R> *)buff; - /* if full, fail to advance the tail */ - if (old_value - m_head.load().head_idx >= m_hwm) { - return -1; + buffer_head cur_hd, new_hd; + do { + if (buffer->m_old_head.load().head_idx == head) { + cur_hd = buffer->m_old_head; + if (cur_hd.refcnt == 0) + continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt - 1}; + if (buffer->m_old_head.compare_exchange_strong(cur_hd, new_hd)) { + break; } - - while (!m_tail.compare_exchange_strong(old_value, old_value+1)) { - /* if full, stop trying and fail to advance the tail */ - if (m_tail.load() >= m_hwm) { - return -1; - } - - _mm_pause(); + } else { + cur_hd = buffer->m_head; + if (cur_hd.refcnt == 0) + continue; + new_hd = {cur_hd.head_idx, cur_hd.refcnt - 1}; + + if (buffer->m_head.compare_exchange_strong(cur_hd, new_hd)) { + break; } + } + _mm_pause(); + } while (true); + } - return old_value; - } + size_t m_lwm; + size_t m_hwm; + size_t m_cap; - size_t to_idx(size_t i, size_t head) { - return (head + i) % m_cap; - } + alignas(64) std::atomic<size_t> m_tail; - static void release_head_reference(void *buff, size_t head) { - MutableBuffer<R> *buffer = (MutableBuffer<R> *) buff; - - buffer_head cur_hd, new_hd; - do { - if (buffer->m_old_head.load().head_idx == head) { - cur_hd = buffer->m_old_head; - if (cur_hd.refcnt == 0) continue; - new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; - if (buffer->m_old_head.compare_exchange_strong(cur_hd, new_hd)) { - break; - } - } else { - cur_hd = buffer->m_head; - if (cur_hd.refcnt == 0) continue; - new_hd = {cur_hd.head_idx, cur_hd.refcnt-1}; - - if (buffer->m_head.compare_exchange_strong(cur_hd, new_hd)) { - break; - } - } - _mm_pause(); - } while(true); - } + alignas(64) std::atomic<buffer_head> m_head; + alignas(64) std::atomic<buffer_head> m_old_head; + + Wrapped<R> *m_data; + psudb::BloomFilter<R> *m_tombstone_filter; + alignas(64) std::atomic<size_t> m_tscnt; + size_t m_old_tscnt; - size_t m_lwm; - size_t m_hwm; - size_t m_cap; - - alignas(64) std::atomic<size_t> m_tail; - - alignas(64) std::atomic<buffer_head> m_head; - alignas(64) std::atomic<buffer_head> m_old_head; - - Wrapped<R>* m_data; - psudb::BloomFilter<R>* m_tombstone_filter; - alignas(64) std::atomic<size_t> m_tscnt; - size_t m_old_tscnt; - - alignas(64) std::atomic<bool> m_active_head_advance; + alignas(64) std::atomic<bool> m_active_head_advance; }; -} +} // namespace de diff --git a/include/framework/util/Configuration.h b/include/framework/util/Configuration.h index 4a4524a..f4b0364 100644 --- a/include/framework/util/Configuration.h +++ b/include/framework/util/Configuration.h @@ -1,7 +1,7 @@ /* * include/framework/util/Configuration.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -13,35 +13,8 @@ namespace de { -static thread_local size_t sampling_attempts = 0; -static thread_local size_t sampling_rejections = 0; -static thread_local size_t deletion_rejections = 0; -static thread_local size_t bounds_rejections = 0; -static thread_local size_t tombstone_rejections = 0; -static thread_local size_t buffer_rejections = 0; +enum class LayoutPolicy { LEVELING, TEIRING, BSM }; -/* - * thread_local size_t various_sampling_times go here. - */ -static thread_local size_t sample_range_time = 0; -static thread_local size_t alias_time = 0; -static thread_local size_t alias_query_time = 0; -static thread_local size_t rejection_check_time = 0; -static thread_local size_t buffer_sample_time = 0; -static thread_local size_t memlevel_sample_time = 0; -static thread_local size_t disklevel_sample_time = 0; -static thread_local size_t sampling_bailouts = 0; - - -enum class LayoutPolicy { - LEVELING, - TEIRING, - BSM -}; - -enum class DeletePolicy { - TOMBSTONE, - TAGGING -}; +enum class DeletePolicy { TOMBSTONE, TAGGING }; -} +} // namespace de diff --git a/include/query/irs.h b/include/query/irs.h index 879d070..6dec850 100644 --- a/include/query/irs.h +++ b/include/query/irs.h @@ -1,12 +1,12 @@ /* * include/query/irs.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * - * A query class for independent range sampling. This query requires - * that the shard support get_lower_bound(key), get_upper_bound(key), + * A query class for independent range sampling. This query requires + * that the shard support get_lower_bound(key), get_upper_bound(key), * and get_record_at(index). */ #pragma once @@ -14,237 +14,227 @@ #include "framework/QueryRequirements.h" #include "psu-ds/Alias.h" -namespace de { namespace irs { +namespace de { +namespace irs { -template <RecordInterface R> -struct Parms { +template <ShardInterface S, bool REJECTION = true> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; size_t sample_size; gsl_rng *rng; -}; + }; - -template <RecordInterface R> -struct State { - size_t lower_bound; - size_t upper_bound; - size_t sample_size; + struct LocalQuery { + size_t lower_idx; + size_t upper_idx; size_t total_weight; -}; + size_t sample_size; + Parameters global_parms; + }; + + struct LocalQueryBuffer { + BufferView<R> *buffer; -template <RecordInterface R> -struct BufferState { size_t cutoff; std::vector<Wrapped<R>> records; + std::unique_ptr<psudb::Alias> alias; size_t sample_size; - BufferView<R> *buffer; - psudb::Alias *alias; + Parameters global_parms; + }; - BufferState(BufferView<R> *buffer) : buffer(buffer) {} - ~BufferState() { - delete alias; - } -}; + typedef Wrapped<R> LocalResultType; + typedef R ResultType; -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = false; - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); - res->lower_bound = 
shard->get_lower_bound(lower_key); - res->upper_bound = shard->get_upper_bound(upper_key); + query->global_parms = *parms; - if (res->lower_bound == shard->get_record_count()) { - res->total_weight = 0; - } else { - res->total_weight = res->upper_bound - res->lower_bound; - } + query->lower_idx = shard->get_lower_bound(query->global_parms.lower_bound); + query->upper_idx = shard->get_upper_bound(query->global_parms.upper_bound); - res->sample_size = 0; - return res; + if (query->lower_idx == shard->get_record_count()) { + query->total_weight = 0; + } else { + query->total_weight = query->upper_idx - query->lower_idx; } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); - - res->cutoff = res->buffer->get_record_count(); - res->sample_size = 0; - res->alias = nullptr; + query->sample_size = 0; + return query; + } - if constexpr (Rejection) { - return res; - } - - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; - for (size_t i=0; i<res->cutoff; i++) { - if ((res->buffer->get(i)->rec.key >= lower_key) && (buffer->get(i)->rec.key <= upper_key)) { - res->records.emplace_back(*(res->buffer->get(i))); - } - } + query->cutoff = query->buffer->get_record_count(); + query->sample_size = 0; + query->alias = nullptr; + query->global_parms = *parms; - return res; + if constexpr (REJECTION) { + return query; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buffer_state) { - auto p = (Parms<R> *) query_parms; - auto bs = (buffer_state) ? (BufferState<R> *) buffer_state : nullptr; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; + for (size_t i = 0; i < query->cutoff; i++) { + if ((query->buffer->get(i)->rec.key >= query->global_parms.lower_bound) && + (buffer->get(i)->rec.key <= query->global_parms.upper_bound)) { + query->records.emplace_back(*(query->buffer->get(i))); + } + } - /* for simplicity of static structure testing */ - if (!bs) { - assert(shard_states.size() == 1); - auto state = (State<R> *) shard_states[0]; - state->sample_size = p->sample_size; - return; - } + return query; + } - /* we only need to build the shard alias on the first call */ - if (bs->alias == nullptr) { - std::vector<size_t> weights; - if constexpr (Rejection) { - weights.push_back((bs) ? bs->cutoff : 0); - } else { - weights.push_back((bs) ? bs->records.size() : 0); - } - - size_t total_weight = weights[0]; - for (auto &s : shard_states) { - auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - // if no valid records fall within the query range, just - // set all of the sample sizes to 0 and bail out. 
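/*
 * distribute_query splits the global sample budget across the buffer and the
 * shards in proportion to how many in-range records each holds, drawing the
 * per-sample assignment from a weighted alias structure. A sketch of the same
 * allocation using std::discrete_distribution as a stand-in for psudb::Alias;
 * the weights, seed, and sample size are invented.
 */
#include <cassert>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

int main() {
  /* index 0 is the buffer; the rest are shards (in-range record counts) */
  std::vector<double> weights = {50, 1000, 4000, 0};
  const std::size_t sample_size = 1000;

  std::mt19937 rng(1337);
  std::discrete_distribution<std::size_t> pick(weights.begin(), weights.end());

  std::vector<std::size_t> assigned(weights.size(), 0);
  for (std::size_t i = 0; i < sample_size; i++)
    assigned[pick(rng)]++; /* each draw charges one sample to a source */

  assert(std::accumulate(assigned.begin(), assigned.end(), std::size_t{0}) ==
         sample_size);
  assert(assigned[3] == 0); /* a zero-weight shard is never sampled */
  return 0;
}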
- if (total_weight == 0) { - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (State<R> *) shard_states[i]; - state->sample_size = 0; - } - - return; - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - bs->alias = new psudb::Alias(normalized_weights); - } + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { - for (size_t i=0; i<p->sample_size; i++) { - auto idx = bs->alias->get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } + std::vector<size_t> shard_sample_sizes(local_queries.size() + 1, 0); + size_t buffer_sz = 0; - if (bs) { - bs->sample_size = buffer_sz; - } - for (size_t i=0; i<shard_states.size(); i++) { - auto state = (State<R> *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } + /* for simplicity of static structure testing */ + if (!buffer_query) { + assert(local_queries.size() == 1); + local_queries[0]->sample_size = + local_queries[0]->global_parms.sample_size; + return; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; - auto rng = ((Parms<R> *) parms)->rng; - - auto state = (State<R> *) q_state; - auto sample_sz = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_sz == 0 || state->lower_bound == shard->get_record_count()) { - return result_set; + /* we only need to build the shard alias on the first call */ + if (buffer_query->alias == nullptr) { + std::vector<size_t> weights; + if constexpr (REJECTION) { + weights.push_back(buffer_query->cutoff); + } else { + weights.push_back(buffer_query->records.size()); + } + + size_t total_weight = weights[0]; + for (auto &q : local_queries) { + total_weight += q->total_weight; + weights.push_back(q->total_weight); + } + + /* + * if no valid records fall within the query range, + * set all of the sample sizes to 0 and bail out. + */ + if (total_weight == 0) { + for (auto q : local_queries) { + q->sample_size = 0; } - size_t attempts = 0; - size_t range_length = state->upper_bound - state->lower_bound; - do { - attempts++; - size_t idx = (range_length > 0) ? 
gsl_rng_uniform_int(rng, range_length) : 0; - result_set.emplace_back(*shard->get_record_at(state->lower_bound + idx)); - } while (attempts < sample_sz); + return; + } - return result_set; - } + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double)w / (double)total_weight); + } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; + buffer_query->alias = std::make_unique<psudb::Alias>(normalized_weights); + } - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); + for (size_t i = 0; i < parms->sample_size; i++) { + auto idx = buffer_query->alias->get(parms->rng); + if (idx == 0) { + buffer_sz++; + } else { + shard_sample_sizes[idx - 1]++; + } + } - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = st->buffer->get(idx); + if (buffer_query) { + buffer_query->sample_size = buffer_sz; + } - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } + for (size_t i = 0; i < local_queries.size(); i++) { + local_queries[i]->sample_size = shard_sample_sizes[i]; + } + } - return result; - } + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + auto sample_sz = query->sample_size; - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->records.size()); - result.emplace_back(st->records[idx]); - } + std::vector<LocalResultType> result_set; - return result; + if (sample_sz == 0 || query->lower_idx == shard->get_record_count()) { + return result_set; } - static void merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } + size_t attempts = 0; + size_t range_length = query->upper_idx - query->lower_idx; + do { + attempts++; + size_t idx = + (range_length > 0) + ? 
gsl_rng_uniform_int(query->global_parms.rng, range_length) + : 0; + result_set.emplace_back(*shard->get_record_at(query->lower_idx + idx)); + } while (attempts < sample_sz); + + return result_set; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; + result.reserve(query->sample_size); + + if constexpr (REJECTION) { + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = gsl_rng_uniform_int(query->global_parms.rng, query->cutoff); + auto rec = query->buffer->get(idx); + + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*rec); } - } + } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + return result; } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = + gsl_rng_uniform_int(query->global_parms.rng, query->records.size()); + result.emplace_back(query->records[idx]); } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; - - if (results.size() < p->sample_size) { - auto q = *p; - q.sample_size -= results.size(); - process_query_states(&q, states, buffer_state); - return true; - } + return result; + } - return false; + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + output.emplace_back(local_results[i][j].rec); + } } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + if (output.size() < parms->sample_size) { + parms->sample_size -= output.size(); + distribute_query(parms, local_queries, buffer_query); + return true; + } + + return false; + } }; -}} +} // namespace irs +} // namespace de diff --git a/include/query/knn.h b/include/query/knn.h index a227293..87ea10a 100644 --- a/include/query/knn.h +++ b/include/query/knn.h @@ -6,7 +6,7 @@ * Distributed under the Modified BSD License. * * A query class for k-NN queries, designed for use with the VPTree - * shard. + * shard. * * FIXME: no support for tombstone deletes just yet. This would require a * query resumption mechanism, most likely. 
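/*
 * The k-NN query that follows keeps its k best candidates in a max-heap keyed
 * on distance, evicting the current worst whenever a closer record turns up.
 * A compact stand-alone version of that bounded-heap pattern over 1-D points;
 * the framework's version runs over a VPTree shard and wrapped records, and
 * the data below is invented.
 */
#include <cassert>
#include <cmath>
#include <cstddef>
#include <queue>
#include <vector>

int main() {
  const double query = 10.0;
  const std::size_t k = 3;
  std::vector<double> points = {1, 7, 9, 11, 25, 14, 10.5};

  /* max-heap ordered by distance to the query point */
  auto cmp = [&](double a, double b) {
    return std::abs(a - query) < std::abs(b - query);
  };
  std::priority_queue<double, std::vector<double>, decltype(cmp)> pq(cmp);

  for (double p : points) {
    if (pq.size() < k) {
      pq.push(p);
    } else if (std::abs(p - query) < std::abs(pq.top() - query)) {
      pq.pop(); /* evict the current worst candidate */
      pq.push(p);
    }
  }

  assert(pq.size() == k);
  assert(std::abs(pq.top() - query) <= 1.0); /* worst survivor is within 1.0 */
  return 0;
}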
@@ -16,147 +16,147 @@ #include "framework/QueryRequirements.h" #include "psu-ds/PriorityQueue.h" -namespace de { namespace knn { +namespace de { +namespace knn { using psudb::PriorityQueue; -template <NDRecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { R point; size_t k; -}; + }; -template <NDRecordInterface R> -struct State { - size_t k; -}; + struct LocalQuery { + Parameters global_parms; + }; -template <NDRecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; + Parameters global_parms; + }; - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; -template <NDRecordInterface R, ShardInterface<R> S> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + query->global_parms = *parms; - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } + return query; + } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - return new BufferState<R>(buffer); - } + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->global_parms = *parms; + query->buffer = buffer; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; - } + return query; + } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> results; - Parms<R> *p = (Parms<R> *) parms; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } - PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(p->k, &wrec); + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> results; - shard->search(p->point, p->k, pq); + Wrapped<R> wrec; + wrec.rec = query->global_parms.point; + wrec.header = 0; - while (pq.size() > 0) { - results.emplace_back(*pq.peek().data); - pq.pop(); - } + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(query->global_parms.k, + &wrec); - return results; + shard->search(query->global_parms.point, query->global_parms.k, pq); + + while (pq.size() > 0) { + results.emplace_back(*pq.peek().data); + pq.pop(); } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - Parms<R> *p = (Parms<R> *) parms; - BufferState<R> *s = (BufferState<R> *) state; - Wrapped<R> wrec; - wrec.rec = p->point; - wrec.header = 0; - - size_t k = p->k; - - PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(k, &wrec); - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - // Skip over deleted records (under tagging) - if (s->buffer->get(i)->is_deleted()) { - continue; - } - - if (pq.size() < k) { - pq.push(s->buffer->get(i)); - } else { - double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); - double cur_dist = (s->buffer->get(i))->rec.calc_distance(wrec.rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(s->buffer->get(i)); - } - } - } + return results; + } - std::vector<Wrapped<R>> results; - while (pq.size() > 0) { - 
results.emplace_back(*(pq.peek().data)); - pq.pop(); - } + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { - return std::move(results); - } + std::vector<LocalResultType> results; - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - Parms<R> *p = (Parms<R> *) parms; - R rec = p->point; - size_t k = p->k; - - PriorityQueue<R, DistCmpMax<R>> pq(k, &rec); - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - if (pq.size() < k) { - pq.push(&results[i][j].rec); - } else { - double head_dist = pq.peek().data->calc_distance(rec); - double cur_dist = results[i][j].rec.calc_distance(rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(&results[i][j].rec); - } - } - } - } + Wrapped<R> wrec; + wrec.rec = query->global_parms.point; + wrec.header = 0; - while (pq.size() > 0) { - output.emplace_back(*pq.peek().data); - pq.pop(); - } + PriorityQueue<Wrapped<R>, DistCmpMax<Wrapped<R>>> pq(query->global_parms.k, + &wrec); + + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + // Skip over deleted records (under tagging) + if (query->buffer->get(i)->is_deleted()) { + continue; + } - return std::move(output); + if (pq.size() < query->global_parms.k) { + pq.push(query->buffer->get(i)); + } else { + double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); + double cur_dist = (query->buffer->get(i))->rec.calc_distance(wrec.rec); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(query->buffer->get(i)); + } + } } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + while (pq.size() > 0) { + results.emplace_back(*(pq.peek().data)); + pq.pop(); } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + return std::move(results); + } + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + + PriorityQueue<R, DistCmpMax<R>> pq(parms->k, &(parms->point)); + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + if (pq.size() < parms->k) { + pq.push(&local_results[i][j].rec); + } else { + double head_dist = pq.peek().data->calc_distance(parms->point); + double cur_dist = local_results[i][j].rec.calc_distance(parms->point); + + if (cur_dist < head_dist) { + pq.pop(); + pq.push(&local_results[i][j].rec); + } + } + } } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; + while (pq.size() > 0) { + output.emplace_back(*pq.peek().data); + pq.pop(); } -}; + } -}} + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } +}; +} // namespace knn +} // namespace de diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h index 94c2bce..f3788de 100644 --- a/include/query/pointlookup.h +++ b/include/query/pointlookup.h @@ -18,106 +18,102 @@ #include "framework/QueryRequirements.h" -namespace de { namespace pl { +namespace de { +namespace pl { -template <RecordInterface R> -struct Parms { - decltype(R::key) search_key; -}; +template <ShardInterface S> class Query { + typedef typename S::RECORD R; -template <RecordInterface R> -struct State { -}; - -template <RecordInterface R> -struct BufferState { - BufferView<R> *buffer; - 
- BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <KVPInterface R, ShardInterface<R> S> -class Query { public: - constexpr static bool EARLY_ABORT=true; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); + struct Parameters { + decltype(R::key) search_key; + }; - return res; - } + struct LocalQuery { + Parameters global_parms; + }; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + struct LocalQueryBuffer { + BufferView<R> *buffer; + Parameters global_parms; + }; + + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + + constexpr static bool EARLY_ABORT = true; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + query->global_parms = *parms; + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms = *parms; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + auto r = shard->point_lookup({query->global_parms.search_key, 0}); + + if (r) { + result.push_back(*r); } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - std::vector<Wrapped<R>> result; - - auto r = shard->point_lookup({p->search_key, 0}); + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; - if (r) { - result.push_back(*r); - } + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key == query->global_parms.search_key) { + result.push_back(*rec); return result; + } } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; - - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - - if (rec->rec.key == p->search_key) { - records.push_back(*rec); - return records; - } + return result; + } + + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (auto r : local_results) { + if (r.size() > 0) { + if (r[0].is_deleted() || r[0].is_tombstone()) { + return; } - return records; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (auto r : results) { - if (r.size() > 0) { - if (r[0].is_deleted() || r[0].is_tombstone()) { - return output; - } - - output.push_back(r[0].rec); - return output; - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } - - - static bool repeat(void *parms, std::vector<R> 
&results, std::vector<void*> states, void* buffer_state) { - return false; + output.push_back(r[0].rec); + return; + } } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; - -}} +} // namespace pl +} // namespace de diff --git a/include/query/rangecount.h b/include/query/rangecount.h index 5b95cdd..68d304d 100644 --- a/include/query/rangecount.h +++ b/include/query/rangecount.h @@ -5,169 +5,168 @@ * * Distributed under the Modified BSD License. * - * A query class for single dimensional range count queries. This query - * requires that the shard support get_lower_bound(key) and + * A query class for single dimensional range count queries. This query + * requires that the shard support get_lower_bound(key) and * get_record_at(index). */ #pragma once #include "framework/QueryRequirements.h" -namespace de { namespace rc { +namespace de { +namespace rc { -template <RecordInterface R> -struct Parms { +template <ShardInterface S, bool FORCE_SCAN = true> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; -}; + }; -template <RecordInterface R> -struct State { + struct LocalQuery { size_t start_idx; size_t stop_idx; -}; + Parameters global_parms; + }; -template <RecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; - - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <KVPInterface R, ShardInterface<R> S, bool FORCE_SCAN=false> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); - - return res; + Parameters global_parms; + }; + + struct LocalResultType { + size_t record_count; + size_t tombstone_count; + + bool is_deleted() {return false;} + bool is_tombstone() {return false;} + }; + + typedef size_t ResultType; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + + query->start_idx = shard->get_lower_bound(parms->lower_bound); + query->stop_idx = shard->get_record_count(); + query->global_parms.lower_bound = parms->lower_bound; + query->global_parms.upper_bound = parms->upper_bound; + + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms.lower_bound = parms->lower_bound; + query->global_parms.upper_bound = parms->upper_bound; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. 
+ */ + if (query->start_idx == shard->get_record_count()) { + return result; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + auto ptr = shard->get_record_at(query->start_idx); + size_t reccnt = 0; + size_t tscnt = 0; + + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. + */ + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key < query->global_parms.lower_bound) { + ptr++; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - size_t reccnt = 0; - size_t tscnt = 0; - - Wrapped<R> res; - res.rec.key= 0; // records - res.rec.value = 0; // tombstones - records.emplace_back(res); - - - auto start_idx = shard->get_lower_bound(p->lower_bound); - auto stop_idx = shard->get_lower_bound(p->upper_bound); + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key <= query->global_parms.upper_bound) { - /* - * if the returned index is one past the end of the - * records for the PGM, then there are not records - * in the index falling into the specified range. - */ - if (start_idx == shard->get_record_count()) { - return records; - } - - - /* - * roll the pointer forward to the first record that is - * greater than or equal to the lower bound. - */ - auto recs = shard->get_data(); - while(start_idx < stop_idx && recs[start_idx].rec.key < p->lower_bound) { - start_idx++; - } - - while (stop_idx < shard->get_record_count() && recs[stop_idx].rec.key <= p->upper_bound) { - stop_idx++; - } - size_t idx = start_idx; - size_t ts_cnt = 0; + if (!ptr->is_deleted()) { + reccnt++; - while (idx < stop_idx) { - ts_cnt += recs[idx].is_tombstone() * 2 + recs[idx].is_deleted(); - idx++; + if (ptr->is_tombstone()) { + tscnt++; } + } - records[0].rec.key = idx - start_idx; - records[0].rec.value = ts_cnt; - - return records; + ptr++; } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; - - std::vector<Wrapped<R>> records; - - Wrapped<R> res; - res.rec.key= 0; // records - res.rec.value = 0; // tombstones - records.emplace_back(res); - - size_t stop_idx; - if constexpr (FORCE_SCAN) { - stop_idx = s->buffer->get_capacity() / 2; - } else { - stop_idx = s->buffer->get_record_count(); - } - - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound - && !rec->is_deleted()) { - if (rec->is_tombstone()) { - records[0].rec.value++; - } else { - records[0].rec.key++; - } - } + result.push_back({reccnt, tscnt}); + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + + std::vector<LocalResultType> result; + size_t reccnt = 0; + size_t tscnt = 0; + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + if (!rec->is_deleted()) { + reccnt++; + if (rec->is_tombstone()) { + tscnt++; + } } - - return records; + } } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - R res; - res.key = 0; - res.value = 0; - output.emplace_back(res); + result.push_back({reccnt, tscnt}); - for (size_t i=0; 
i<results.size(); i++) { - output[0].key += results[i][0].rec.key; // records - output[0].value += results[i][0].rec.value; // tombstones - } + return result; + } - output[0].key -= output[0].value; - return output; - } + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + size_t reccnt = 0; + size_t tscnt = 0; - static void delete_query_state(void *state) { + for (auto &local_result : local_results) { + reccnt += local_result[0].record_count; + tscnt += local_result[0].tombstone_count; } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + /* if more tombstones than results, clamp the output at 0 */ + if (tscnt > reccnt) { + tscnt = reccnt; } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; - } + output.push_back({reccnt - tscnt}); + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; -}} +} // namespace rc +} // namespace de diff --git a/include/query/rangequery.h b/include/query/rangequery.h index e0690e6..e7be39c 100644 --- a/include/query/rangequery.h +++ b/include/query/rangequery.h @@ -1,177 +1,186 @@ /* * include/query/rangequery.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * - * A query class for single dimensional range queries. This query requires + * A query class for single dimensional range queries. This query requires * that the shard support get_lower_bound(key) and get_record_at(index). 
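 *
 * Illustrative only (an assumption inferred from the calls made in
 * local_query() below, not a definition in this header): a shard usable with
 * this query is expected to expose a sorted-array style interface roughly of
 * the form
 *
 *   size_t get_lower_bound(const K &key) const;       // index of first record >= key
 *   size_t get_record_count() const;
 *   const Wrapped<R> *get_record_at(size_t idx) const;
 *   Wrapped<R> *get_data() const;                      // base of the sorted run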
*/ #pragma once #include "framework/QueryRequirements.h" +#include "framework/interface/Record.h" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -namespace de { namespace rq { +namespace de { +namespace rq { -template <RecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { decltype(R::key) lower_bound; decltype(R::key) upper_bound; -}; + }; -template <RecordInterface R> -struct State { + struct LocalQuery { size_t start_idx; size_t stop_idx; -}; + Parameters global_parms; + }; -template <RecordInterface R> -struct BufferState { + struct LocalQueryBuffer { BufferView<R> *buffer; - - BufferState(BufferView<R> *buffer) - : buffer(buffer) {} -}; - -template <RecordInterface R, ShardInterface<R> S> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - auto p = (Parms<R> *) parms; - - res->start_idx = shard->get_lower_bound(p->lower_bound); - res->stop_idx = shard->get_record_count(); - - return res; + Parameters global_parms; + }; + + typedef Wrapped<R> LocalResultType; + typedef R ResultType; + + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = true; + + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); + + query->start_idx = shard->get_lower_bound(parms->lower_bound); + query->stop_idx = shard->get_record_count(); + query->global_parms = *parms; + + return query; + } + + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); + query->buffer = buffer; + query->global_parms = *parms; + + return query; + } + + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return; + } + + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; + + /* + * if the returned index is one past the end of the + * records for the PGM, then there are not records + * in the index falling into the specified range. + */ + if (query->start_idx == shard->get_record_count()) { + return result; } - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - auto res = new BufferState<R>(buffer); + auto ptr = shard->get_record_at(query->start_idx); - return res; + /* + * roll the pointer forward to the first record that is + * greater than or equal to the lower bound. + */ + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key < query->global_parms.lower_bound) { + ptr++; } - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) { - return; + while (ptr < shard->get_data() + query->stop_idx && + ptr->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*ptr); + ptr++; } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - std::vector<Wrapped<R>> records; - auto p = (Parms<R> *) parms; - auto s = (State<R> *) q_state; - - /* - * if the returned index is one past the end of the - * records for the PGM, then there are not records - * in the index falling into the specified range. 
- */ - if (s->start_idx == shard->get_record_count()) { - return records; - } - - auto ptr = shard->get_record_at(s->start_idx); - - /* - * roll the pointer forward to the first record that is - * greater than or equal to the lower bound. - */ - while(ptr < shard->get_data() + s->stop_idx && ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr < shard->get_data() + s->stop_idx && ptr->rec.key <= p->upper_bound) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } + return result; + } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto p = (Parms<R> *) parms; - auto s = (BufferState<R> *) state; + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { - std::vector<Wrapped<R>> records; - for (size_t i=0; i<s->buffer->get_record_count(); i++) { - auto rec = s->buffer->get(i); - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; + std::vector<LocalResultType> result; + for (size_t i = 0; i < query->buffer->get_record_count(); i++) { + auto rec = query->buffer->get(i); + if (rec->rec.key >= query->global_parms.lower_bound && + rec->rec.key <= query->global_parms.upper_bound) { + result.emplace_back(*rec); + } } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(results.size()); - - psudb::PriorityQueue<Wrapped<R>> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor<Wrapped<R>>{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector<R>(); - } - - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : psudb::queue_record<Wrapped<R>>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - - pq.pop(); - - if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output; + return result; + } + + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + std::vector<Cursor<LocalResultType>> cursors; + cursors.reserve(local_results.size()); + + psudb::PriorityQueue<LocalResultType> pq(local_results.size()); + size_t total = 0; + size_t tmp_n = local_results.size(); + + for (size_t i = 0; i < tmp_n; ++i) + if (local_results[i].size() > 0) { + auto base = local_results[i].data(); + cursors.emplace_back(Cursor<LocalResultType>{ + base, base + local_results[i].size(), 0, local_results[i].size()}); + assert(i == cursors.size() - 1); + total += local_results[i].size(); + pq.push(cursors[i].ptr, tmp_n - i - 1); + } else { + cursors.emplace_back(Cursor<LocalResultType>{nullptr, nullptr, 0, 0}); + } + + if (total == 0) { + return; } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; + output.reserve(total); + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 + ? pq.peek(1) + : psudb::queue_record<LocalResultType>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); + pq.pop(); + auto &cursor1 = cursors[tmp_n - now.version - 1]; + auto &cursor2 = cursors[tmp_n - next.version - 1]; + if (advance_cursor<LocalResultType>(cursor1)) + pq.push(cursor1.ptr, now.version); + if (advance_cursor<LocalResultType>(cursor2)) + pq.push(cursor2.ptr, next.version); + } else { + auto &cursor = cursors[tmp_n - now.version - 1]; + if (!now.data->is_tombstone()) + output.push_back(cursor.ptr->rec); + + pq.pop(); + + if (advance_cursor<LocalResultType>(cursor)) + pq.push(cursor.ptr, now.version); + } } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } + return; + } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - return false; - } + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + return false; + } }; -}} +} // namespace rq +} // namespace de diff --git a/include/query/wirs.h b/include/query/wirs.h deleted file mode 100644 index 62b43f6..0000000 --- a/include/query/wirs.h +++ /dev/null @@ -1,251 +0,0 @@ -/* - * include/query/wirs.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * A query class for weighted independent range sampling. This - * class is tightly coupled with include/shard/AugBTree.h, and - * so is probably of limited general utility. 
- */ -#pragma once - -#include "framework/QueryRequirements.h" -#include "psu-ds/Alias.h" - -namespace de { namespace wirs { - -template <WeightedRecordInterface R> -struct Parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template <WeightedRecordInterface R> -struct State { - decltype(R::weight) total_weight; - std::vector<void*> nodes; - psudb::Alias* top_level_alias; - size_t sample_size; - - State() { - total_weight = 0; - top_level_alias = nullptr; - } - - ~State() { - if (top_level_alias) delete top_level_alias; - } -}; - -template <RecordInterface R> -struct BufferState { - size_t cutoff; - psudb::Alias* alias; - std::vector<Wrapped<R>> records; - decltype(R::weight) max_weight; - size_t sample_size; - decltype(R::weight) total_weight; - BufferView<R> *buffer; - - ~BufferState() { - delete alias; - } -}; - -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound; - decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound; - - std::vector<decltype(R::weight)> weights; - res->total_weight = shard->find_covering_nodes(lower_key, upper_key, res->nodes, weights); - - std::vector<double> normalized_weights; - for (auto weight : weights) { - normalized_weights.emplace_back(weight / res->total_weight); - } - - res->top_level_alias = new psudb::Alias(normalized_weights); - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - BufferState<R> *state = new BufferState<R>(); - auto parameters = (Parms<R>*) parms; - - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->sample_size = 0; - state->buffer = buffer; - return state; - } - - std::vector<decltype(R::weight)> weights; - - state->buffer = buffer; - decltype(R::weight) total_weight = 0; - - for (size_t i = 0; i <= buffer->get_record_count(); i++) { - auto rec = buffer->get(i); - - if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) { - weights.push_back(rec->rec.weight); - state->records.push_back(*rec); - total_weight += rec->rec.weight; - } - } - - std::vector<double> normalized_weights; - for (size_t i = 0; i < weights.size(); i++) { - normalized_weights.push_back(weights[i] / total_weight); - } - - state->total_weight = total_weight; - state->alias = new psudb::Alias(normalized_weights); - state->sample_size = 0; - - return state; - } - - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { - auto p = (Parms<R> *) query_parms; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+buffer_states.size(), 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - - decltype(R::weight) total_weight = 0; - for (auto &s : buffer_states) { - auto bs = (BufferState<R> *) s; - total_weight += bs->total_weight; - weights.push_back(bs->total_weight); - } - - for (auto &s : shard_states) { - auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> 
normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = psudb::Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - - if (idx < buffer_states.size()) { - auto state = (BufferState<R> *) buffer_states[idx]; - state->sample_size++; - } else { - auto state = (State<R> *) shard_states[idx - buffer_states.size()]; - state->sample_size++; - } - } - } - - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto lower_key = ((Parms<R> *) parms)->lower_bound; - auto upper_key = ((Parms<R> *) parms)->upper_bound; - auto rng = ((Parms<R> *) parms)->rng; - - auto state = (State<R> *) q_state; - auto sample_size = state->sample_size; - - std::vector<Wrapped<R>> result_set; - - if (sample_size == 0) { - return result_set; - } - size_t cnt = 0; - size_t attempts = 0; - - for (size_t i=0; i<sample_size; i++) { - auto rec = shard->get_weighted_sample(lower_key, upper_key, - state->nodes[state->top_level_alias->get(rng)], - rng); - if (rec) { - result_set.emplace_back(*rec); - } - } - - return result_set; - } - - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; - auto buffer = st->buffer; - - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get(idx); - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; - } - - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; - - if (results.size() < p->sample_size) { - return true; - } - return false; - } -}; -}} diff --git a/include/query/wss.h b/include/query/wss.h index fb0b414..54620ca 100644 --- a/include/query/wss.h +++ b/include/query/wss.h @@ -6,7 +6,7 @@ * Distributed under the Modified BSD License. * * A query class for weighted set sampling. This - * class is tightly coupled with include/shard/Alias.h, + * class is tightly coupled with include/shard/Alias.h, * and so is probably of limited general utility. 
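 *
 * Rough sketch of the control flow (the numbers are illustrative, not taken
 * from any benchmark): if the buffer holds total weight 10 and two shards
 * hold weights 30 and 60, distribute_query() normalizes these to
 * {0.1, 0.3, 0.6}, builds a psudb::Alias over them, and draws sample_size
 * indices from it; index 0 assigns a draw to the buffer, index i > 0 to
 * shard i-1. Each shard then samples its assigned count in local_query(),
 * and repeat() reissues the remaining sample_size whenever rejected
 * (deleted) records leave the combined output short.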
*/ #pragma once @@ -14,203 +14,177 @@ #include "framework/QueryRequirements.h" #include "psu-ds/Alias.h" -namespace de { namespace wss { +namespace de { +namespace wss { -template <WeightedRecordInterface R> -struct Parms { +template <ShardInterface S> class Query { + typedef typename S::RECORD R; + +public: + struct Parameters { size_t sample_size; gsl_rng *rng; -}; + }; -template <WeightedRecordInterface R> -struct State { - decltype(R::weight) total_weight; + struct LocalQuery { size_t sample_size; + decltype(R::weight) total_weight; - State() { - total_weight = 0; - } -}; + Parameters global_parms; + }; + + struct LocalQueryBuffer { + BufferView<R> *buffer; -template <RecordInterface R> -struct BufferState { - size_t cutoff; size_t sample_size; - psudb::Alias *alias; - decltype(R::weight) max_weight; decltype(R::weight) total_weight; - BufferView<R> *buffer; + decltype(R::weight) max_weight; + size_t cutoff; - ~BufferState() { - delete alias; - } -}; + std::unique_ptr<psudb::Alias> alias; -template <RecordInterface R, ShardInterface<R> S, bool Rejection=true> -class Query { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; + Parameters global_parms; + }; - static void *get_query_state(S *shard, void *parms) { - auto res = new State<R>(); - res->total_weight = shard->get_total_weight(); - res->sample_size = 0; + constexpr static bool EARLY_ABORT = false; + constexpr static bool SKIP_DELETE_FILTER = false; - return res; - } + typedef Wrapped<R> LocalResultType; + typedef R ResultType; - static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) { - BufferState<R> *state = new BufferState<R>(); - auto parameters = (Parms<R>*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->buffer = buffer; - return state; - } + static LocalQuery *local_preproc(S *shard, Parameters *parms) { + auto query = new LocalQuery(); - std::vector<double> weights; + query->global_parms = *parms; + query->total_weight = shard->get_total_weight(); + query->sample_size = 0; - double total_weight = 0.0; - state->buffer = buffer; + return query; + } - for (size_t i = 0; i <= buffer->get_record_count(); i++) { - auto rec = buffer->get_data(i); - weights.push_back(rec->rec.weight); - total_weight += rec->rec.weight; - } + static LocalQueryBuffer *local_preproc_buffer(BufferView<R> *buffer, + Parameters *parms) { + auto query = new LocalQueryBuffer(); - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } + query->cutoff = buffer->get_record_count() - 1; - state->alias = new psudb::Alias(weights); - state->total_weight = total_weight; + query->max_weight = 0; + query->total_weight = 0; - return state; - } + for (size_t i = 0; i < buffer->get_record_count(); i++) { + auto weight = buffer->get(i)->rec.weight; + query->total_weight += weight; - static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { - auto p = (Parms<R> *) query_parms; - - std::vector<size_t> shard_sample_sizes(shard_states.size()+buffer_states.size(), 0); - size_t buffer_sz = 0; - - std::vector<decltype(R::weight)> weights; - - decltype(R::weight) total_weight = 0; - for (auto &s : buffer_states) { - auto bs = (BufferState<R> *) s; - total_weight += bs->total_weight; - weights.push_back(bs->total_weight); - } - - for (auto &s : shard_states) { - 
auto state = (State<R> *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector<double> normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = psudb::Alias(normalized_weights); - for (size_t i=0; i<p->sample_size; i++) { - auto idx = shard_alias.get(p->rng); - - if (idx < buffer_states.size()) { - auto state = (BufferState<R> *) buffer_states[idx]; - state->sample_size++; - } else { - auto state = (State<R> *) shard_states[idx - buffer_states.size()]; - state->sample_size++; - } - } + if (weight > query->max_weight) { + query->max_weight = weight; + } } - static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) { - auto rng = ((Parms<R> *) parms)->rng; + query->buffer = buffer; + query->global_parms = *parms; - auto state = (State<R> *) q_state; - auto sample_size = state->sample_size; + query->alias = nullptr; - std::vector<Wrapped<R>> result_set; + return query; + } - if (sample_size == 0) { - return result_set; - } - size_t attempts = 0; - do { - attempts++; - size_t idx = shard->get_weighted_sample(rng); - result_set.emplace_back(*shard->get_record_at(idx)); - } while (attempts < sample_size); + static void distribute_query(Parameters *parms, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { - return result_set; + if (!buffer_query) { + assert(local_queries.size() == 1); + local_queries[0]->sample_size = + local_queries[0]->global_parms.sample_size; + return; } - static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) { - auto st = (BufferState<R> *) state; - auto p = (Parms<R> *) parms; - auto buffer = st->buffer; + if (!buffer_query->alias) { + std::vector<decltype(R::weight)> weights; - std::vector<Wrapped<R>> result; - result.reserve(st->sample_size); + decltype(R::weight) total_weight = buffer_query->total_weight; + weights.push_back(total_weight); - if constexpr (Rejection) { - for (size_t i=0; i<st->sample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get(idx); + for (auto &q : local_queries) { + total_weight += q->total_weight; + weights.push_back(q->total_weight); + q->sample_size = 0; + } - auto test = gsl_rng_uniform(p->rng) * st->max_weight; + std::vector<double> normalized_weights; + for (auto w : weights) { + normalized_weights.push_back((double)w / (double)total_weight); + } - if (test <= rec->rec.weight) { - result.emplace_back(*rec); - } - } - return result; - } + buffer_query->alias = std::make_unique<psudb::Alias>(normalized_weights); + } - for (size_t i=0; i<st->sample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(*(buffer->get_data() + idx)); - } + for (size_t i = 0; i < parms->sample_size; i++) { + auto idx = buffer_query->alias->get(parms->rng); - return result; + if (idx == 0) { + buffer_query->sample_size++; + } else { + local_queries[idx - 1]->sample_size++; + } } + } - static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms, std::vector<R> &output) { - for (size_t i=0; i<results.size(); i++) { - for (size_t j=0; j<results[i].size(); j++) { - output.emplace_back(results[i][j].rec); - } - } + static std::vector<LocalResultType> local_query(S *shard, LocalQuery *query) { + std::vector<LocalResultType> result; - return output; + if (query->sample_size == 0) { + return result; } - static void delete_query_state(void *state) { - auto s = (State<R> *) state; - delete 
s; + for (size_t i = 0; i < query->sample_size; i++) { + size_t idx = shard->get_weighted_sample(query->global_parms.rng); + if (!shard->get_record_at(idx)->is_deleted()) { + result.emplace_back(*shard->get_record_at(idx)); + } } - static void delete_buffer_query_state(void *state) { - auto s = (BufferState<R> *) state; - delete s; + return result; + } + + static std::vector<LocalResultType> + local_query_buffer(LocalQueryBuffer *query) { + std::vector<LocalResultType> result; + + for (size_t i = 0; i < query->sample_size; i++) { + auto idx = gsl_rng_uniform_int(query->global_parms.rng, query->cutoff); + auto rec = query->buffer->get(idx); + + auto test = gsl_rng_uniform(query->global_parms.rng) * query->max_weight; + if (test <= rec->rec.weight && !rec->is_deleted()) { + result.emplace_back(*rec); + } } - static bool repeat(void *parms, std::vector<R> &results, std::vector<void*> states, void* buffer_state) { - auto p = (Parms<R> *) parms; + return result; + } - if (results.size() < p->sample_size) { - return true; - } - return false; + static void + combine(std::vector<std::vector<LocalResultType>> const &local_results, + Parameters *parms, std::vector<ResultType> &output) { + for (size_t i = 0; i < local_results.size(); i++) { + for (size_t j = 0; j < local_results[i].size(); j++) { + output.emplace_back(local_results[i][j].rec); + } + } + } + + static bool repeat(Parameters *parms, std::vector<ResultType> &output, + std::vector<LocalQuery *> const &local_queries, + LocalQueryBuffer *buffer_query) { + if (output.size() < parms->sample_size) { + parms->sample_size -= output.size(); + distribute_query(parms, local_queries, buffer_query); + return true; } -}; -}} + return false; + } +}; +} // namespace wss +} // namespace de diff --git a/include/shard/Alias.h b/include/shard/Alias.h index 72147d7..8fe70a5 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -25,21 +25,20 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; namespace de { -static thread_local size_t wss_cancelations = 0; - template <WeightedRecordInterface R> class Alias { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; typedef decltype(R::weight) W; + public: Alias(BufferView<R> buffer) : m_data(nullptr) @@ -71,7 +70,7 @@ public: } } - Alias(std::vector<Alias*> &shards) + Alias(std::vector<Alias*> const &shards) : m_data(nullptr) , m_alias(nullptr) , m_total_weight(0) @@ -167,7 +166,6 @@ public: size_t min = 0; size_t max = m_reccnt - 1; - const char * record_key; while (min < max) { size_t mid = (min + max) / 2; diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h deleted file mode 100644 index c60cbcd..0000000 --- a/include/shard/AugBTree.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * include/shard/AugBTree.h - * - * Copyright (C) 2023 Dong Xie <dongx@psu.edu> - * Douglas B. Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * A shard shim around the alias augmented B-tree. Designed to be - * used along side the WIRS query in include/query/wirs.h, but - * also supports the necessary methods for other common query - * types. - * - * TODO: The code in this file is very poorly commented. 
- */ -#pragma once - - -#include <vector> -#include <cassert> - -#include "framework/ShardRequirements.h" - -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "util/SortedMerge.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::Alias; -using psudb::byte; - -namespace de { - -template <WeightedRecordInterface R> -struct AugBTreeNode { - struct AugBTreeNode<R> *left, *right; - decltype(R::key) low, high; - decltype(R::weight) weight; - Alias* alias; -}; - -template <WeightedRecordInterface R> -class AugBTree { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - AugBTree(BufferView<R> buffer) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) - { - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - buffer.get_record_count() * - sizeof(Wrapped<R>), - (byte**) &m_data); - - auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - AugBTree(std::vector<AugBTree*> shards) - : m_data(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_group_size(0) - , m_alloc_size(0) - , m_node_cnt(0) - , m_bf(nullptr) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec<R, AugBTree>(shards, &attemp_reccnt, &tombstone_count); - - m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - attemp_reccnt * sizeof(Wrapped<R>), - (byte **) &m_data); - - auto res = sorted_array_merge<R>(cursors, m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - ~AugBTree() { - free(m_data); - for (size_t i=0; i<m_alias.size(); i++) { - delete m_alias[i]; - } - - delete m_bf; - free_tree(m_root); - } - - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped<R>* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - size_t get_memory_usage() { - return m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>); - } - - size_t get_aux_memory_usage() { - return (m_bf) ? m_bf->memory_usage() : 0; - } - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) { - W total_weight = 0; - - /* Simulate a stack to unfold recursion. 
*/ - struct AugBTreeNode<R>* st[64] = {0}; - st[0] = m_root; - size_t top = 1; - while(top > 0) { - auto now = st[--top]; - if (covered_by(now, lower_key, upper_key) || - (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) { - nodes.emplace_back(now); - weights.emplace_back(now->weight); - total_weight += now->weight; - } else { - if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left; - if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right; - } - } - - - return total_weight; - } - - Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) { - /* k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. */ - - /* first level */ - auto node = (AugBTreeNode<R>*) internal_node; - - /* second level */ - auto fat_point = node->low + node->alias->get(rng); - - /* third level */ - size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng); - auto record = m_data + rec_offset; - - /* bounds rejection */ - if (lower_key > record->rec.key || upper_key < record->rec.key) { - return nullptr; - } - - return record; - } - -private: - - bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; - } - - bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; - } - - void build_wirs_structure() { - m_group_size = std::ceil(std::log(m_reccnt)); - size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); - - // Fat point construction + low level alias.... 
- double sum_weight = 0.0; - std::vector<W> weights; - std::vector<double> group_norm_weight; - size_t i = 0; - size_t group_no = 0; - while (i < m_reccnt) { - double group_weight = 0.0; - group_norm_weight.clear(); - for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { - auto w = m_data[i].rec.weight; - group_norm_weight.emplace_back(w); - group_weight += w; - sum_weight += w; - } - - for (auto& w: group_norm_weight) - if (group_weight) w /= group_weight; - else w = 1.0 / group_norm_weight.size(); - m_alias.emplace_back(new Alias(group_norm_weight)); - - - weights.emplace_back(group_weight); - } - - assert(weights.size() == n_groups); - - m_root = construct_AugBTreeNode(weights, 0, n_groups-1); - } - - struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) { - if (low == high) { - return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; - } else if (low > high) return nullptr; - - std::vector<double> node_weights; - W sum = 0; - for (size_t i = low; i < high; ++i) { - node_weights.emplace_back(weights[i]); - sum += weights[i]; - } - - for (auto& w: node_weights) - if (sum) w /= sum; - else w = 1.0 / node_weights.size(); - - m_node_cnt += 1; - size_t mid = (low + high) / 2; - return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid), - construct_AugBTreeNode(weights, mid + 1, high), - low, high, sum, new Alias(node_weights)}; - } - - void free_tree(struct AugBTreeNode<R>* node) { - if (node) { - delete node->alias; - free_tree(node->left); - free_tree(node->right); - delete node; - } - } - - Wrapped<R>* m_data; - std::vector<Alias *> m_alias; - AugBTreeNode<R>* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - size_t m_node_cnt; - BloomFilter<R> *m_bf; -}; -} diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h index 3783b38..4e51037 100644 --- a/include/shard/FSTrie.h +++ b/include/shard/FSTrie.h @@ -26,6 +26,8 @@ namespace de { template <KVPInterface R> class FSTrie { +public: + typedef R RECORD; private: typedef decltype(R::key) K; @@ -80,7 +82,7 @@ public: delete[] temp_buffer; } - FSTrie(std::vector<FSTrie*> &shards) + FSTrie(std::vector<FSTrie*> const &shards) : m_data(nullptr) , m_reccnt(0) , m_alloc_size(0) diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 1cca506..64c0b2b 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -1,8 +1,8 @@ /* * include/shard/ISAMTree.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -12,258 +12,246 @@ */ #pragma once -#include <vector> #include <cassert> +#include <vector> #include "framework/ShardRequirements.h" -#include "util/bf_config.h" #include "psu-ds/BloomFilter.h" #include "util/SortedMerge.h" +#include "util/bf_config.h" -using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; +using psudb::CACHELINE_SIZE; namespace de { -template <KVPInterface R> -class ISAMTree { +template <KVPInterface R> class ISAMTree { private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; -typedef decltype(R::key) K; -typedef decltype(R::value) V; - -constexpr static size_t NODE_SZ = 256; -constexpr static size_t INTERNAL_FANOUT = NODE_SZ / (sizeof(K) + sizeof(byte*)); + constexpr static size_t NODE_SZ = 256; + constexpr static size_t INTERNAL_FANOUT = + NODE_SZ / (sizeof(K) + sizeof(byte *)); -struct InternalNode { + struct InternalNode { K keys[INTERNAL_FANOUT]; - byte* child[INTERNAL_FANOUT]; -}; - -static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); + byte *child[INTERNAL_FANOUT]; + }; -constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); + static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); + constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); public: - ISAMTree(BufferView<R> buffer) - : m_bf(nullptr) - , m_isam_nodes(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_internal_node_cnt(0) - , m_deleted_cnt(0) - , m_alloc_size(0) - { - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, - buffer.get_record_count() * - sizeof(Wrapped<R>), - (byte**) &m_data); - - auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_internal_levels(); - } + typedef R RECORD; + + ISAMTree(BufferView<R> buffer) + : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), + m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0), + m_alloc_size(0) { + m_alloc_size = psudb::sf_aligned_alloc( + CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped<R>), + (byte **)&m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); + } + } + + ISAMTree(std::vector<ISAMTree *> const &shards) + : m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), + m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0), + m_alloc_size(0) { + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + auto cursors = + build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); + + m_bf = nullptr; + m_alloc_size = psudb::sf_aligned_alloc( + CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped<R>), (byte **)&m_data); + + auto res = sorted_array_merge<R>(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; + + if (m_reccnt > 0) { + build_internal_levels(); } + } - ISAMTree(std::vector<ISAMTree*> &shards) - : m_bf(nullptr) - , m_isam_nodes(nullptr) - , m_root(nullptr) - , m_reccnt(0) - , m_tombstone_cnt(0) - , m_internal_node_cnt(0) - , m_deleted_cnt(0) - , m_alloc_size(0) - { - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - auto cursors = build_cursor_vec<R, ISAMTree>(shards, &attemp_reccnt, &tombstone_count); - - m_bf = nullptr; - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, 
- attemp_reccnt * sizeof(Wrapped<R>), - (byte **) &m_data); - - auto res = sorted_array_merge<R>(cursors, m_data, m_bf); - m_reccnt = res.record_count; - m_tombstone_cnt = res.tombstone_count; - - if (m_reccnt > 0) { - build_internal_levels(); - } + ~ISAMTree() { + free(m_data); + free(m_isam_nodes); + delete m_bf; + } + + Wrapped<R> *point_lookup(const R &rec, bool filter = false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; } - ~ISAMTree() { - free(m_data); - free(m_isam_nodes); - delete m_bf; + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; } - Wrapped<R> *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } + while (idx < m_reccnt && m_data[idx].rec < rec) + ++idx; - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } + if (m_data[idx].rec == rec) { + return m_data + idx; + } - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + return nullptr; + } - if (m_data[idx].rec == rec) { - return m_data + idx; - } + Wrapped<R> *get_data() const { return m_data; } - return nullptr; - } + size_t get_record_count() const { return m_reccnt; } - Wrapped<R>* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } + size_t get_tombstone_count() const { return m_tombstone_cnt; } - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } + size_t get_memory_usage() const { return m_internal_node_cnt * NODE_SZ; } + size_t get_aux_memory_usage() const { return (m_bf) ? m_bf->memory_usage() : 0; } - size_t get_memory_usage() { - return m_internal_node_cnt * NODE_SZ; - } + /* SortedShardInterface methods */ + size_t get_lower_bound(const K &key) const { + const InternalNode *now = m_root; + while (!is_leaf(reinterpret_cast<const byte *>(now))) { + const InternalNode *next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key <= now->keys[i]) { + next = reinterpret_cast<InternalNode *>(now->child[i]); + break; + } + } - size_t get_aux_memory_usage() { - return (m_bf) ? m_bf->memory_usage() : 0; + now = next ? next + : reinterpret_cast<const InternalNode *>( + now->child[INTERNAL_FANOUT - 1]); } - /* SortedShardInterface methods */ - size_t get_lower_bound(const K& key) const { - const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast<const byte*>(now))) { - const InternalNode* next = nullptr; - for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { - if (now->child[i + 1] == nullptr || key <= now->keys[i]) { - next = reinterpret_cast<InternalNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); + const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now); + while (pos < m_data + m_reccnt && pos->rec.key < key) + pos++; + + return pos - m_data; + } + + size_t get_upper_bound(const K &key) const { + const InternalNode *now = m_root; + while (!is_leaf(reinterpret_cast<const byte *>(now))) { + const InternalNode *next = nullptr; + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { + if (now->child[i + 1] == nullptr || key < now->keys[i]) { + next = reinterpret_cast<InternalNode *>(now->child[i]); + break; } + } - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; - - return pos - m_data; + now = next ? 
next + : reinterpret_cast<const InternalNode *>( + now->child[INTERNAL_FANOUT - 1]); } - size_t get_upper_bound(const K& key) const { - const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast<const byte*>(now))) { - const InternalNode* next = nullptr; - for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { - if (now->child[i + 1] == nullptr || key < now->keys[i]) { - next = reinterpret_cast<InternalNode*>(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast<const InternalNode*>(now->child[INTERNAL_FANOUT - 1]); - } - - const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now); - while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; + const Wrapped<R> *pos = reinterpret_cast<const Wrapped<R> *>(now); + while (pos < m_data + m_reccnt && pos->rec.key <= key) + pos++; - return pos - m_data; - } + return pos - m_data; + } - const Wrapped<R>* get_record_at(size_t idx) const { - return (idx < m_reccnt) ? m_data + idx : nullptr; - } + const Wrapped<R> *get_record_at(size_t idx) const { + return (idx < m_reccnt) ? m_data + idx : nullptr; + } private: - void build_internal_levels() { - size_t n_leaf_nodes = m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); - - size_t level_node_cnt = n_leaf_nodes; - size_t node_cnt = 0; - do { - level_node_cnt = level_node_cnt / INTERNAL_FANOUT + (level_node_cnt % INTERNAL_FANOUT != 0); - node_cnt += level_node_cnt; - } while (level_node_cnt > 1); - - m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, (byte**) &m_isam_nodes); - m_internal_node_cnt = node_cnt; - - InternalNode* current_node = m_isam_nodes; - - const Wrapped<R>* leaf_base = m_data; - const Wrapped<R>* leaf_stop = m_data + m_reccnt; - while (leaf_base < leaf_stop) { - size_t fanout = 0; - for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { - auto rec_ptr = leaf_base + LEAF_FANOUT * i; - if (rec_ptr >= leaf_stop) break; - const Wrapped<R>* sep_key = std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); - current_node->keys[i] = sep_key->rec.key; - current_node->child[i] = (byte*)rec_ptr; - ++fanout; - } - current_node++; - leaf_base += fanout * LEAF_FANOUT; - } - - auto level_start = m_isam_nodes; - auto level_stop = current_node; - auto current_level_node_cnt = level_stop - level_start; - while (current_level_node_cnt > 1) { - auto now = level_start; - while (now < level_stop) { - size_t child_cnt = 0; - for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { - auto node_ptr = now + i; - ++child_cnt; - if (node_ptr >= level_stop) break; - current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; - current_node->child[i] = (byte*)node_ptr; - } - now += child_cnt; - current_node++; - } - level_start = level_stop; - level_stop = current_node; - current_level_node_cnt = level_stop - level_start; - } - - assert(current_level_node_cnt == 1); - m_root = level_start; + void build_internal_levels() { + size_t n_leaf_nodes = + m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); + + size_t level_node_cnt = n_leaf_nodes; + size_t node_cnt = 0; + do { + level_node_cnt = level_node_cnt / INTERNAL_FANOUT + + (level_node_cnt % INTERNAL_FANOUT != 0); + node_cnt += level_node_cnt; + } while (level_node_cnt > 1); + + m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, + (byte **)&m_isam_nodes); + m_internal_node_cnt = node_cnt; + + InternalNode *current_node = m_isam_nodes; + + const Wrapped<R> *leaf_base = m_data; + const Wrapped<R> *leaf_stop = m_data + m_reccnt; + while (leaf_base < leaf_stop) { + size_t fanout = 0; + for 
(size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto rec_ptr = leaf_base + LEAF_FANOUT * i; + if (rec_ptr >= leaf_stop) + break; + const Wrapped<R> *sep_key = + std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); + current_node->keys[i] = sep_key->rec.key; + current_node->child[i] = (byte *)rec_ptr; + ++fanout; + } + current_node++; + leaf_base += fanout * LEAF_FANOUT; } - bool is_leaf(const byte* ptr) const { - return ptr >= (const byte*)m_data && ptr < (const byte*)(m_data + m_reccnt); + auto level_start = m_isam_nodes; + auto level_stop = current_node; + auto current_level_node_cnt = level_stop - level_start; + while (current_level_node_cnt > 1) { + auto now = level_start; + while (now < level_stop) { + size_t child_cnt = 0; + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto node_ptr = now + i; + ++child_cnt; + if (node_ptr >= level_stop) + break; + current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; + current_node->child[i] = (byte *)node_ptr; + } + now += child_cnt; + current_node++; + } + level_start = level_stop; + level_stop = current_node; + current_level_node_cnt = level_stop - level_start; } - psudb::BloomFilter<R> *m_bf; - InternalNode* m_isam_nodes; - InternalNode* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_internal_node_cnt; - size_t m_deleted_cnt; - size_t m_alloc_size; - - Wrapped<R>* m_data; + assert(current_level_node_cnt == 1); + m_root = level_start; + } + + bool is_leaf(const byte *ptr) const { + return ptr >= (const byte *)m_data && + ptr < (const byte *)(m_data + m_reccnt); + } + + psudb::BloomFilter<R> *m_bf; + InternalNode *m_isam_nodes; + InternalNode *m_root; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_internal_node_cnt; + size_t m_deleted_cnt; + size_t m_alloc_size; + + Wrapped<R> *m_data; }; -} +} // namespace de diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 509796b..7d1f492 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -33,6 +33,8 @@ namespace de { template <RecordInterface R, size_t epsilon=128> class PGM { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; @@ -109,7 +111,7 @@ public: } } - PGM(std::vector<PGM*> shards) + PGM(std::vector<PGM*> const &shards) : m_data(nullptr) , m_bf(nullptr) , m_reccnt(0) diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 581277e..9d8c3bb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -30,6 +30,8 @@ namespace de { template <KVPInterface R, size_t E=1024> class TrieSpline { +public: + typedef R RECORD; private: typedef decltype(R::key) K; typedef decltype(R::value) V; @@ -122,7 +124,7 @@ public: } } - TrieSpline(std::vector<TrieSpline*> &shards) + TrieSpline(std::vector<TrieSpline*> const &shards) : m_reccnt(0) , m_tombstone_cnt(0) , m_alloc_size(0) diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index d5a2393..477db5c 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -21,13 +21,15 @@ using psudb::CACHELINE_SIZE; using psudb::PriorityQueue; -using psudb::queue_record; using psudb::byte; namespace de { template <NDRecordInterface R, size_t LEAFSZ=100, bool HMAP=false> class VPTree { +public: + typedef R RECORD; + private: struct vpnode { size_t start; @@ -50,7 +52,7 @@ private: public: VPTree(BufferView<R> buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { + : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) { m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, @@ -59,8 +61,6 @@ 
public: (byte**) &m_data); m_ptrs = new vp_ptr[buffer.get_record_count()]; - - size_t offset = 0; m_reccnt = 0; // FIXME: will eventually need to figure out tombstones @@ -87,7 +87,7 @@ public: } VPTree(std::vector<VPTree*> shards) - : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { + : m_reccnt(0), m_tombstone_cnt(0), m_node_cnt(0), m_root(nullptr) { size_t attemp_reccnt = 0; for (size_t i=0; i<shards.size(); i++) { @@ -363,7 +363,6 @@ private: if (d < *farthest) { if (pq.size() == k) { - auto t = pq.peek().data->rec; pq.pop(); } pq.push(m_ptrs[node->start].ptr); diff --git a/include/util/Cursor.h b/include/util/Cursor.h index e8ba53d..e7963b1 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -1,8 +1,8 @@ /* * include/util/Cursor.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -21,16 +21,15 @@ #include <vector> namespace de { -template<typename R> -struct Cursor { - R *ptr; - R *end; - size_t cur_rec_idx; - size_t rec_cnt; +template <typename R> struct Cursor { + const R *ptr; + const R *end; + size_t cur_rec_idx; + size_t rec_cnt; - friend bool operator==(const Cursor &a, const Cursor &b) { - return a.ptr == b.ptr && a.end == b.end; - } + friend bool operator==(const Cursor &a, const Cursor &b) { + return a.ptr == b.ptr && a.end == b.end; + } }; /* @@ -43,51 +42,55 @@ struct Cursor { * be updated to be equal to end, and false will be returned. Iterators will * not be closed. */ -template<typename R> -inline static bool advance_cursor(Cursor<R> &cur) { - cur.ptr++; - cur.cur_rec_idx++; +template <typename R> inline static bool advance_cursor(Cursor<R> &cur) { + cur.ptr++; + cur.cur_rec_idx++; - if (cur.cur_rec_idx >= cur.rec_cnt) return false; + if (cur.cur_rec_idx >= cur.rec_cnt) + return false; - if (cur.ptr >= cur.end) { - return false; - } - return true; + if (cur.ptr >= cur.end) { + return false; + } + return true; } /* * Process the list of cursors to return the cursor containing the next * largest element. Does not advance any of the cursors. If current is - * specified, then skip the current head of that cursor during checking. - * This allows for "peaking" at the next largest element after the current + * specified, then skip the current head of that cursor during checking. + * This allows for "peaking" at the next largest element after the current * largest is processed. */ template <typename R> -inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) { - const R *min_rec = nullptr; - Cursor<R> *result = nullptr; - // FIXME: for large cursor vectors, it may be worth it to use a - // PriorityQueue here instead of scanning. - for (size_t i=0; i< cursors.size(); i++) { - if (cursors[i] == (Cursor<R>) {0} ) continue; - - const R *rec = (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr; - if (rec >= cursors[i].end) continue; +inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, + Cursor<R> *current = nullptr) { + const R *min_rec = nullptr; + Cursor<R> *result = nullptr; + // FIXME: for large cursor vectors, it may be worth it to use a + // PriorityQueue here instead of scanning. 
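The two helpers in this header are meant to be driven together: get_next() scans the cursor heads for the smallest remaining record without consuming it, and advance_cursor() then steps the chosen cursor forward. A minimal sketch of that scan-based k-way merge, assuming Cursor.h is included and R is an ordered record type; the function and variable names here are illustrative, not part of the header:

    /* Drain a set of cursors in ascending order using get_next()/advance_cursor(). */
    template <typename R>
    static void drain_in_order(std::vector<de::Cursor<R>> &cursors,
                               std::vector<R> &out) {
      /* get_next() returns nullptr once every cursor is exhausted */
      while (de::Cursor<R> *cur = de::get_next(cursors)) {
        out.push_back(*cur->ptr);   /* consume the smallest head record */
        de::advance_cursor(*cur);   /* step past it; exhausted cursors are
                                       skipped by later get_next() calls */
      }
    }
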
+ for (size_t i = 0; i < cursors.size(); i++) { + if (cursors[i] == (Cursor<R>){0}) + continue; - if (min_rec == nullptr) { - result = &cursors[i]; - min_rec = rec; - continue; - } + const R *rec = + (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr; + if (rec >= cursors[i].end) + continue; - if (*rec < *min_rec) { - result = &cursors[i]; - min_rec = rec; - } + if (min_rec == nullptr) { + result = &cursors[i]; + min_rec = rec; + continue; } - return result; -} + if (*rec < *min_rec) { + result = &cursors[i]; + min_rec = rec; + } + } + return result; } + +} // namespace de diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h index c149189..b0a3215 100644 --- a/include/util/SortedMerge.h +++ b/include/util/SortedMerge.h @@ -1,72 +1,78 @@ /* * include/util/SortedMerge.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * * A sorted array merge routine for use in Shard construction, as many - * shards will use a sorted array to represent their data. Also encapsulates + * shards will use a sorted array to represent their data. Also encapsulates * the necessary tombstone-cancellation logic. * - * FIXME: include generic per-record processing functionality for Shards that + * FIXME: include generic per-record processing functionality for Shards that * need it, to avoid needing to reprocess the array in the shard after * creation. */ #pragma once -#include "util/Cursor.h" +#include <algorithm> + #include "framework/interface/Shard.h" #include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" namespace de { -using psudb::PriorityQueue; using psudb::BloomFilter; -using psudb::queue_record; using psudb::byte; using psudb::CACHELINE_SIZE; +using psudb::PriorityQueue; +using psudb::queue_record; /* - * A simple struct to return record_count and tombstone_count information - * back to the caller. Could've been an std::pair, but I like the more + * A simple struct to return record_count and tombstone_count information + * back to the caller. Could've been an std::pair, but I like the more * explicit names. */ struct merge_info { - size_t record_count; - size_t tombstone_count; + size_t record_count; + size_t tombstone_count; }; /* * Build a vector of cursors corresponding to the records contained within * a vector of shards. The cursor at index i in the output will correspond - * to the shard at index i in the input. + * to the shard at index i in the input. * * The values of reccnt and tscnt will be updated with the sum of the * records contained within the shards. Note that these counts include deleted * records that may be removed during shard construction, and so constitute * upper bounds only. 
*/ -template <RecordInterface R, ShardInterface<R> S> -static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) { - std::vector<Cursor<Wrapped<R>>> cursors; - cursors.reserve(shards.size()); - - *reccnt = 0; - *tscnt = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor<Wrapped<R>>{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - *reccnt += shards[i]->get_record_count(); - *tscnt += shards[i]->get_tombstone_count(); - } else { - cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); - } +template <RecordInterface R, ShardInterface S> +static std::vector<Cursor<Wrapped<R>>> +build_cursor_vec(std::vector<S *> const &shards, size_t *reccnt, + size_t *tscnt) { + std::vector<Cursor<Wrapped<R>>> cursors; + cursors.reserve(shards.size()); + + *reccnt = 0; + *tscnt = 0; + + for (size_t i = 0; i < shards.size(); ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back( + Cursor<Wrapped<R>>{base, base + shards[i]->get_record_count(), 0, + shards[i]->get_record_count()}); + *reccnt += shards[i]->get_record_count(); + *tscnt += shards[i]->get_tombstone_count(); + } else { + cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); } + } - return cursors; + return cursors; } /* @@ -80,126 +86,128 @@ static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, * program will be aborted if the allocation fails. */ template <RecordInterface R> -static merge_info sorted_array_from_bufferview(BufferView<R> bv, - Wrapped<R> *buffer, - psudb::BloomFilter<R> *bf=nullptr) { - /* - * Copy the contents of the buffer view into a temporary buffer, and - * sort them. We still need to iterate over these temporary records to - * apply tombstone/deleted record filtering, as well as any possible - * per-record processing that is required by the shard being built. - */ - auto temp_buffer = (Wrapped<R> *) psudb::sf_aligned_calloc(CACHELINE_SIZE, - bv.get_record_count(), - sizeof(Wrapped<R>)); - bv.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + bv.get_record_count(); - std::sort(base, stop, std::less<Wrapped<R>>()); - - merge_info info = {0, 0}; - - /* - * Iterate over the temporary buffer to process the records, copying - * them into buffer as needed - */ - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - buffer[info.record_count++] = *base; - - if (base->is_tombstone()) { - info.tombstone_count++; - if (bf){ - bf->insert(base->rec); - } - } +static merge_info +sorted_array_from_bufferview(BufferView<R> bv, Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf = nullptr) { + /* + * Copy the contents of the buffer view into a temporary buffer, and + * sort them. We still need to iterate over these temporary records to + * apply tombstone/deleted record filtering, as well as any possible + * per-record processing that is required by the shard being built. 
+ */ + auto temp_buffer = (Wrapped<R> *)psudb::sf_aligned_calloc( + CACHELINE_SIZE, bv.get_record_count(), sizeof(Wrapped<R>)); + bv.copy_to_buffer((byte *)temp_buffer); + + auto base = temp_buffer; + auto stop = base + bv.get_record_count(); + std::sort(base, stop, std::less<Wrapped<R>>()); + + merge_info info = {0, 0}; + + /* + * Iterate over the temporary buffer to process the records, copying + * them into buffer as needed + */ + while (base < stop) { + if (!base->is_tombstone() && (base + 1 < stop) && + base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; + } else if (base->is_deleted()) { + base += 1; + continue; + } - base++; + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. It should only need to be &= 1 + base->header &= 3; + buffer[info.record_count++] = *base; + + if (base->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(base->rec); + } } - free(temp_buffer); - return info; + base++; + } + + free(temp_buffer); + return info; } /* * Perform a sorted merge of the records within cursors into the provided * buffer. Includes tombstone and tagged delete cancellation logic, and - * will insert tombstones into a bloom filter, if one is provided. + * will insert tombstones into a bloom filter, if one is provided. * * The behavior of this function is undefined if the provided buffer does * not have space to contain all of the records within the input cursors. */ template <RecordInterface R> -static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, - Wrapped<R> *buffer, - psudb::BloomFilter<R> *bf=nullptr) { - - // FIXME: For smaller cursor arrays, it may be more efficient to skip - // the priority queue and just do a scan. - PriorityQueue<Wrapped<R>> pq(cursors.size()); - for (size_t i=0; i<cursors.size(); i++) { - pq.push(cursors[i].ptr, i); - } - - merge_info info = {0, 0}; - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; - /* - * if the current record is not a tombstone, and the next record is - * a tombstone that matches the current one, then the current one - * has been deleted, and both it and its tombstone can be skipped - * over. +static merge_info sorted_array_merge(std::vector<Cursor<Wrapped<R>>> &cursors, + Wrapped<R> *buffer, + psudb::BloomFilter<R> *bf = nullptr) { + + // FIXME: For smaller cursor arrays, it may be more efficient to skip + // the priority queue and just do a scan. + PriorityQueue<Wrapped<R>> pq(cursors.size()); + for (size_t i = 0; i < cursors.size(); i++) { + pq.push(cursors[i].ptr, i); + } + + merge_info info = {0, 0}; + while (pq.size()) { + auto now = pq.peek(); + auto next = + pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0}; + /* + * if the current record is not a tombstone, and the next record is + * a tombstone that matches the current one, then the current one + * has been deleted, and both it and its tombstone can be skipped + * over. 
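In shard code these helpers typically appear together as the reconstruction path: cursors are built over the source shards, an output array is sized from the (upper-bound) record count, and sorted_array_merge() performs the merge with the record/tombstone cancellation described in the comments here. A condensed sketch of that flow, assuming a shard type S usable with build_cursor_vec (e.g. ISAMTree<R>); the wrapper function itself is illustrative, not part of this header:

    template <de::RecordInterface R, de::ShardInterface S>
    static de::merge_info rebuild_sorted_array(std::vector<S *> const &shards,
                                               de::Wrapped<R> *&out,
                                               psudb::BloomFilter<R> *bf = nullptr) {
      size_t reccnt = 0, tscnt = 0;  /* upper bounds: cancelled records drop out */
      auto cursors = de::build_cursor_vec<R>(shards, &reccnt, &tscnt);
      (void) tscnt;                  /* normally used to size the tombstone bloom filter */

      out = (de::Wrapped<R> *) psudb::sf_aligned_calloc(
          psudb::CACHELINE_SIZE, reccnt, sizeof(de::Wrapped<R>));

      /* record/tombstone pairs and tagged deletes are cancelled inside the merge */
      return de::sorted_array_merge<R>(cursors, out, bf);
    }
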
+ */ + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); + pq.pop(); + auto &cursor1 = cursors[now.version]; + auto &cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) + pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) + pq.push(cursor2.ptr, next.version); + } else { + auto &cursor = cursors[now.version]; + /* skip over records that have been deleted via tagging */ + if (!cursor.ptr->is_deleted()) { + buffer[info.record_count++] = *cursor.ptr; + + /* + * if the record is a tombstone, increment the ts count and + * insert it into the bloom filter if one has been + * provided. */ - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - /* skip over records that have been deleted via tagging */ - if (!cursor.ptr->is_deleted()) { - buffer[info.record_count++] = *cursor.ptr; - - /* - * if the record is a tombstone, increment the ts count and - * insert it into the bloom filter if one has been - * provided. - */ - if (cursor.ptr->is_tombstone()) { - info.tombstone_count++; - if (bf) { - bf->insert(cursor.ptr->rec); - } - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); + if (cursor.ptr->is_tombstone()) { + info.tombstone_count++; + if (bf) { + bf->insert(cursor.ptr->rec); + } } + } + pq.pop(); + + if (advance_cursor(cursor)) + pq.push(cursor.ptr, now.version); } + } - return info; + return info; } - - -} +} // namespace de diff --git a/include/util/bf_config.h b/include/util/bf_config.h index 9f29ed7..836e452 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -1,8 +1,8 @@ /* * include/util/bf_config.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -26,19 +26,15 @@ static double BF_FPR = .01; static size_t BF_HASH_FUNCS = 7; /* - * Adjust the value of BF_FPR. The argument must be on the interval + * Adjust the value of BF_FPR. The argument must be on the interval * (0, 1), or the behavior of bloom filters is undefined. */ -static void BF_SET_FPR(double fpr) { - BF_FPR = fpr; -} +[[maybe_unused]] static void BF_SET_FPR(double fpr) { BF_FPR = fpr; } /* * Adjust the value of BF_HASH_FUNCS. The argument must be on the interval * (0, INT64_MAX], or the behavior of bloom filters is undefined. */ -static void BF_SET_HASHFUNC(size_t func_cnt) { - BF_HASH_FUNCS = func_cnt; -} +[[maybe_unused]] static void BF_SET_HASHFUNC(size_t func_cnt) { BF_HASH_FUNCS = func_cnt; } -} +} // namespace de diff --git a/include/util/types.h b/include/util/types.h index cf61412..b8a1343 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -1,7 +1,7 @@ /* * include/util/types.h * - * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas B. Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. 
* @@ -17,10 +17,10 @@ */ #pragma once +#include <cassert> #include <cstdint> #include <cstdlib> #include <vector> -#include <cassert> namespace de { @@ -30,14 +30,14 @@ typedef uint32_t PageNum; /* * Byte offset within a page. Also used for lengths of records, etc., * within the codebase. size_t isn't necessary, as the maximum offset - * is only parm::PAGE_SIZE + * is only parm::PAGE_SIZE */ typedef uint16_t PageOffset; /* A unique identifier for a frame within a buffer or cache */ typedef int32_t FrameId; -/* +/* * A unique timestamp for use in MVCC concurrency control. Currently stored in * record headers, but not used by anything. */ @@ -45,7 +45,7 @@ typedef uint32_t Timestamp; const Timestamp TIMESTAMP_MIN = 0; const Timestamp TIMESTAMP_MAX = UINT32_MAX; -/* +/* * Invalid values for various IDs. Used throughout the code base to indicate * uninitialized values and error conditions. */ @@ -60,90 +60,85 @@ const FrameId INVALID_FRID = -1; * as a contiguous index space. */ struct ShardID { - ssize_t level_idx; - ssize_t shard_idx; + ssize_t level_idx; + ssize_t shard_idx; - friend bool operator==(const ShardID &shid1, const ShardID &shid2) { - return shid1.level_idx == shid2.level_idx && shid1.shard_idx == shid2.shard_idx; - } + friend bool operator==(const ShardID &shid1, const ShardID &shid2) { + return shid1.level_idx == shid2.level_idx && + shid1.shard_idx == shid2.shard_idx; + } }; -/* A placeholder for an invalid shard--also used to indicate the mutable buffer */ +/* + * A placeholder for an invalid shard--also used to indicate the + * mutable buffer + */ const ShardID INVALID_SHID = {-1, -1}; typedef ssize_t level_index; typedef struct ReconstructionTask { - std::vector<level_index> sources; - level_index target; - size_t reccnt; + std::vector<level_index> sources; + level_index target; + size_t reccnt; - void add_source(level_index source, size_t cnt) { - sources.push_back(source); - reccnt += cnt; - } + void add_source(level_index source, size_t cnt) { + sources.push_back(source); + reccnt += cnt; + } } ReconstructionTask; class ReconstructionVector { public: - ReconstructionVector() - : total_reccnt(0) {} + ReconstructionVector() : total_reccnt(0) {} - ~ReconstructionVector() = default; + ~ReconstructionVector() = default; - ReconstructionTask operator[](size_t idx) { - return m_tasks[idx]; - } + ReconstructionTask operator[](size_t idx) { return m_tasks[idx]; } - void add_reconstruction(level_index source, level_index target, size_t reccnt) { - m_tasks.push_back({{source}, target, reccnt}); - total_reccnt += reccnt; - } + void add_reconstruction(level_index source, level_index target, + size_t reccnt) { + m_tasks.push_back({{source}, target, reccnt}); + total_reccnt += reccnt; + } - void add_reconstruction(ReconstructionTask task) { - m_tasks.push_back(task); - } + void add_reconstruction(ReconstructionTask task) { m_tasks.push_back(task); } - ReconstructionTask remove_reconstruction(size_t idx) { - assert(idx < m_tasks.size()); - auto task = m_tasks[idx]; + ReconstructionTask remove_reconstruction(size_t idx) { + assert(idx < m_tasks.size()); + auto task = m_tasks[idx]; - m_tasks.erase(m_tasks.begin() + idx); - total_reccnt -= task.reccnt; + m_tasks.erase(m_tasks.begin() + idx); + total_reccnt -= task.reccnt; - return task; - } + return task; + } - ReconstructionTask remove_smallest_reconstruction() { - size_t min_size = m_tasks[0].reccnt; - size_t idx = 0; - for (size_t i=1; i<m_tasks.size(); i++) { - if (m_tasks[i].reccnt < min_size) { - min_size = m_tasks[i].reccnt; - idx = 
i; - } - } - - auto task = m_tasks[idx]; - m_tasks.erase(m_tasks.begin() + idx); - total_reccnt -= task.reccnt; - - return task; + ReconstructionTask remove_smallest_reconstruction() { + size_t min_size = m_tasks[0].reccnt; + size_t idx = 0; + for (size_t i = 1; i < m_tasks.size(); i++) { + if (m_tasks[i].reccnt < min_size) { + min_size = m_tasks[i].reccnt; + idx = i; + } } - size_t get_total_reccnt() { - return total_reccnt; - } + auto task = m_tasks[idx]; + m_tasks.erase(m_tasks.begin() + idx); + total_reccnt -= task.reccnt; - size_t size() { - return m_tasks.size(); - } + return task; + } + + size_t get_total_reccnt() { return total_reccnt; } + size_t size() { return m_tasks.size(); } private: - std::vector<ReconstructionTask> m_tasks; - size_t total_reccnt; + std::vector<ReconstructionTask> m_tasks; + size_t total_reccnt; }; -} +} // namespace de diff --git a/tests/alias_tests.cpp b/tests/alias_tests.cpp index 98d0c63..dcd3eec 100644 --- a/tests/alias_tests.cpp +++ b/tests/alias_tests.cpp @@ -1,7 +1,7 @@ /* - * tests/alias_tests.cpp + * tests/isam_tests.cpp * - * Unit tests for Alias shard + * Unit tests for ISAM Tree shard * * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> * Dong Xie <dongx@psu.edu> @@ -11,27 +11,22 @@ */ #include "shard/Alias.h" -#include "query/wss.h" -#include "framework/structure/MutableBuffer.h" #include "include/testing.h" - - #include <check.h> using namespace de; -typedef WRec R; +typedef WeightedRecord<uint64_t, uint32_t, uint32_t> R; typedef Alias<R> Shard; - #include "include/shard_standard.h" -#include "include/rangequery.h" +#include "include/wss.h" Suite *unit_testing() { - Suite *unit = suite_create("ISAMTree Shard Unit Testing"); + Suite *unit = suite_create("Walker's Alias Shard Unit Testing"); - inject_rangequery_tests(unit); + inject_wss_tests(unit); inject_shard_tests(unit); return unit; @@ -58,4 +53,3 @@ int main() return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; } - diff --git a/tests/de_bsm_tag.cpp b/tests/de_bsm_tag.cpp new file mode 100644 index 0000000..4063cfe --- /dev/null +++ b/tests/de_bsm_tag.cpp @@ -0,0 +1,61 @@ +/* + * tests/de_level_tomb.cpp + * + * Unit tests for Dynamic Extension Framework + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ +#include <set> +#include <random> +#include <algorithm> + +#include "include/testing.h" +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangequery.h" + +#include <check.h> +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::BSM, DeletePolicy::TAGGING, SerialScheduler> DE; + +#include "include/dynamic_extension.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("DynamicExtension: Tagging BSM Testing"); + inject_dynamic_extension_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/de_bsm_tomb.cpp b/tests/de_bsm_tomb.cpp index 493440e..3a24e87 100644 --- a/tests/de_bsm_tomb.cpp +++ b/tests/de_bsm_tomb.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::BSM, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::BSM, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_level_concurrent.cpp b/tests/de_level_concurrent.cpp index d1e0496..afd1af2 100644 --- a/tests/de_level_concurrent.cpp +++ b/tests/de_level_concurrent.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/concurrent_extension.h" diff --git a/tests/de_level_tag.cpp b/tests/de_level_tag.cpp index 75131c4..c175357 100644 --- a/tests/de_level_tag.cpp +++ b/tests/de_level_tag.cpp @@ -22,7 +22,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_level_tomb.cpp b/tests/de_level_tomb.cpp index 6da211d..e587817 100644 --- a/tests/de_level_tomb.cpp +++ b/tests/de_level_tomb.cpp @@ -23,7 +23,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_tier_concurrent.cpp b/tests/de_tier_concurrent.cpp index bb2ec7f..ce41dbc 100644 --- a/tests/de_tier_concurrent.cpp +++ b/tests/de_tier_concurrent.cpp @@ -17,12 +17,16 @@ #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" #include "query/rangequery.h" +#include "framework/scheduling//FIFOScheduler.h" #include <check.h> using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; #include "include/concurrent_extension.h" diff --git a/tests/de_tier_tag.cpp b/tests/de_tier_tag.cpp index 79bb7bf..97a5299 100644 --- a/tests/de_tier_tag.cpp +++ b/tests/de_tier_tag.cpp @@ -23,7 +23,10 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; + +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/de_tier_tomb.cpp b/tests/de_tier_tomb.cpp index b1387bb..930d0d5 100644 --- a/tests/de_tier_tomb.cpp +++ 
b/tests/de_tier_tomb.cpp @@ -23,7 +23,9 @@ using namespace de; typedef Rec R; -typedef DynamicExtension<Rec, ISAMTree<R>, rq::Query<R, ISAMTree<R>>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; +typedef ISAMTree<R> S; +typedef rq::Query<S> Q; +typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE; #include "include/dynamic_extension.h" diff --git a/tests/include/concurrent_extension.h b/tests/include/concurrent_extension.h index 927a094..02bd694 100644 --- a/tests/include/concurrent_extension.h +++ b/tests/include/concurrent_extension.h @@ -22,17 +22,20 @@ * should be included in the source file that includes this one, above the * include statement. */ -/*#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/FIFOScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> - -//using namespace de; -//typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; -*/ - +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// //#include "framework/scheduling/FIFOScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <set> +// #include <random> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE> DE; //, FIFOScheduler> DE; START_TEST(t_create) { @@ -164,11 +167,11 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); @@ -203,8 +206,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; while (!test_de->insert(r)) { @@ -220,7 +221,6 @@ START_TEST(t_tombstone_merging_01) while (!test_de->erase(dr)) { _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -258,7 +258,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); @@ -268,7 +267,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -304,15 +302,10 @@ START_TEST(t_static_structure) records.insert({key, val}); } - size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; while (!test_de->insert(rec)) { _mm_pause(); } - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { std::vector<R> del_vec; @@ -323,7 +316,6 @@ START_TEST(t_static_structure) _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } diff --git a/tests/include/dynamic_extension.h b/tests/include/dynamic_extension.h index 6e9b16c..90c6906 100644 --- a/tests/include/dynamic_extension.h +++ b/tests/include/dynamic_extension.h @@ -22,18 +22,24 @@ * should be included in the source file that includes this one, above the * include statement. 
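Beyond the typedef changes, the end-to-end flow these shared test headers exercise against a DynamicExtension instance is roughly the following. A condensed sketch using the R/Q/DE preamble typedefs that each de_*.cpp defines before the include, with illustrative keys; the retry loop mirrors the concurrent tests, and the constructor arguments are the same values the tests pass:

    DE extension(100, 1000, 2);    /* constructed as in the tests */

    R rec = {123, 456};
    while (!extension.insert(rec)) {
      _mm_pause();                 /* concurrent schedulers may reject inserts
                                      while a flush is in progress */
    }

    extension.erase(rec);          /* tombstone or tag, per DeletePolicy */

    Q::Parameters p;
    p.lower_bound = 100;
    p.upper_bound = 200;
    auto future = extension.query(std::move(p));
    auto result = future.get();    /* future-style result retrieval */
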
*/ -/* -#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/SerialScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> -using namespace de; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; -*/ + +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// #include "framework/scheduling/SerialScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <random> +// #include <set> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +#include "framework/util/Configuration.h" START_TEST(t_create) { auto test_de = new DE(100, 1000, 2); @@ -103,7 +109,16 @@ START_TEST(t_insert_with_mem_merges) test_de->await_next_epoch(); ck_assert_int_eq(test_de->get_record_count(), 300); - ck_assert_int_eq(test_de->get_height(), 1); + + /* + * BSM grows on every flush, so the height will be different than + * normal layout policies + */ + if (test_de->Layout == de::LayoutPolicy::BSM) { + ck_assert_int_eq(test_de->get_height(), 2); + } else { + ck_assert_int_eq(test_de->get_height(), 1); + } delete test_de; } @@ -138,11 +153,12 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; + p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); ck_assert_int_eq(r.size(), 251); @@ -176,8 +192,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; ck_assert_int_eq(test_de->insert(r), 1); @@ -189,7 +203,6 @@ START_TEST(t_tombstone_merging_01) for (size_t i=0; i<del_vec.size(); i++) { R dr = {del_vec[i].first, del_vec[i].second}; test_de->erase(dr); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -209,14 +222,14 @@ START_TEST(t_tombstone_merging_01) } END_TEST -DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { +[[maybe_unused]] static DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { auto rng = gsl_rng_alloc(gsl_rng_mt19937); auto test_de = new DE(1000, 10000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -227,17 +240,15 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -260,9 +271,9 @@ START_TEST(t_static_structure) size_t reccnt = 100000; auto test_de = new DE(100, 1000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> 
deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -274,15 +285,11 @@ START_TEST(t_static_structure) } size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; ck_assert_int_eq(test_de->insert(rec), 1); - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { diff --git a/tests/include/irs.h b/tests/include/irs.h new file mode 100644 index 0000000..1c5be2c --- /dev/null +++ b/tests/include/irs.h @@ -0,0 +1,165 @@ +/* + * tests/include/irs.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +#include "query/irs.h" +#include <algorithm> + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. + */ +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "testing.h" +#include <check.h> +#include <gsl/gsl_rng.h> +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; +typedef irs::Query<ISAMTree<R>> Query; + +static gsl_rng *g_rng; + +START_TEST(t_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + auto local_query = irs::Query<Shard>::local_preproc(&shard, &parms); + irs::Query<Shard>::distribute_query(&parms, {local_query}, nullptr); + + auto result = irs::Query<Shard>::local_query(&shard, local_query); + delete local_query; + + ck_assert_int_eq(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + { + auto view = buffer->get_buffer_view(); + auto query = irs::Query<Shard>::local_preproc_buffer(&view, &parms); + irs::Query<Shard>::distribute_query(&parms, {}, query); + auto result = irs::Query<Shard>::local_query_buffer(query); + delete query; + + ck_assert_int_le(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + } + + delete buffer; +} +END_TEST + + +START_TEST(t_irs_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 
1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = Shard(buffer2->get_buffer_view()); + + size_t k = 10; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + /* necessary to store the alias structure */ + auto dummy_buffer_query = irs::Query<Shard>::LocalQueryBuffer(); + dummy_buffer_query.buffer = nullptr; + dummy_buffer_query.sample_size = 0; + dummy_buffer_query.cutoff = 0; + dummy_buffer_query.global_parms = parms; + dummy_buffer_query.records = {}; + dummy_buffer_query.alias = nullptr; + + auto query1 = irs::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = irs::Query<Shard>::local_preproc(&shard2, &parms); + + irs::Query<Shard>::distribute_query(&parms, {query1, query2}, &dummy_buffer_query); + + std::vector<std::vector<irs::Query<Shard>::LocalResultType>> results(2); + results[0] = irs::Query<Shard>::local_query(&shard1, query1); + results[1] = irs::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; + + ck_assert_int_eq(results[0].size() + results[1].size(), k); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + std::vector<irs::Query<Shard>::ResultType> result; + irs::Query<Shard>::combine(proc_results, nullptr, result); + ck_assert_int_eq(result.size(), k); + + delete buffer1; + delete buffer2; +} +END_TEST + +static void inject_irs_tests(Suite *suite) { + g_rng = gsl_rng_alloc(gsl_rng_mt19937); + + TCase *irs = tcase_create("Independent Range Sampling Query Testing"); + tcase_add_test(irs, t_irs); + tcase_add_test(irs, t_buffer_irs); + tcase_add_test(irs, t_irs_merge); + suite_add_tcase(suite, irs); +} diff --git a/tests/include/pointlookup.h b/tests/include/pointlookup.h index 71a1099..af58440 100644 --- a/tests/include/pointlookup.h +++ b/tests/include/pointlookup.h @@ -17,6 +17,8 @@ */ #pragma once +#include "query/pointlookup.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -25,15 +27,12 @@ * include statement. 
*/ -//#include "shard/FSTrie.h" -#include "query/pointlookup.h" +#include "shard/FSTrie.h" #include "testing.h" - #include <check.h> - using namespace de; -//typedef StringRec R; -//typedef FSTrie<R> Shard; +typedef StringRec R; +typedef FSTrie<R> Shard; START_TEST(t_point_lookup_query) { @@ -45,23 +44,21 @@ START_TEST(t_point_lookup_query) for (size_t i=0; i<bv.get_record_count(); i++) { auto key = bv.get(i)->rec.key; - pl::Parms<R> parms = {key}; - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); - + pl::Query<Shard>::Parameters parms = {key}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, key); - //ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, key); + ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 0); } @@ -78,24 +75,21 @@ START_TEST(t_buffer_point_lookup) { auto view = buffer->get_buffer_view(); for (int i=view.get_record_count()-1; i>=0; i--) { - pl::Parms<R> parms = {view.get(i)->rec.key}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {view.get(i)->rec.key}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); - //ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); + ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); ck_assert_int_eq(result.size(), 0); } diff --git a/tests/include/rangecount.h b/tests/include/rangecount.h index 1951221..22189b9 100644 --- a/tests/include/rangecount.h +++ b/tests/include/rangecount.h @@ -1,5 +1,5 @@ /* - * tests/include/rangecount.h + * tests/include/rangequery.h * * Standardized unit tests for range queries against supporting * shard types @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangecount.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in 
this file * temporarily for development purposes. They should be removed prior @@ -24,30 +27,29 @@ * should be included in the source file that includes this one, above the * include statement. */ -//#include "shard/ISAMTree.h" -//#include "query/rangecount.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; -#include "query/rangecount.h" +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rc::Query<ISAMTree<R>> Query; START_TEST(t_range_count) { - auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; - auto state = rc::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rc::Query<R, Shard>::query(&shard, state, &parms); - rc::Query<R, Shard>::delete_query_state(state); + auto local_query = rc::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rc::Query<Shard>::local_query(&shard, local_query); + delete local_query; - ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); delete buffer; } @@ -58,16 +60,15 @@ START_TEST(t_buffer_range_count) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rc::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rc::Query<R, Shard>::buffer_query(state, &parms); - rc::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rc::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rc::Query<Shard>::local_query_buffer(query); + delete query; - ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); } delete buffer; @@ -83,66 +84,31 @@ START_TEST(t_range_count_merge) auto shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rc::Parms<R> parms = {150, 500}; + rc::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rc::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rc::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rc::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rc::Query<R, Shard>::query(&shard2, state2, &parms); - - rc::Query<R, Shard>::delete_query_state(state1); - rc::Query<R, Shard>::delete_query_state(state2); + auto query1 = rc::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rc::Query<Shard>::local_preproc(&shard2, &parms); - ck_assert_int_eq(results[0].size(), 1); - ck_assert_int_eq(results[1].size(), 1); + std::vector<std::vector<rc::Query<Shard>::LocalResultType>> results(2); + results[0] = rc::Query<Shard>::local_query(&shard1, query1); + results[1] = rc::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; - std::vector<R> result; - rc::Query<R, Shard>::merge(results, nullptr, result); + 
size_t reccnt = results[0][0].record_count + results[1][0].record_count; + size_t tscnt = results[0][0].tombstone_count + results[1][0].tombstone_count; - ck_assert_int_eq(result[0].key, result_size); - - delete buffer1; - delete buffer2; -} -END_TEST + ck_assert_int_eq(reccnt - tscnt, result_size); + std::vector<rc::Query<Shard>::ResultType> result; + rc::Query<Shard>::combine(results, nullptr, result); -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } + ck_assert_int_eq(result[0], result_size); delete buffer1; delete buffer2; - delete shard1; - delete shard2; } END_TEST diff --git a/tests/include/rangequery.h b/tests/include/rangequery.h index f90e107..5c3c1d6 100644 --- a/tests/include/rangequery.h +++ b/tests/include/rangequery.h @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangequery.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,26 +27,27 @@ * should be included in the source file that includes this one, above the * include statement. */ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - -#include "query/rangequery.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rq::Query<ISAMTree<R>> Query; START_TEST(t_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); + auto local_query = rq::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rq::Query<Shard>::local_query(&shard, local_query); + delete local_query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -60,13 +64,13 @@ START_TEST(t_buffer_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rq::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rq::Query<Shard>::local_query_buffer(query); + delete query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -88,19 +92,18 @@ START_TEST(t_range_query_merge) auto 
shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rq::Parms<R> parms = {150, 500}; + rq::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); + auto query1 = rq::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rq::Query<Shard>::local_preproc(&shard2, &parms); - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); + std::vector<std::vector<rq::Query<Shard>::LocalResultType>> results(2); + results[0] = rq::Query<Shard>::local_query(&shard1, query1); + results[1] = rq::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; ck_assert_int_eq(results[0].size() + results[1].size(), result_size); @@ -113,8 +116,8 @@ START_TEST(t_range_query_merge) } } - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); + std::vector<rq::Query<Shard>::ResultType> result; + rq::Query<Shard>::combine(proc_results, nullptr, result); std::sort(result.begin(), result.end()); ck_assert_int_eq(result.size(), result_size); @@ -145,8 +148,6 @@ START_TEST(t_lower_bound) auto merged = Shard(shards); for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - auto idx = merged.get_lower_bound(i); assert(idx < merged.get_record_count()); diff --git a/tests/include/shard_standard.h b/tests/include/shard_standard.h index 2809d74..ece2a57 100644 --- a/tests/include/shard_standard.h +++ b/tests/include/shard_standard.h @@ -75,7 +75,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git a/tests/include/shard_string.h b/tests/include/shard_string.h index 881f41a..2ef4cec 100644 --- a/tests/include/shard_string.h +++ b/tests/include/shard_string.h @@ -73,7 +73,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git a/tests/include/testing.h b/tests/include/testing.h index d0bff2d..33cbb3f 100644 --- a/tests/include/testing.h +++ b/tests/include/testing.h @@ -34,7 +34,7 @@ static std::string summa_wordlist = "tests/data/summa-wordlist.txt"; static std::vector<std::unique_ptr<char[]>> string_data; -static std::vector<StringRec> read_string_data(std::string fname, size_t n) { +[[maybe_unused]] static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::vector<StringRec> vec; vec.reserve(n); string_data.reserve(n); @@ -50,14 +50,14 @@ static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::string field; std::getline(ls, field, '\t'); - auto val = atol(field.c_str()); + uint64_t val = atol(field.c_str()); std::getline(ls, field, '\n'); char *c = strdup(field.c_str()); string_data.push_back(std::unique_ptr<char[]>(c)); - StringRec r(string_data[string_data.size() -1].get(), val, field.size()); + StringRec r{string_data[string_data.size() -1].get(), val, field.size()}; vec.push_back(r); } 
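The query-side changes in these tests all follow the same calling convention, which replaces the old get_query_state()/query()/delete_query_state() triple: build a Parameters struct, run local_preproc() per shard, execute local_query(), and reduce the per-shard results with combine(). Sampling queries (irs, wss) additionally pass the preprocessed states through distribute_query() first. A condensed sketch using the range-query specialization from the tests above, with illustrative bounds and variable names:

    using Query = de::rq::Query<de::ISAMTree<R>>;

    Query::Parameters parms = {300, 500};                /* lower_bound, upper_bound */

    auto *local = Query::local_preproc(&shard, &parms);  /* per-shard preprocessing */
    auto partial = Query::local_query(&shard, local);    /* vector of LocalResultType */
    delete local;

    std::vector<std::vector<Query::LocalResultType>> partials = {partial};
    std::vector<Query::ResultType> result;
    Query::combine(partials, nullptr, result);           /* merge the per-shard results */
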
@@ -76,7 +76,7 @@ std::vector<R> strip_wrapping(std::vector<de::Wrapped<R>> vec) { return out; } -static bool initialize_test_file(std::string fname, size_t page_cnt) +[[maybe_unused]] static bool initialize_test_file(std::string fname, size_t page_cnt) { auto flags = O_RDWR | O_CREAT | O_TRUNC; mode_t mode = 0640; @@ -113,7 +113,7 @@ error: return 0; } -static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { +[[maybe_unused]] static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { return ((double) std::abs(n1 - n2) / (double) mag) < epsilon; } diff --git a/tests/include/wirs.h b/tests/include/wirs.h deleted file mode 100644 index 4c0630f..0000000 --- a/tests/include/wirs.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * tests/include/rangequery.h - * - * Standardized unit tests for range queries against supporting - * shard types - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * WARNING: This file must be included in the main unit test set - * after the definition of an appropriate Shard and R - * type. In particular, R needs to implement the key-value - * pair interface and Shard needs to support lower_bound. - * For other types of record and shard, you'll need to - * use a different set of unit tests. - */ -#pragma once - -/* - * Uncomment these lines temporarily to remove errors in this file - * temporarily for development purposes. They should be removed prior - * to building, to ensure no duplicate definitions. These includes/defines - * should be included in the source file that includes this one, above the - * include statement. - */ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - - -START_TEST(t_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - auto shard = Shard(buffer->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_buffer_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - { - auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - } - - delete buffer; -} -END_TEST - - -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - 
size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); - - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } - } - - delete buffer1; - delete buffer2; -} -END_TEST - - -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (size_t i=100; i<1000; i++) { - R r; - r.key = i; - r.value = i; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } - - delete buffer1; - delete buffer2; - delete shard1; - delete shard2; -} -END_TEST - -static void inject_rangequery_tests(Suite *suite) { - TCase *range_query = tcase_create("Range Query Testing"); - tcase_add_test(range_query, t_range_query); - tcase_add_test(range_query, t_buffer_range_query); - tcase_add_test(range_query, t_range_query_merge); - suite_add_tcase(suite, range_query); -} diff --git a/tests/include/wss.h b/tests/include/wss.h index f0ac74c..01327d2 100644 --- a/tests/include/wss.h +++ b/tests/include/wss.h @@ -1,10 +1,10 @@ /* - * tests/include/rangequery.h + * tests/include/wss.h * - * Standardized unit tests for range queries against supporting + * Standardized unit tests for weighted set sampling against supporting * shard types * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -17,6 +17,8 @@ */ #pragma once +#include "query/wss.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,28 +26,38 @@ * should be included in the source file that includes this one, above the * include statement. 
*/ -#include "shard/Alias.h" -#include "testing.h" -#include <check.h> -using namespace de; -typedef Alias<R> Shard; +// #include "framework/interface/Record.h" +// #include "shard/Alias.h" +// #include "testing.h" +// #include <check.h> -#include "query/wss.h" +// using namespace de; + +// typedef WeightedRecord<int64_t, int32_t, int32_t> R; +// typedef Alias<R> Shard; + +typedef wss::Query<Shard> Q; START_TEST(t_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); auto shard = Shard(buffer->get_buffer_view()); - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; - parms.sample_size = 20; + parms.sample_size = k; + + auto query = Q::local_preproc(&shard, &parms); + Q::distribute_query(&parms, {query}, nullptr); + + auto result = Q::local_query(&shard, query); + delete query; + + ck_assert_int_eq(result.size(), k); - auto state = wss::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = wss::Query<R, Shard>::query(&shard, state, &parms); - wss::Query<R, Shard>::delete_query_state(state); delete buffer; gsl_rng_free(rng); @@ -56,83 +68,28 @@ END_TEST START_TEST(t_buffer_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); - - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; + parms.sample_size = k; { auto view = buffer->get_buffer_view(); - auto state = wss::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = wss::Query<R, Shard>::buffer_query(state, &parms); - wss::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.sample_size); - for (size_t i=0; i<result.size(); i++) { - - } - } - - delete buffer; -} -END_TEST + auto query = Q::local_preproc_buffer(&view, &parms); + Q::distribute_query(&parms, {}, query); + auto result = Q::local_query_buffer(query); - -/* -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - wss::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = wss::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = wss::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = wss::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = wss::Query<R, Shard>::query(&shard2, state2, &parms); - - wss::Query<R, Shard>::delete_query_state(state1); - wss::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - auto result = wss::Query<R, Shard>::merge(proc_results, nullptr); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } + delete query; + ck_assert_int_le(result.size(), k); } - delete buffer1; - delete buffer2; + delete buffer; + 
gsl_rng_free(rng); } END_TEST -*/ static void inject_wss_tests(Suite *suite) { diff --git a/tests/internal_level_tests.cpp b/tests/internal_level_tests.cpp index 06b0bab..e11b7c7 100644 --- a/tests/internal_level_tests.cpp +++ b/tests/internal_level_tests.cpp @@ -22,7 +22,7 @@ using namespace de; -typedef InternalLevel<Rec, ISAMTree<Rec>, rq::Query<Rec, ISAMTree<Rec>>> ILevel; +typedef InternalLevel<ISAMTree<Rec>, rq::Query<ISAMTree<Rec>>> ILevel; START_TEST(t_memlevel_merge) { diff --git a/tests/irs_tests.cpp b/tests/irs_tests.cpp new file mode 100644 index 0000000..6ef03f4 --- /dev/null +++ b/tests/irs_tests.cpp @@ -0,0 +1,55 @@ +/* + * tests/irs_tests.cpp + * + * Unit tests for independent range sampling across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "shard/ISAMTree.h" +#include "query/rangequery.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; + +#include "include/irs.h" + +Suite *unit_testing() +{ + Suite *unit = suite_create("Independent Range Sampling Query Testing"); + inject_irs_tests(unit); + + return unit; +} + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + gsl_rng_free(g_rng); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/mutable_buffer_tests.cpp b/tests/mutable_buffer_tests.cpp index 26057a2..c3e1b34 100644 --- a/tests/mutable_buffer_tests.cpp +++ b/tests/mutable_buffer_tests.cpp @@ -323,13 +323,11 @@ START_TEST(t_bview_delete) /* insert 75 records and get tail when LWM is exceeded */ size_t new_head = 0; Rec rec = {1, 1}; - size_t cnt = 0; for (size_t i=0; i<75; i++) { ck_assert_int_eq(buffer->append(rec), 1); rec.key++; rec.value++; - cnt++; if (buffer->is_at_low_watermark() && new_head == 0) { new_head = buffer->get_tail(); @@ -343,7 +341,6 @@ START_TEST(t_bview_delete) rec.key++; rec.value++; - cnt++; } Rec dr1 = {67, 67}; diff --git a/tests/vptree_tests.cpp b/tests/vptree_tests.cpp index ff99ba6..faa704f 100644 --- a/tests/vptree_tests.cpp +++ b/tests/vptree_tests.cpp @@ -20,6 +20,8 @@ using namespace de; typedef PRec R; typedef VPTree<R> Shard; +typedef knn::Query<Shard> Q; + START_TEST(t_mbuffer_init) { @@ -123,15 +125,15 @@ START_TEST(t_buffer_query) target.data[0] = 120; target.data[1] = 120; - knn::Parms<PRec> p; + Q::Parameters p; p.k = 10; p.point = target; { auto bv = buffer->get_buffer_view(); - auto state = knn::Query<PRec, Shard>::get_buffer_query_state(&bv, &p); - auto result = knn::Query<PRec, Shard>::buffer_query(state, &p); - knn::Query<PRec, Shard>::delete_buffer_query_state(state); + auto query = Q::local_preproc_buffer(&bv, &p); + auto result = Q::local_query_buffer(query); + delete query; std::sort(result.begin(), result.end()); size_t start = 120 - 5; @@ -150,15 +152,16 @@ START_TEST(t_knn_query) auto vptree = VPTree<PRec>(buffer->get_buffer_view()); - knn::Parms<PRec> p; + Q::Parameters p; + for (size_t i=0; i<100; i++) { p.k = rand() % 150; p.point.data[0] = rand() % (n-p.k); p.point.data[1] = p.point.data[0]; - auto state = knn::Query<PRec, Shard>::get_query_state(&vptree, &p); - auto results = 
knn::Query<PRec, Shard>::query(&vptree, state, &p); - knn::Query<PRec, Shard>::delete_query_state(state); + auto query = Q::local_preproc(&vptree, &p); + auto results = Q::local_query(&vptree, query); + delete query; ck_assert_int_eq(results.size(), p.k); diff --git a/tests/wss_tests.cpp b/tests/wss_tests.cpp new file mode 100644 index 0000000..39e9d6e --- /dev/null +++ b/tests/wss_tests.cpp @@ -0,0 +1,56 @@ +/* + * tests/wss_tests.cpp + * + * Unit tests for weighted set sampling across several different + * shards + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Dong Xie <dongx@psu.edu> + * + * Distributed under the Modified BSD License. + * + */ + +#include "framework/interface/Record.h" +#include "shard/Alias.h" +#include "include/testing.h" + +#include <check.h> + +using namespace de; + +typedef WeightedRecord<int64_t, int32_t, int32_t> R; +typedef Alias<R> Shard; + +#include "include/wss.h" + + +Suite *unit_testing() +{ + Suite *unit = suite_create("Weighted Set Sampling Query Testing"); + inject_wss_tests(unit); + + return unit; +} + + +int shard_unit_tests() +{ + int failed = 0; + Suite *unit = unit_testing(); + SRunner *unit_shardner = srunner_create(unit); + + srunner_run_all(unit_shardner, CK_NORMAL); + failed = srunner_ntests_failed(unit_shardner); + srunner_free(unit_shardner); + + return failed; +} + + +int main() +{ + int unit_failed = shard_unit_tests(); + + return (unit_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +}
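
The test hunks above all follow the same migration: the old per-query free functions (get_query_state, query, buffer_query, delete_query_state, merge) are replaced by the shard-parameterized interface (Parameters, local_preproc, local_query, local_preproc_buffer, local_query_buffer, distribute_query, combine). The sketch below, which is not part of the patch, shows that calling pattern in isolation for the range-query case; it assumes the Rec record type, ISAMTree shard, and create_sequential_mbuffer helper from include/testing.h and mirrors t_range_query_merge above, so treat it as illustrative rather than canonical.

#include <vector>

#include "shard/ISAMTree.h"
#include "query/rangequery.h"
#include "include/testing.h"

using namespace de;

typedef Rec R;
typedef ISAMTree<R> Shard;
typedef rq::Query<Shard> Q;

/* Illustrative sketch of the reworked query interface (assumed names from
 * the tests above; not part of the diff). */
static std::vector<Q::ResultType> example_range_query(size_t lower, size_t upper) {
    auto buffer = create_sequential_mbuffer<R>(100, 1000);
    auto shard = Shard(buffer->get_buffer_view());

    /* Parameters replaces the old rq::Parms<R> aggregate */
    Q::Parameters parms = {lower, upper};

    /* local_preproc + local_query replace get_query_state + query;
     * the caller still owns and deletes the local query object */
    auto query = Q::local_preproc(&shard, &parms);

    std::vector<std::vector<Q::LocalResultType>> local_results(1);
    local_results[0] = Q::local_query(&shard, query);
    delete query;

    /* combine replaces merge for assembling the final result set */
    std::vector<Q::ResultType> result;
    Q::combine(local_results, nullptr, result);

    delete buffer;
    return result;
}

The sampling tests additionally call Q::distribute_query(&parms, {query}, nullptr) between preprocessing and the local queries to apportion the sample size across shards, as the wss and irs hunks above show.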