From 7c03d771475421c1d5a2bbc135242536af1a371c Mon Sep 17 00:00:00 2001
From: Douglas Rumbaugh
Date: Mon, 25 Sep 2023 10:49:36 -0400
Subject: Re-structuring Project + scheduling updates

This is a big one--probably should have split it apart, but I'm feeling
lazy this morning.

* Organized the mess of header files in include/framework by splitting
  them out into their own subdirectories, and renaming a few files to
  remove redundancies introduced by the directory structure.

* Introduced a new framework/ShardRequirements.h header file for simpler
  shard development. This header simply contains the necessary includes
  from framework/* for creating shard files. This should help to remove
  structural dependencies between the framework file structure and the
  shards, as well as centralizing the necessary framework files to make
  shard development easier.

* Created a (currently dummy) SchedulerInterface, and made the scheduler
  implementation a template parameter of the dynamic extension for easier
  testing of various scheduling policies. There's still more work to be
  done to fully integrate the scheduler (queries, multiple buffers), but
  some more of the necessary framework code for this has been added as
  well.

* Adjusted the Task interface setup for the scheduler. The task
  structures have been removed from ExtensionStructure and placed in
  their own header file. Additionally, I started experimenting with using
  std::variant, as opposed to inheritance, to implement subtype
  polymorphism on the Merge and Query tasks. The scheduler now has a
  general task queue that contains both, and std::variant, std::visit,
  and std::get are used to manipulate them without virtual functions.

* Removed Alex.h, as it can't build anyway. There's a branch out there
  containing the Alex implementation stripped of the C++20 stuff, so
  there's no need to keep it here.
---
 include/shard/Alex.h       | 360 ---------------------------------------------
 include/shard/MemISAM.h    |   3 +-
 include/shard/PGM.h        |   6 +-
 include/shard/TrieSpline.h |   5 +-
 include/shard/VPTree.h     |   6 +-
 include/shard/WIRS.h       |   6 +-
 include/shard/WSS.h        |   6 +-
 7 files changed, 11 insertions(+), 381 deletions(-)
 delete mode 100644 include/shard/Alex.h

(limited to 'include/shard')

diff --git a/include/shard/Alex.h b/include/shard/Alex.h
deleted file mode 100644
index 9f794dc..0000000
--- a/include/shard/Alex.h
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * include/shard/Alex.h
- *
- * Copyright (C) 2023 Douglas B. Rumbaugh
- *
- * All rights reserved. Published under the Modified BSD License.
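As an illustration of the std::variant scheme described in the message
above, a minimal sketch follows; MergeTask, QueryTask, and their fields
are hypothetical stand-ins rather than the framework's actual task
structures:

#include <cstddef>
#include <cstdio>
#include <type_traits>
#include <variant>
#include <vector>

// Hypothetical stand-ins for the scheduler's Merge and Query tasks.
struct MergeTask { size_t level_idx; };
struct QueryTask { void *query_parms; };

// One queue type holds both task kinds, with no common base class.
using Task = std::variant<MergeTask, QueryTask>;

int main() {
    std::vector<Task> task_queue;
    task_queue.push_back(MergeTask{2});
    task_queue.push_back(QueryTask{nullptr});

    for (auto &task : task_queue) {
        // std::visit dispatches on the active alternative--no vtables.
        std::visit([](auto &&t) {
            using T = std::decay_t<decltype(t)>;
            if constexpr (std::is_same_v<T, MergeTask>) {
                std::printf("merge targeting level %zu\n", t.level_idx);
            } else {
                std::printf("query task\n");
            }
        }, task);
    }

    // std::get_if retrieves a known alternative without throwing.
    if (auto *m = std::get_if<MergeTask>(&task_queue[0])) {
        std::printf("first task is a merge (level %zu)\n", m->level_idx);
    }

    return 0;
}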
- * - */ -#pragma once - - -#include -#include -#include -#include -#include - -#include "alex.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -template -struct alex_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template -class AlexRangeQuery; - -template -struct AlexState { - size_t start_idx; - size_t stop_idx; -}; - -template -struct AlexBufferState { - size_t cutoff; - Alias* alias; - - ~AlexBufferState() { - delete alias; - } -}; - - -template -class Alex { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - -public: - - // FIXME: there has to be a better way to do this - friend class AlexRangeQuery; - - Alex(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - std::vector> temp_records; - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - temp_records.push_back({base->rec.key, base->rec.value}); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - m_alex = alex::Alex(); - m_alex.set_expected_insert_frac(0); - m_alex.bulkload(temp_records.data(), temp_records.size()); - } - } - - Alex(Alex** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector> temp_records; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cur5sor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - temp_records.pushback({cursor.ptr->rec.key, cursor.ptr->rec.value}); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - m_alex = alex::Alex(); - m_alex.set_expected_insert_frac(0); - m_alex.bulkload(temp_records.data(), temp_records.size()); - } - } - - ~Alex() { - if (m_data) free(m_data); - if (m_bf) delete m_bf; - - } - - Wrapped *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alex.size_in_bytes() + m_alloc_size; - } - - alex::Alex::Iterator get_lower_bound(const K& key) const { - auto bound = m_alex.find(key); - while (bound != m_alex.end() && bound.key() < key) { - bound++; - } - - return bound; - } - -private: - Wrapped* m_data; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_alloc_size; - K m_max_key; - K m_min_key; - 
alex::Alex m_alex; - BloomFilter *m_bf; -}; - - -template -class AlexRangeQuery { -public: - static void *get_query_state(Alex *ts, void *parms) { - auto res = new AlexState(); - auto p = (alex_range_query_parms *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new AlexBufferState(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(Alex *ts, void *q_state, void *parms) { - std::vector> records; - auto p = (alex_range_query_parms *) parms; - auto s = (AlexState *) q_state; - - // if the returned index is one past the end of the - // records for the Alex, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return records; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. - while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto p = (alex_range_query_parms *) parms; - auto s = (AlexBufferState *) state; - - std::vector> records; - for (size_t i=0; icutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; - } - - static std::vector merge(std::vector> &results, void *parms) { - size_t total = 0; - for (size_t i=0; i(); - } - - std::vector output; - output.reserve(total); - - for (size_t i=0; i *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (AlexBufferState *) state; - delete s; - } -}; - -; - -} diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index a220792..f9c621e 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -14,7 +14,8 @@ #include #include -#include "framework/MutableBuffer.h" +#include "framework/ShardRequirements.h" + #include "util/bf_config.h" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 2cd153e..d960e70 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -15,15 +15,13 @@ #include #include +#include "framework/ShardRequirements.h" + #include "pgm/pgm_index.hpp" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 69fcfbc..98153c0 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -15,15 +15,12 @@ #include #include +#include "framework/ShardRequirements.h" #include "ts/builder.h" #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include 
"framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 8feec84..0e998d9 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -15,14 +15,12 @@ #include #include +#include "framework/ShardRequirements.h" + #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h index 19d3eea..8583cb0 100644 --- a/include/shard/WIRS.h +++ b/include/shard/WIRS.h @@ -16,15 +16,13 @@ #include #include +#include "framework/ShardRequirements.h" + #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; diff --git a/include/shard/WSS.h b/include/shard/WSS.h index c0af573..87b016c 100644 --- a/include/shard/WSS.h +++ b/include/shard/WSS.h @@ -16,15 +16,13 @@ #include #include +#include "framework/ShardRequirements.h" + #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "framework/MutableBuffer.h" -#include "framework/RecordInterface.h" -#include "framework/ShardInterface.h" -#include "framework/QueryInterface.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -- cgit v1.2.3 From 39ae3e0441d8297a09197aba98bd494b5ada12c1 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 14:17:59 -0400 Subject: Concurrency updates + fixes for compile errors --- include/shard/WIRS.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/shard') diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h index 8583cb0..83573c8 100644 --- a/include/shard/WIRS.h +++ b/include/shard/WIRS.h @@ -448,17 +448,21 @@ public: return state; } - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { + static void process_query_states(void *query_parms, std::vector &shard_states, std::vector &buff_states) { + // FIXME: need to redo for the buffer vector interface auto p = (wirs_query_parms *) query_parms; - auto bs = (WIRSBufferState *) buff_state; std::vector shard_sample_sizes(shard_states.size()+1, 0); size_t buffer_sz = 0; + decltype(R::weight) total_weight = 0; std::vector weights; - weights.push_back(bs->total_weight); + for (auto &s : buff_states) { + auto state = (WIRSBufferState *) s; + total_weight += state->total_weight; + weights.push_back(state->total_weight); + } - decltype(R::weight) total_weight = 0; for (auto &s : shard_states) { auto state = (WIRSState *) s; total_weight += state->total_weight; @@ -480,8 +484,6 @@ public: } } - - bs->sample_size = buffer_sz; for (size_t i=0; i *) shard_states[i]; state->sample_size = shard_sample_sizes[i+1]; -- cgit v1.2.3 From d2279e1b96d352a0af1d425dcaaf93e8a26a8d52 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 30 Oct 2023 17:15:05 -0400 Subject: General Comment + Consistency updates --- include/shard/MemISAM.h | 6 
+++++- include/shard/PGM.h | 4 ++++ include/shard/TrieSpline.h | 4 ++++ include/shard/VPTree.h | 9 +++++++-- include/shard/WIRS.h | 6 +++++- include/shard/WSS.h | 8 ++++++-- 6 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/shard') diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index f9c621e..8ca5cee 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -1,7 +1,7 @@ /* * include/shard/MemISAM.h * - * Copyright (C) 2023 Douglas Rumbaugh + * Copyright (C) 2023 Douglas B. Rumbaugh * Dong Xie * * All rights reserved. Published under the Modified BSD License. @@ -264,6 +264,10 @@ public: return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { const InMemISAMNode* now = m_root; diff --git a/include/shard/PGM.h b/include/shard/PGM.h index d960e70..6d76376 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -235,6 +235,10 @@ public: return m_pgm.size_in_bytes() + m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + size_t get_lower_bound(const K& key) const { auto bound = m_pgm.search(key); size_t idx = bound.lo; diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 98153c0..a784a38 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -250,6 +250,10 @@ public: return m_ts.GetSize() + m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 0e998d9..d9a15b1 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -1,9 +1,9 @@ /* * include/shard/VPTree.h * - * Copyright (C) 2023 Douglas Rumbaugh + * Copyright (C) 2023 Douglas B. Rumbaugh * - * All outsides reserved. Published under the Modified BSD License. + * All rights reserved. Published under the Modified BSD License. * */ #pragma once @@ -240,6 +240,11 @@ public: return m_node_cnt * sizeof(vpnode) + m_reccnt * sizeof(R*) + m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + + private: vpnode *build_vptree() { diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h index 83573c8..bf29325 100644 --- a/include/shard/WIRS.h +++ b/include/shard/WIRS.h @@ -2,7 +2,7 @@ * include/shard/WIRS.h * * Copyright (C) 2023 Dong Xie - * Douglas Rumbaugh + * Douglas B. Rumbaugh * * All rights reserved. Published under the Modified BSD License. * @@ -260,6 +260,10 @@ public: return m_alloc_size + m_node_cnt * sizeof(wirs_node>); } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { diff --git a/include/shard/WSS.h b/include/shard/WSS.h index 87b016c..4e3a326 100644 --- a/include/shard/WSS.h +++ b/include/shard/WSS.h @@ -1,8 +1,8 @@ /* * include/shard/WSS.h * - * Copyright (C) 2023 Dong Xie - * Douglas Rumbaugh + * Copyright (C) 2023 Douglas B. Rumbaugh + * Dong Xie * * All rights reserved. Published under the Modified BSD License. * @@ -243,6 +243,10 @@ public: return m_alloc_size; } + size_t get_aux_memory_usage() { + return 0; + } + private: size_t get_lower_bound(const K& key) const { -- cgit v1.2.3 From 28b036025d35853e2ff4ad0bd0c581768f93ece0 Mon Sep 17 00:00:00 2001 From: "Douglas B. 
Rumbaugh" Date: Tue, 31 Oct 2023 11:04:18 -0400 Subject: VPTree Shard: updates to build on my desktop --- include/shard/VPTree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/shard') diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index d9a15b1..978372b 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "framework/ShardRequirements.h" -- cgit v1.2.3 From df59a313ae18a6968daa2662dc39a8065d92cfcb Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Tue, 31 Oct 2023 11:54:24 -0400 Subject: MemISAM: updated to new query interface --- include/shard/MemISAM.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/shard') diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index 8ca5cee..00fb467 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -589,7 +589,7 @@ public: return res; } - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { + static void process_query_states(void *query_parms, std::vector &shard_states, std::vector &buffer_states) { return; } -- cgit v1.2.3 From 0b723322a611de83872dd83b55d2e10e8886a283 Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Thu, 2 Nov 2023 08:01:45 -0400 Subject: started refactoring queries interface --- include/shard/MemISAM.h | 361 ------------------------------------------------ 1 file changed, 361 deletions(-) (limited to 'include/shard') diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index 00fb467..6962c19 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -31,52 +31,6 @@ namespace de { thread_local size_t mrun_cancelations = 0; -template -struct irs_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template -class IRSQuery; - -template -struct IRSState { - size_t lower_bound; - size_t upper_bound; - size_t sample_size; - size_t total_weight; -}; - -template -struct IRSBufferState { - size_t cutoff; - std::vector> records; - size_t sample_size; -}; - -template -struct ISAMRangeQueryParms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template -class ISAMRangeQuery; - -template -struct ISAMRangeQueryState { - size_t start_idx; - size_t stop_idx; -}; - -template -struct RangeQueryBufferState { - size_t cutoff; -}; - template class MemISAM { private: @@ -384,319 +338,4 @@ private: size_t m_deleted_cnt; size_t m_alloc_size; }; - -template -class IRSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(MemISAM *isam, void *parms) { - auto res = new IRSState(); - decltype(R::key) lower_key = ((irs_query_parms *) parms)->lower_bound; - decltype(R::key) upper_key = ((irs_query_parms *) parms)->upper_bound; - - res->lower_bound = isam->get_lower_bound(lower_key); - res->upper_bound = isam->get_upper_bound(upper_key); - - if (res->lower_bound == isam->get_record_count()) { - res->total_weight = 0; - } else { - res->total_weight = res->upper_bound - res->lower_bound; - } - - res->sample_size = 0; - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new IRSBufferState(); - - res->cutoff = buffer->get_record_count(); - res->sample_size = 0; - - if constexpr (Rejection) { - return res; - } - - auto lower_key = ((irs_query_parms *) parms)->lower_bound; - auto upper_key 
= ((irs_query_parms *) parms)->upper_bound; - - for (size_t i=0; icutoff; i++) { - if (((buffer->get_data() + i)->rec.key >= lower_key) && ((buffer->get_data() + i)->rec.key <= upper_key)) { - res->records.emplace_back(*(buffer->get_data() + i)); - } - } - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - auto p = (irs_query_parms *) query_parms; - auto bs = (buff_state) ? (IRSBufferState *) buff_state : nullptr; - - std::vector shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector weights; - if constexpr (Rejection) { - weights.push_back((bs) ? bs->cutoff : 0); - } else { - weights.push_back((bs) ? bs->records.size() : 0); - } - - size_t total_weight = 0; - for (auto &s : shard_states) { - auto state = (IRSState *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - // if no valid records fall within the query range, just - // set all of the sample sizes to 0 and bail out. - if (total_weight == 0) { - for (size_t i=0; i *) shard_states[i]; - state->sample_size = 0; - } - - return; - } - - std::vector normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; isample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - if (bs) { - bs->sample_size = buffer_sz; - } - for (size_t i=0; i *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - static std::vector> query(MemISAM *isam, void *q_state, void *parms) { - auto lower_key = ((irs_query_parms *) parms)->lower_bound; - auto upper_key = ((irs_query_parms *) parms)->upper_bound; - auto rng = ((irs_query_parms *) parms)->rng; - - auto state = (IRSState *) q_state; - auto sample_sz = state->sample_size; - - std::vector> result_set; - - if (sample_sz == 0 || state->lower_bound == isam->get_record_count()) { - return result_set; - } - - size_t attempts = 0; - size_t range_length = state->upper_bound - state->lower_bound; - do { - attempts++; - size_t idx = (range_length > 0) ? 
gsl_rng_uniform_int(rng, range_length) : 0; - result_set.emplace_back(*isam->get_record_at(state->lower_bound + idx)); - } while (attempts < sample_sz); - - return result_set; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto st = (IRSBufferState *) state; - auto p = (irs_query_parms *) parms; - - std::vector> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; isample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - - return result; - } - - for (size_t i=0; isample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->records.size()); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - - for (size_t i=0; i *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (IRSBufferState *) state; - delete s; - } -}; - - -template -class ISAMRangeQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(MemISAM *ts, void *parms) { - auto res = new ISAMRangeQueryState(); - auto p = (ISAMRangeQueryParms *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new RangeQueryBufferState(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, std::vector &buffer_states) { - return; - } - - static std::vector> query(MemISAM *ts, void *q_state, void *parms) { - std::vector> records; - auto p = (ISAMRangeQueryParms *) parms; - auto s = (ISAMRangeQueryState *) q_state; - - // if the returned index is one past the end of the - // records for the PGM, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return records; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. 
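The IRS process_query_states logic above distributes a query's total
sample size across the buffer and shards in proportion to their weights.
A condensed sketch of that partitioning step, under the assumption that
psudb::Alias is constructed from a vector of normalized weights and that
get() draws an index with those probabilities (the interface used
throughout this log):

#include <cstddef>
#include <vector>
#include <gsl/gsl_rng.h>
#include "psu-ds/Alias.h"

// Distribute `sample_size` draws across sources (buffer + shards) in
// proportion to their weights, mirroring the alias-based assignment in
// process_query_states above.
std::vector<size_t> partition_sample(const std::vector<size_t> &weights,
                                     size_t sample_size, gsl_rng *rng) {
    std::vector<size_t> assigned(weights.size(), 0);

    size_t total_weight = 0;
    for (auto w : weights) total_weight += w;

    // no valid records anywhere: all sample sizes stay 0
    if (total_weight == 0) return assigned;

    std::vector<double> normalized;
    for (auto w : weights) {
        normalized.push_back((double) w / (double) total_weight);
    }

    psudb::Alias alias(normalized);
    for (size_t i = 0; i < sample_size; i++) {
        assigned[alias.get(rng)]++;
    }

    return assigned;
}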
- while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - records.emplace_back(*ptr); - ptr++; - } - - return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto p = (ISAMRangeQueryParms *) parms; - auto s = (RangeQueryBufferState *) state; - - std::vector> records; - for (size_t i=0; icutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - records.emplace_back(*rec); - } - } - - return records; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector>> cursors; - cursors.reserve(results.size()); - - PriorityQueue> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector(); - } - - std::vector output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (ISAMRangeQueryState *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (RangeQueryBufferState *) state; - delete s; - } -}; - - - } -- cgit v1.2.3 From e02742b07540dd5a9bcbb44dae14856bf10955ed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 6 Nov 2023 15:18:53 -0500 Subject: Refactoring progress --- include/shard/Alias.h | 251 +++++++++++++++++++++++++ include/shard/ISAMTree.h | 339 +++++++++++++++++++++++++++++++++ include/shard/MemISAM.h | 341 ---------------------------------- include/shard/PGM.h | 267 -------------------------- include/shard/TrieSpline.h | 184 +----------------- include/shard/WSS.h | 453 --------------------------------------------- 6 files changed, 592 insertions(+), 1243 deletions(-) create mode 100644 include/shard/Alias.h create mode 100644 include/shard/ISAMTree.h delete mode 100644 include/shard/MemISAM.h delete mode 100644 include/shard/WSS.h (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h new file mode 100644 index 0000000..b6b16c5 --- /dev/null +++ b/include/shard/Alias.h @@ -0,0 +1,251 @@ +/* + * include/shard/Alias.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh + * Dong Xie + * + * All rights reserved. Published under the Modified BSD License. 
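Every merge loop in this log applies the same cancellation rule: a record
annihilates with an immediately following tombstone for the same
key/value pair. A self-contained sketch of that rule over one sorted run,
where the two-field record and tombstone flag are simplified stand-ins
for Wrapped<R>:

#include <cstddef>
#include <vector>

// Simplified stand-ins for Wrapped<R>: a key/value pair plus a
// tombstone flag.
struct Rec {
    int key;
    int value;
    bool operator==(const Rec &o) const {
        return key == o.key && value == o.value;
    }
};

struct WrappedRec {
    Rec rec;
    bool tombstone;
    bool is_tombstone() const { return tombstone; }
};

// Apply the record/tombstone cancellation rule to a single sorted run:
// a record immediately followed by a matching tombstone cancels, and
// both entries are skipped.
std::vector<WrappedRec> cancel_run(const std::vector<WrappedRec> &sorted) {
    std::vector<WrappedRec> out;
    for (size_t i = 0; i < sorted.size(); i++) {
        if (!sorted[i].is_tombstone() && i + 1 < sorted.size()
              && sorted[i].rec == sorted[i + 1].rec
              && sorted[i + 1].is_tombstone()) {
            i++;       // skip the tombstone too
            continue;  // and drop the record it cancels
        }
        out.push_back(sorted[i]);
    }
    return out;
}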
+ * + */ +#pragma once + +#include +#include +#include +#include +#include + +#include "framework/ShardRequirements.h" + +#include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" +#include "psu-ds/psudb::Alias.h" +#include "psu-ds/BloomFilter.h" +#include "util/bf_config.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; + +namespace de { + +thread_local size_t wss_cancelations = 0; + +template +class Alias { +private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; + typedef decltype(R::weight) W; + +public: + Alias(MutableBuffer* buffer) + : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { + + m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); + + size_t offset = 0; + m_reccnt = 0; + auto base = buffer->get_data(); + auto stop = base + buffer->get_record_count(); + + std::sort(base, stop, std::less>()); + + std::vector weights; + + while (base < stop) { + if (!(base->is_tombstone()) && (base + 1) < stop) { + if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + wss_cancelations++; + continue; + } + } else if (base->is_deleted()) { + base += 1; + continue; + } + + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. It should only need to be &= 1 + base->header &= 3; + m_data[m_reccnt++] = *base; + m_total_weight+= base->rec.weight; + weights.push_back(base->rec.weight); + + if (m_bf && base->is_tombstone()) { + m_tombstone_cnt++; + m_bf->insert(base->rec); + } + + base++; + } + + if (m_reccnt > 0) { + build_alias_structure(weights); + } + } + + Alias(Alias** shards, size_t len) + : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { + std::vector>> cursors; + cursors.reserve(len); + + PriorityQueue> pq(len); + + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + + for (size_t i = 0; i < len; ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); + attemp_reccnt += shards[i]->get_record_count(); + tombstone_count += shards[i]->get_tombstone_count(); + pq.push(cursors[i].ptr, i); + } else { + cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); + } + } + + m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + + m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + std::vector weights; + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[now.version]; + auto& cursor2 = cursors[next.version]; + if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[now.version]; + if (!cursor.ptr->is_deleted()) { + m_data[m_reccnt++] = *cursor.ptr; + m_total_weight += cursor.ptr->rec.weight; + weights.push_back(cursor.ptr->rec.weight); + if (m_bf && cursor.ptr->is_tombstone()) { + ++m_tombstone_cnt; + if (m_bf) m_bf->insert(cursor.ptr->rec); + } + } + pq.pop(); + + if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); + } + } + + if (m_reccnt > 0) { + build_alias_structure(weights); + } + } + + ~Alias() { + if (m_data) free(m_data); + if (m_alias) delete m_alias; + if (m_bf) delete m_bf; + + } + + Wrapped *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + const Wrapped* get_record_at(size_t idx) const { + if (idx >= m_reccnt) return nullptr; + return m_data + idx; + } + + + size_t get_memory_usage() { + return m_alloc_size; + } + + size_t get_aux_memory_usage() { + return 0; + } + +private: + + size_t get_lower_bound(const K& key) const { + size_t min = 0; + size_t max = m_reccnt - 1; + + const char * record_key; + while (min < max) { + size_t mid = (min + max) / 2; + + if (key > m_data[mid].rec.key) { + min = mid + 1; + } else { + max = mid; + } + } + + return min; + } + + void build_alias_structure(std::vector &weights) { + + // normalize the weights vector + std::vector norm_weights(weights.size()); + + for (size_t i=0; i* m_data; + psudb::Alias *m_alias; + W m_total_weight; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_group_size; + size_t m_alloc_size; + BloomFilter *m_bf; +}; diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h new file mode 100644 index 0000000..a610c09 --- /dev/null +++ b/include/shard/ISAMTree.h @@ -0,0 +1,339 @@ +/* + * include/shard/ISAMTree.h + * + * Copyright (C) 2023 Douglas B. Rumbaugh + * Dong Xie + * + * All rights reserved. Published under the Modified BSD License. 
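The point_lookup above follows the pattern shared by all of these shards:
find a candidate position with get_lower_bound, roll forward past records
that sort before the target, and report a hit only on an exact key/value
match. A standalone sketch over a plain sorted array (using half-open
bounds, which also sidestep the m_reccnt - 1 underflow risk the in-shard
binary search would have on an empty shard):

#include <cstddef>
#include <vector>

struct Rec {
    long key;
    long value;
    bool operator<(const Rec &o) const {
        return key < o.key || (key == o.key && value < o.value);
    }
    bool operator==(const Rec &o) const {
        return key == o.key && value == o.value;
    }
};

// Binary search for the first index whose key is >= `key`, over the
// half-open range [0, data.size()).
size_t lower_bound_idx(const std::vector<Rec> &data, long key) {
    size_t lo = 0, hi = data.size();
    while (lo < hi) {
        size_t mid = (lo + hi) / 2;
        if (key > data[mid].key) lo = mid + 1;
        else hi = mid;
    }
    return lo;
}

// Lower bound, then roll forward over duplicates of the same key until
// the exact record is found (or the key range is exhausted).
const Rec *point_lookup(const std::vector<Rec> &data, const Rec &rec) {
    size_t idx = lower_bound_idx(data, rec.key);
    while (idx < data.size() && data[idx] < rec) idx++;
    if (idx < data.size() && data[idx] == rec) return &data[idx];
    return nullptr;
}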
+ * + */ +#pragma once + +#include +#include +#include +#include + +#include "framework/ShardRequirements.h" + +#include "util/bf_config.h" +#include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" +#include "psu-util/timer.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + +namespace de { + +thread_local size_t mrun_cancelations = 0; + +template +class ISAMTree { +private: + +typedef decltype(R::key) K; +typedef decltype(R::value) V; + +constexpr static size_t inmem_isam_node_size = 256; +constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*)); + +struct InternalNode { + K keys[inmem_isam_fanout]; + char* child[inmem_isam_fanout]; +}; + +constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R); +constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout; + +static_assert(sizeof(InternalNode) == inmem_isam_node_size, "node size does not match"); + +public: + ISAMTree(MutableBuffer* buffer) + :m_reccnt(0), m_tombstone_cnt(0), m_isam_nodes(nullptr), m_deleted_cnt(0) { + + m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); + + m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + TIMER_INIT(); + + size_t offset = 0; + m_reccnt = 0; + auto base = buffer->get_data(); + auto stop = base + buffer->get_record_count(); + + TIMER_START(); + std::sort(base, stop, std::less>()); + TIMER_STOP(); + auto sort_time = TIMER_RESULT(); + + TIMER_START(); + while (base < stop) { + if (!base->is_tombstone() && (base + 1 < stop) + && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + mrun_cancelations++; + continue; + } else if (base->is_deleted()) { + base += 1; + continue; + } + + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. 
It should only need to be &= 1 + base->header &= 3; + m_data[m_reccnt++] = *base; + if (m_bf && base->is_tombstone()) { + ++m_tombstone_cnt; + m_bf->insert(base->rec); + } + + base++; + } + TIMER_STOP(); + auto copy_time = TIMER_RESULT(); + + TIMER_START(); + if (m_reccnt > 0) { + build_internal_levels(); + } + TIMER_STOP(); + auto level_time = TIMER_RESULT(); + } + + ISAMTree(ISAMTree** runs, size_t len) + : m_reccnt(0), m_tombstone_cnt(0), m_deleted_cnt(0), m_isam_nodes(nullptr) { + std::vector>> cursors; + cursors.reserve(len); + + PriorityQueue> pq(len); + + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + + for (size_t i = 0; i < len; ++i) { + if (runs[i]) { + auto base = runs[i]->get_data(); + cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()}); + attemp_reccnt += runs[i]->get_record_count(); + tombstone_count += runs[i]->get_tombstone_count(); + pq.push(cursors[i].ptr, i); + } else { + cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); + } + } + + m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + + m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + size_t offset = 0; + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[now.version]; + auto& cursor2 = cursors[next.version]; + if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[now.version]; + if (!cursor.ptr->is_deleted()) { + m_data[m_reccnt++] = *cursor.ptr; + if (cursor.ptr->is_tombstone()) { + ++m_tombstone_cnt; + m_bf->insert(cursor.ptr->rec); + } + } + pq.pop(); + + if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); + } + } + + if (m_reccnt > 0) { + build_internal_levels(); + } + } + + ~ISAMTree() { + if (m_data) free(m_data); + if (m_isam_nodes) free(m_isam_nodes); + if (m_bf) delete m_bf; + } + + Wrapped *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + const Wrapped* get_record_at(size_t idx) const { + return (idx < m_reccnt) ? m_data + idx : nullptr; + } + + size_t get_memory_usage() { + return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size; + } + + size_t get_aux_memory_usage() { + return 0; + } + + size_t get_lower_bound(const K& key) const { + const InternalNode* now = m_root; + while (!is_leaf(reinterpret_cast(now))) { + const InternalNode* next = nullptr; + for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { + if (now->child[i + 1] == nullptr || key <= now->keys[i]) { + next = reinterpret_cast(now->child[i]); + break; + } + } + + now = next ? 
next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); + } + + const Wrapped* pos = reinterpret_cast*>(now); + while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; + + return pos - m_data; + } + + size_t get_upper_bound(const K& key) const { + const InternalNode* now = m_root; + while (!is_leaf(reinterpret_cast(now))) { + const InternalNode* next = nullptr; + for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { + if (now->child[i + 1] == nullptr || key < now->keys[i]) { + next = reinterpret_cast(now->child[i]); + break; + } + } + + now = next ? next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); + } + + const Wrapped* pos = reinterpret_cast*>(now); + while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; + + return pos - m_data; + } + + +private: + void build_internal_levels() { + size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0); + size_t level_node_cnt = n_leaf_nodes; + size_t node_cnt = 0; + do { + level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0); + node_cnt += level_node_cnt; + } while (level_node_cnt > 1); + + m_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + + m_isam_nodes = (InternalNode*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + m_internal_node_cnt = node_cnt; + memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size); + + InternalNode* current_node = m_isam_nodes; + + const Wrapped* leaf_base = m_data; + const Wrapped* leaf_stop = m_data + m_reccnt; + while (leaf_base < leaf_stop) { + size_t fanout = 0; + for (size_t i = 0; i < inmem_isam_fanout; ++i) { + auto rec_ptr = leaf_base + inmem_isam_leaf_fanout * i; + if (rec_ptr >= leaf_stop) break; + const Wrapped* sep_key = std::min(rec_ptr + inmem_isam_leaf_fanout - 1, leaf_stop - 1); + current_node->keys[i] = sep_key->rec.key; + current_node->child[i] = (char*)rec_ptr; + ++fanout; + } + current_node++; + leaf_base += fanout * inmem_isam_leaf_fanout; + } + + auto level_start = m_isam_nodes; + auto level_stop = current_node; + auto current_level_node_cnt = level_stop - level_start; + while (current_level_node_cnt > 1) { + auto now = level_start; + while (now < level_stop) { + size_t child_cnt = 0; + for (size_t i = 0; i < inmem_isam_fanout; ++i) { + auto node_ptr = now + i; + ++child_cnt; + if (node_ptr >= level_stop) break; + current_node->keys[i] = node_ptr->keys[inmem_isam_fanout - 1]; + current_node->child[i] = (char*)node_ptr; + } + now += child_cnt; + current_node++; + } + level_start = level_stop; + level_stop = current_node; + current_level_node_cnt = level_stop - level_start; + } + + assert(current_level_node_cnt == 1); + m_root = level_start; + } + + bool is_leaf(const char* ptr) const { + return ptr >= (const char*)m_data && ptr < (const char*)(m_data + m_reccnt); + } + + // Members: sorted data, internal ISAM levels, reccnt; + Wrapped* m_data; + psudb::BloomFilter *m_bf; + InternalNode* m_isam_nodes; + InternalNode* m_root; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_internal_node_cnt; + size_t m_deleted_cnt; + size_t m_alloc_size; +}; +} diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h deleted file mode 100644 index 6962c19..0000000 --- a/include/shard/MemISAM.h +++ /dev/null @@ -1,341 +0,0 @@ -/* - * include/shard/MemISAM.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh - * Dong Xie - * - * All rights reserved. 
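The ISAM layout above is driven entirely by the 256-byte node size; for
concrete type sizes the fanouts fall out at compile time. A sketch of the
arithmetic, assuming 8-byte keys, 8-byte pointers (LP64), and a 16-byte
record:

#include <cstddef>
#include <cstdint>

constexpr size_t node_size = 256;

using K = int64_t;                         // 8-byte key (assumption)
struct R { int64_t key; int64_t value; };  // 16-byte record (assumption)

// Internal nodes hold `fanout` keys plus `fanout` child pointers each.
constexpr size_t fanout = node_size / (sizeof(K) + sizeof(char*));
// Leaf "nodes" are just runs of records within the sorted data array.
constexpr size_t leaf_fanout = node_size / sizeof(R);

// On an LP64 platform: 256 / (8 + 8) = 16 keys per internal node and
// 256 / 16 = 16 records per leaf run, so each internal level shrinks
// the node count by a factor of 16, as in build_internal_levels().
static_assert(fanout == 16, "16-way internal fanout");
static_assert(leaf_fanout == 16, "16 records per leaf run");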
Published under the Modified BSD License. - * - */ -#pragma once - -#include -#include -#include -#include - -#include "framework/ShardRequirements.h" - -#include "util/bf_config.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-util/timer.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t mrun_cancelations = 0; - -template -class MemISAM { -private: - friend class IRSQuery; - friend class IRSQuery; - friend class ISAMRangeQuery; - -typedef decltype(R::key) K; -typedef decltype(R::value) V; - -constexpr static size_t inmem_isam_node_size = 256; -constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*)); - -struct InMemISAMNode { - K keys[inmem_isam_fanout]; - char* child[inmem_isam_fanout]; -}; - -constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R); -constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout; - -static_assert(sizeof(InMemISAMNode) == inmem_isam_node_size, "node size does not match"); - -public: - MemISAM(MutableBuffer* buffer) - :m_reccnt(0), m_tombstone_cnt(0), m_isam_nodes(nullptr), m_deleted_cnt(0) { - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - TIMER_INIT(); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - TIMER_START(); - std::sort(base, stop, std::less>()); - TIMER_STOP(); - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - mrun_cancelations++; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } - - base++; - } - TIMER_STOP(); - auto copy_time = TIMER_RESULT(); - - TIMER_START(); - if (m_reccnt > 0) { - build_internal_levels(); - } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - } - - MemISAM(MemISAM** runs, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_deleted_cnt(0), m_isam_nodes(nullptr) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (runs[i]) { - auto base = runs[i]->get_data(); - cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()}); - attemp_reccnt += runs[i]->get_record_count(); - tombstone_count += runs[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - size_t offset = 0; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - if (cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_internal_levels(); - } - } - - ~MemISAM() { - if (m_data) free(m_data); - if (m_isam_nodes) free(m_isam_nodes); - if (m_bf) delete m_bf; - } - - Wrapped *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped* get_record_at(size_t idx) const { - return (idx < m_reccnt) ? m_data + idx : nullptr; - } - - size_t get_memory_usage() { - return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size; - } - - size_t get_aux_memory_usage() { - return 0; - } - -private: - size_t get_lower_bound(const K& key) const { - const InMemISAMNode* now = m_root; - while (!is_leaf(reinterpret_cast(now))) { - const InMemISAMNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { - if (now->child[i + 1] == nullptr || key <= now->keys[i]) { - next = reinterpret_cast(now->child[i]); - break; - } - } - - now = next ? 
next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); - } - - const Wrapped* pos = reinterpret_cast*>(now); - while (pos < m_data + m_reccnt && pos->rec.key < key) pos++; - - return pos - m_data; - } - - size_t get_upper_bound(const K& key) const { - const InMemISAMNode* now = m_root; - while (!is_leaf(reinterpret_cast(now))) { - const InMemISAMNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { - if (now->child[i + 1] == nullptr || key < now->keys[i]) { - next = reinterpret_cast(now->child[i]); - break; - } - } - - now = next ? next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); - } - - const Wrapped* pos = reinterpret_cast*>(now); - while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++; - - return pos - m_data; - } - - void build_internal_levels() { - size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0); - size_t level_node_cnt = n_leaf_nodes; - size_t node_cnt = 0; - do { - level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0); - node_cnt += level_node_cnt; - } while (level_node_cnt > 1); - - m_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - - m_isam_nodes = (InMemISAMNode*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - m_internal_node_cnt = node_cnt; - memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size); - - InMemISAMNode* current_node = m_isam_nodes; - - const Wrapped* leaf_base = m_data; - const Wrapped* leaf_stop = m_data + m_reccnt; - while (leaf_base < leaf_stop) { - size_t fanout = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { - auto rec_ptr = leaf_base + inmem_isam_leaf_fanout * i; - if (rec_ptr >= leaf_stop) break; - const Wrapped* sep_key = std::min(rec_ptr + inmem_isam_leaf_fanout - 1, leaf_stop - 1); - current_node->keys[i] = sep_key->rec.key; - current_node->child[i] = (char*)rec_ptr; - ++fanout; - } - current_node++; - leaf_base += fanout * inmem_isam_leaf_fanout; - } - - auto level_start = m_isam_nodes; - auto level_stop = current_node; - auto current_level_node_cnt = level_stop - level_start; - while (current_level_node_cnt > 1) { - auto now = level_start; - while (now < level_stop) { - size_t child_cnt = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { - auto node_ptr = now + i; - ++child_cnt; - if (node_ptr >= level_stop) break; - current_node->keys[i] = node_ptr->keys[inmem_isam_fanout - 1]; - current_node->child[i] = (char*)node_ptr; - } - now += child_cnt; - current_node++; - } - level_start = level_stop; - level_stop = current_node; - current_level_node_cnt = level_stop - level_start; - } - - assert(current_level_node_cnt == 1); - m_root = level_start; - } - - bool is_leaf(const char* ptr) const { - return ptr >= (const char*)m_data && ptr < (const char*)(m_data + m_reccnt); - } - - // Members: sorted data, internal ISAM levels, reccnt; - Wrapped* m_data; - psudb::BloomFilter *m_bf; - InMemISAMNode* m_isam_nodes; - InMemISAMNode* m_root; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_internal_node_cnt; - size_t m_deleted_cnt; - size_t m_alloc_size; -}; -} diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 6d76376..6b66b7d 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -31,34 +31,6 @@ using psudb::Alias; namespace de { -template -struct pgm_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template -struct 
PGMPointLookupParms { - decltype(R::key) target_key; -}; - -template -class PGMRangeQuery; - -template -class PGMPointLookup; - -template -struct PGMState { - size_t start_idx; - size_t stop_idx; -}; - -template -struct PGMBufferState { - size_t cutoff; -}; - template class PGM { private: @@ -67,11 +39,6 @@ private: public: - - // FIXME: there has to be a better way to do this - friend class PGMRangeQuery; - friend class PGMPointLookup; - PGM(MutableBuffer* buffer) : m_reccnt(0), m_tombstone_cnt(0) { @@ -80,8 +47,6 @@ public: m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); std::vector keys; - //m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - size_t offset = 0; m_reccnt = 0; auto base = buffer->get_data(); @@ -110,13 +75,6 @@ public: base->header &= 3; m_data[m_reccnt++] = *base; keys.emplace_back(base->rec.key); - - /* - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - }*/ - base++; } @@ -148,8 +106,6 @@ public: } } - //m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); assert(m_alloc_size % CACHELINE_SIZE == 0); m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); @@ -172,10 +128,6 @@ public: if (!cursor.ptr->is_deleted()) { m_data[m_reccnt++] = *cursor.ptr; keys.emplace_back(cursor.ptr->rec.key); - /*if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - }*/ } pq.pop(); @@ -190,15 +142,9 @@ public: ~PGM() { if (m_data) free(m_data); - //if (m_bf) delete m_bf; - } Wrapped *point_lookup(const R &rec, bool filter=false) { - //if (filter && !m_bf->lookup(rec)) { - // return nullptr; - //} - size_t idx = get_lower_bound(rec.key); if (idx >= m_reccnt) { return nullptr; @@ -284,219 +230,6 @@ private: K m_max_key; K m_min_key; pgm::PGMIndex m_pgm; - //BloomFilter *m_bf; -}; -template -class PGMPointLookup { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(PGM *ts, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - return nullptr; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(PGM *ts, void *q_state, void *parms) { - std::vector> records; - auto p = (PGMPointLookupParms *) parms; - auto s = (PGMState *) q_state; - - size_t idx = ts->get_lower_bound(p->target_key); - if (ts->get_record_at(idx)->rec.key == p->target_key) { - records.emplace_back(*ts->get_record_at(idx)); - } - - return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto p = (PGMPointLookupParms *) parms; - auto s = (PGMBufferState *) state; - - std::vector> records; - for (size_t i=0; iget_record_count(); i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key == p->target_key) { - records.emplace_back(*rec); - return records; - } - } - - return records; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - for (size_t i=0 ;i 0) { - output.emplace_back(results[i][0].rec); - return output; - } - } - - return output; - } - - static void delete_query_state(void *state) { - } - - static void delete_buffer_query_state(void *state) { - } }; - - -template -class PGMRangeQuery { -public: - 
constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(PGM *ts, void *parms) { - auto res = new PGMState(); - auto p = (pgm_range_query_parms *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new PGMBufferState(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(PGM *ts, void *q_state, void *parms) { - size_t tot = 0; - //std::vector> records; - auto p = (pgm_range_query_parms *) parms; - auto s = (PGMState *) q_state; - - // if the returned index is one past the end of the - // records for the PGM, then there are no records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return {}; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. - while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - if (ptr->is_tombstone()) --tot; - else if (!ptr->is_deleted()) ++tot; - //records.emplace_back(*ptr); - ptr++; - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - size_t tot = 0; - auto p = (pgm_range_query_parms *) parms; - auto s = (PGMBufferState *) state; - - //std::vector> records; - for (size_t i=0; i<s->cutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - if (rec->is_tombstone()) --tot; - else if (!rec->is_deleted()) ++tot; - //records.emplace_back(*rec); - } - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector merge(std::vector>> &results, void *parms) { - /*std::vector>> cursors; - cursors.reserve(results.size()); - - PriorityQueue> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector(); - } - - std::vector output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - }*/ - - size_t tot = 0; - for (auto& result: results) - if (result.size() > 0) tot += result[0].rec.key; - - return {{tot, 0}}; - } - - static void delete_query_state(void *state) { - auto s = (PGMState *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (PGMBufferState *) state; - delete s; - } -}; - -; - } diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index a784a38..fdf8edb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -30,32 +30,6 @@ using psudb::Alias; namespace de { -template -struct ts_range_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; -}; - -template -class TrieSplineRangeQuery; - -template -struct TrieSplineState { - size_t start_idx; - size_t stop_idx; -}; - -template -struct TrieSplineBufferState { - size_t cutoff; - Alias* alias; - - ~TrieSplineBufferState() { - delete alias; - } - -}; - template class TrieSpline { private: @@ -63,10 +37,6 @@ private: typedef decltype(R::value) V; public: - - // FIXME: there has to be a better way to do this - friend class TrieSplineRangeQuery; - TrieSpline(MutableBuffer* buffer) : m_reccnt(0), m_tombstone_cnt(0) { @@ -254,8 +224,6 @@ public: return 0; } -private: - size_t get_lower_bound(const K& key) const { auto bound = m_ts.GetSearchBound(key); size_t idx = bound.begin; @@ -293,6 +261,8 @@ private: return (m_data[idx].rec.key <= key) ? idx : m_reccnt; } +private: + Wrapped* m_data; size_t m_reccnt; size_t m_tombstone_cnt; @@ -302,154 +272,4 @@ private: ts::TrieSpline m_ts; BloomFilter *m_bf; }; - - -template -class TrieSplineRangeQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(TrieSpline *ts, void *parms) { - auto res = new TrieSplineState(); - auto p = (ts_range_query_parms *) parms; - - res->start_idx = ts->get_lower_bound(p->lower_bound); - res->stop_idx = ts->get_record_count(); - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - auto res = new TrieSplineBufferState(); - res->cutoff = buffer->get_record_count(); - - return res; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(TrieSpline *ts, void *q_state, void *parms) { - //std::vector> records; - size_t tot = 0; - auto p = (ts_range_query_parms *) parms; - auto s = (TrieSplineState *) q_state; - - // if the returned index is one past the end of the - // records for the TrieSpline, then there are not records - // in the index falling into the specified range. - if (s->start_idx == ts->get_record_count()) { - return {}; - } - - auto ptr = ts->get_record_at(s->start_idx); - - // roll the pointer forward to the first record that is - // greater than or equal to the lower bound. 
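- // (get_lower_bound() may return the last record with a key below the - // bound when the exact key is absent, so a short forward scan is still - // required to reach the true lower bound.)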
- while(ptr->rec.key < p->lower_bound) { - ptr++; - } - - - while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { - if (ptr->is_tombstone()) --tot; - else if (!ptr->is_deleted()) ++tot; - //records.emplace_back(*ptr); - ptr++; - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - size_t tot = 0; - auto p = (ts_range_query_parms *) parms; - auto s = (TrieSplineBufferState *) state; - - //std::vector> records; - for (size_t i=0; icutoff; i++) { - auto rec = buffer->get_data() + i; - if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - if (rec->is_tombstone()) --tot; - else if (!rec->is_deleted()) ++tot; - //records.emplace_back(*rec); - } - - } - - return {Wrapped{0, {tot, 0}}}; - //return records; - } - - static std::vector merge(std::vector>> &results, void *parms) { -/* - std::vector>> cursors; - cursors.reserve(results.size()); - - PriorityQueue> pq(results.size()); - size_t total = 0; - size_t tmp_n = results.size(); - - - for (size_t i = 0; i < tmp_n; ++i) - if (results[i].size() > 0){ - auto base = results[i].data(); - cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()}); - assert(i == cursors.size() - 1); - total += results[i].size(); - pq.push(cursors[i].ptr, tmp_n - i - 1); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - - if (total == 0) { - return std::vector(); - } - - std::vector output; - output.reserve(total); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[tmp_n - now.version - 1]; - auto& cursor2 = cursors[tmp_n - next.version - 1]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[tmp_n - now.version - 1]; - if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - return output;*/ - - size_t tot = 0; - for (auto& result: results) - if (result.size() > 0) tot += result[0].rec.key; - - return {{tot, 0}}; - } - - static void delete_query_state(void *state) { - auto s = (TrieSplineState *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (TrieSplineBufferState *) state; - delete s; - } -}; - } diff --git a/include/shard/WSS.h b/include/shard/WSS.h deleted file mode 100644 index 4e3a326..0000000 --- a/include/shard/WSS.h +++ /dev/null @@ -1,453 +0,0 @@ -/* - * include/shard/WSS.h - * - * Copyright (C) 2023 Douglas B. Rumbaugh - * Dong Xie - * - * All rights reserved. Published under the Modified BSD License. 
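- * - * (WSS: single-shard weighted set sampling, drawing records in O(1) per - * sample from a Walker's alias structure built over the record weights.)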
- * - */ -#pragma once - - -#include -#include -#include -#include -#include - -#include "framework/ShardRequirements.h" - -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t wss_cancelations = 0; - -template -struct wss_query_parms { - size_t sample_size; - gsl_rng *rng; -}; - -template -class WSSQuery; - -template -struct WSSState { - decltype(R::weight) total_weight; - size_t sample_size; - - WSSState() { - total_weight = 0; - } -}; - -template -struct WSSBufferState { - size_t cutoff; - size_t sample_size; - Alias* alias; - decltype(R::weight) max_weight; - decltype(R::weight) total_weight; - - ~WSSBufferState() { - delete alias; - } - -}; - -template -class WSS { -private: - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - - // FIXME: there has to be a better way to do this - friend class WSSQuery; - friend class WSSQuery; - - WSS(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - std::vector weights; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wss_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - weights.push_back(base->rec.weight); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - WSS(WSS** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector weights; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - weights.push_back(cursor.ptr->rec.weight); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_alias_structure(weights); - } - } - - ~WSS() { - if (m_data) free(m_data); - if (m_alias) delete m_alias; - if (m_bf) delete m_bf; - - } - - Wrapped *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alloc_size; - } - - size_t get_aux_memory_usage() { - return 0; - } - -private: - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - void build_alias_structure(std::vector &weights) { - - // normalize the 
weights vector - std::vector norm_weights(weights.size()); - - for (size_t i=0; i* m_data; - Alias *m_alias; - W m_total_weight; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - BloomFilter *m_bf; -}; - - -template -class WSSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(WSS *wss, void *parms) { - auto res = new WSSState(); - res->total_weight = wss->m_total_weight; - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - WSSBufferState *state = new WSSBufferState(); - auto parameters = (wss_query_parms*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - return state; - } - - std::vector weights; - - state->cutoff = buffer->get_record_count() - 1; - double total_weight = 0.0; - - for (size_t i = 0; i <= state->cutoff; i++) { - auto rec = buffer->get_data() + i; - weights.push_back(rec->rec.weight); - total_weight += rec->rec.weight; - } - - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } - - state->alias = new Alias(weights); - state->total_weight = total_weight; - - return state; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - auto p = (wss_query_parms *) query_parms; - auto bs = (WSSBufferState *) buff_state; - - std::vector shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - std::vector weights; - weights.push_back(bs->total_weight); - - decltype(R::weight) total_weight = 0; - for (auto &s : shard_states) { - auto state = (WSSState *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; isample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - - bs->sample_size = buffer_sz; - for (size_t i=0; i *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - static std::vector> query(WSS *wss, void *q_state, void *parms) { - auto rng = ((wss_query_parms *) parms)->rng; - - auto state = (WSSState *) q_state; - auto sample_size = state->sample_size; - - std::vector> result_set; - - if (sample_size == 0) { - return result_set; - } - size_t attempts = 0; - do { - attempts++; - size_t idx = wss->m_alias->get(rng); - result_set.emplace_back(*wss->get_record_at(idx)); - } while (attempts < sample_size); - - return result_set; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto st = (WSSBufferState *) state; - auto p = (wss_query_parms *) parms; - - std::vector> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; isample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; isample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(*(buffer->get_data() + idx)); - } 
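- // (Each alias draw above is O(1), so sampling the buffer costs - // O(sample_size) beyond the one-time weight scan performed in - // get_buffer_query_state.)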
- - return result; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - - for (size_t i=0; i *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (WSSBufferState *) state; - delete s; - } -}; - -} -- cgit v1.2.3 From a2fe4b1616a1b2318f70e842382818ee44aea9e6 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 7 Nov 2023 12:29:03 -0500 Subject: Alias shard fixes --- include/shard/Alias.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h index b6b16c5..a4a7d02 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -19,7 +19,7 @@ #include "psu-ds/PriorityQueue.h" #include "util/Cursor.h" -#include "psu-ds/psudb::Alias.h" +#include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" @@ -207,7 +207,13 @@ public: return 0; } -private: + W get_total_weight() { + return m_total_weight; + } + + size_t get_weighted_sample(gsl_rng *rng) const { + return m_alias->get(rng); + } size_t get_lower_bound(const K& key) const { size_t min = 0; @@ -227,6 +233,8 @@ private: return min; } +private: + void build_alias_structure(std::vector &weights) { // normalize the weights vector @@ -249,3 +257,4 @@ private: size_t m_alloc_size; BloomFilter *m_bf; }; +} -- cgit v1.2.3 From 9e1c1b1b930031896851b1ed4a15152508327d73 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 7 Nov 2023 13:35:54 -0500 Subject: Converted WIRS to the new interface --- include/shard/AugBTree.h | 371 +++++++++++++++++++++++++++++ include/shard/WIRS.h | 594 ----------------------------------------------- 2 files changed, 371 insertions(+), 594 deletions(-) create mode 100644 include/shard/AugBTree.h delete mode 100644 include/shard/WIRS.h (limited to 'include/shard') diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h new file mode 100644 index 0000000..e32ec64 --- /dev/null +++ b/include/shard/AugBTree.h @@ -0,0 +1,371 @@ +/* + * include/shard/AugBTree.h + * + * Copyright (C) 2023 Dong Xie + * Douglas B. Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. 
+ * + */ +#pragma once + + +#include +#include +#include +#include +#include + +#include "framework/ShardRequirements.h" + +#include "psu-ds/PriorityQueue.h" +#include "util/Cursor.h" +#include "psu-ds/Alias.h" +#include "psu-ds/BloomFilter.h" +#include "util/bf_config.h" + +using psudb::CACHELINE_SIZE; +using psudb::BloomFilter; +using psudb::PriorityQueue; +using psudb::queue_record; +using psudb::Alias; + +namespace de { + +thread_local size_t wirs_cancelations = 0; + +template +struct AugBTreeNode { + struct AugBTreeNode *left, *right; + decltype(R::key) low, high; + decltype(R::weight) weight; + Alias* alias; +}; + +template +class AugBTree { +private: + typedef decltype(R::key) K; + typedef decltype(R::value) V; + typedef decltype(R::weight) W; + +public: + AugBTree(MutableBuffer* buffer) + : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { + m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); + + size_t offset = 0; + m_reccnt = 0; + auto base = buffer->get_data(); + auto stop = base + buffer->get_record_count(); + + std::sort(base, stop, std::less>()); + + while (base < stop) { + if (!(base->is_tombstone()) && (base + 1) < stop) { + if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + wirs_cancelations++; + continue; + } + } else if (base->is_deleted()) { + base += 1; + continue; + } + + // FIXME: this shouldn't be necessary, but the tagged record + // bypass doesn't seem to be working on this code-path, so this + // ensures that tagged records from the buffer are able to be + // dropped, eventually. It should only need to be &= 1 + base->header &= 3; + m_data[m_reccnt++] = *base; + m_total_weight+= base->rec.weight; + + if (m_bf && base->is_tombstone()) { + m_tombstone_cnt++; + m_bf->insert(base->rec); + } + + base++; + } + + if (m_reccnt > 0) { + build_wirs_structure(); + } + } + + AugBTree(AugBTree** shards, size_t len) + : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { + std::vector>> cursors; + cursors.reserve(len); + + PriorityQueue> pq(len); + + size_t attemp_reccnt = 0; + size_t tombstone_count = 0; + + for (size_t i = 0; i < len; ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); + attemp_reccnt += shards[i]->get_record_count(); + tombstone_count += shards[i]->get_tombstone_count(); + pq.push(cursors[i].ptr, i); + } else { + cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); + } + } + + m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + + m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); + assert(m_alloc_size % CACHELINE_SIZE == 0); + m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + while (pq.size()) { + auto now = pq.peek(); + auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; + if (!now.data->is_tombstone() && next.data != nullptr && + now.data->rec == next.data->rec && next.data->is_tombstone()) { + + pq.pop(); pq.pop(); + auto& cursor1 = cursors[now.version]; + auto& cursor2 = cursors[next.version]; + if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); + } else { + auto& cursor = cursors[now.version]; + if (!cursor.ptr->is_deleted()) { + m_data[m_reccnt++] = *cursor.ptr; + m_total_weight += cursor.ptr->rec.weight; + if (m_bf && cursor.ptr->is_tombstone()) { + ++m_tombstone_cnt; + if (m_bf) m_bf->insert(cursor.ptr->rec); + } + } + pq.pop(); + + if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); + } + } + + if (m_reccnt > 0) { + build_wirs_structure(); + } + } + + ~AugBTree() { + if (m_data) free(m_data); + for (size_t i=0; i *point_lookup(const R &rec, bool filter=false) { + if (filter && !m_bf->lookup(rec)) { + return nullptr; + } + + size_t idx = get_lower_bound(rec.key); + if (idx >= m_reccnt) { + return nullptr; + } + + while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + + if (m_data[idx].rec == rec) { + return m_data + idx; + } + + return nullptr; + } + + Wrapped* get_data() const { + return m_data; + } + + size_t get_record_count() const { + return m_reccnt; + } + + size_t get_tombstone_count() const { + return m_tombstone_cnt; + } + + const Wrapped* get_record_at(size_t idx) const { + if (idx >= m_reccnt) return nullptr; + return m_data + idx; + } + + + size_t get_memory_usage() { + return m_alloc_size + m_node_cnt * sizeof(AugBTreeNode>); + } + + size_t get_aux_memory_usage() { + return 0; + } + + size_t get_lower_bound(const K& key) const { + size_t min = 0; + size_t max = m_reccnt - 1; + + const char * record_key; + while (min < max) { + size_t mid = (min + max) / 2; + + if (key > m_data[mid].rec.key) { + min = mid + 1; + } else { + max = mid; + } + } + + return min; + } + + W find_covering_nodes(K lower_key, K upper_key, std::vector &nodes, std::vector &weights) { + W total_weight = 0; + + /* Simulate a stack to unfold recursion. */ + struct AugBTreeNode* st[64] = {0}; + st[0] = m_root; + size_t top = 1; + while(top > 0) { + auto now = st[--top]; + if (covered_by(now, lower_key, upper_key) || + (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) { + nodes.emplace_back(now); + weights.emplace_back(now->weight); + total_weight += now->weight; + } else { + if (now->left && intersects(now->left, lower_key, upper_key)) st[top++] = now->left; + if (now->right && intersects(now->right, lower_key, upper_key)) st[top++] = now->right; + } + } + + + return total_weight; + } + + Wrapped *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) { + /* k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. 
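(Bounds rejection happens after the three draws: a sample falling outside the query's key range returns nullptr, and the caller retries until enough samples are accepted.)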
*/ + + /* first level */ + auto node = (AugBTreeNode*) internal_node; + + /* second level */ + auto fat_point = node->low + node->alias->get(rng); + + /* third level */ + size_t rec_offset = fat_point * m_group_size + m_alias[fat_point]->get(rng); + auto record = m_data + rec_offset; + + /* bounds rejection */ + if (lower_key > record->rec.key || upper_key < record->rec.key) { + return nullptr; + } + + return record; + } + +private: + + bool covered_by(struct AugBTreeNode* node, const K& lower_key, const K& upper_key) { + auto low_index = node->low * m_group_size; + auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); + return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; + } + + bool intersects(struct AugBTreeNode* node, const K& lower_key, const K& upper_key) { + auto low_index = node->low * m_group_size; + auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); + return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; + } + + void build_wirs_structure() { + m_group_size = std::ceil(std::log(m_reccnt)); + size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); + + // Fat point construction + low level alias.... + double sum_weight = 0.0; + std::vector weights; + std::vector group_norm_weight; + size_t i = 0; + size_t group_no = 0; + while (i < m_reccnt) { + double group_weight = 0.0; + group_norm_weight.clear(); + for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { + auto w = m_data[i].rec.weight; + group_norm_weight.emplace_back(w); + group_weight += w; + sum_weight += w; + } + + for (auto& w: group_norm_weight) + if (group_weight) w /= group_weight; + else w = 1.0 / group_norm_weight.size(); + m_alias.emplace_back(new Alias(group_norm_weight)); + + + weights.emplace_back(group_weight); + } + + assert(weights.size() == n_groups); + + m_root = construct_AugBTreeNode(weights, 0, n_groups-1); + } + + struct AugBTreeNode* construct_AugBTreeNode(const std::vector& weights, size_t low, size_t high) { + if (low == high) { + return new AugBTreeNode{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; + } else if (low > high) return nullptr; + + std::vector node_weights; + W sum = 0; + for (size_t i = low; i < high; ++i) { + node_weights.emplace_back(weights[i]); + sum += weights[i]; + } + + for (auto& w: node_weights) + if (sum) w /= sum; + else w = 1.0 / node_weights.size(); + + m_node_cnt += 1; + size_t mid = (low + high) / 2; + return new AugBTreeNode{construct_AugBTreeNode(weights, low, mid), + construct_AugBTreeNode(weights, mid + 1, high), + low, high, sum, new Alias(node_weights)}; + } + + void free_tree(struct AugBTreeNode* node) { + if (node) { + delete node->alias; + free_tree(node->left); + free_tree(node->right); + delete node; + } + } + + Wrapped* m_data; + std::vector m_alias; + AugBTreeNode* m_root; + W m_total_weight; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_group_size; + size_t m_alloc_size; + size_t m_node_cnt; + BloomFilter *m_bf; +}; +} diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h deleted file mode 100644 index bf29325..0000000 --- a/include/shard/WIRS.h +++ /dev/null @@ -1,594 +0,0 @@ -/* - * include/shard/WIRS.h - * - * Copyright (C) 2023 Dong Xie - * Douglas B. Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. 
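- * - * (WIRS: weighted independent range sampling over an alias-augmented - * binary tree; replaced in this commit by AugBTree.h, with the query - * logic moved out into a standalone class.)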
- * - */ -#pragma once - - -#include -#include -#include -#include -#include - -#include "framework/ShardRequirements.h" - -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/Alias.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" - -using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; -using psudb::Alias; - -namespace de { - -thread_local size_t wirs_cancelations = 0; - -template -struct wirs_query_parms { - decltype(R::key) lower_bound; - decltype(R::key) upper_bound; - size_t sample_size; - gsl_rng *rng; -}; - -template -class WIRSQuery; - -template -struct wirs_node { - struct wirs_node *left, *right; - decltype(R::key) low, high; - decltype(R::weight) weight; - Alias* alias; -}; - -template -struct WIRSState { - decltype(R::weight) total_weight; - std::vector*> nodes; - Alias* top_level_alias; - size_t sample_size; - - WIRSState() { - total_weight = 0; - top_level_alias = nullptr; - } - - ~WIRSState() { - if (top_level_alias) delete top_level_alias; - } -}; - -template -struct WIRSBufferState { - size_t cutoff; - Alias* alias; - std::vector> records; - decltype(R::weight) max_weight; - size_t sample_size; - decltype(R::weight) total_weight; - - ~WIRSBufferState() { - delete alias; - } - -}; - -template -class WIRS { -private: - - typedef decltype(R::key) K; - typedef decltype(R::value) V; - typedef decltype(R::weight) W; - -public: - - // FIXME: there has to be a better way to do this - friend class WIRSQuery; - friend class WIRSQuery; - - WIRS(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wirs_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - WIRS(WIRS** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - - size_t attemp_reccnt = 0; - size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } - - if (m_reccnt > 0) { - build_wirs_structure(); - } - } - - ~WIRS() { - if (m_data) free(m_data); - for (size_t i=0; i *point_lookup(const R &rec, bool filter=false) { - if (filter && !m_bf->lookup(rec)) { - return nullptr; - } - - size_t idx = get_lower_bound(rec.key); - if (idx >= m_reccnt) { - return nullptr; - } - - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; - - if (m_data[idx].rec == rec) { - return m_data + idx; - } - - return nullptr; - } - - Wrapped* get_data() const { - return m_data; - } - - size_t get_record_count() const { - return m_reccnt; - } - - size_t get_tombstone_count() const { - return m_tombstone_cnt; - } - - const Wrapped* get_record_at(size_t idx) const { - if (idx >= m_reccnt) return nullptr; - return m_data + idx; - } - - - size_t get_memory_usage() { - return m_alloc_size + m_node_cnt * sizeof(wirs_node>); - } - - size_t get_aux_memory_usage() { - return 0; - } - -private: - - size_t get_lower_bound(const K& key) const { - size_t min = 0; - size_t max = m_reccnt - 1; - - const char * record_key; - while (min < max) { - size_t mid = (min + max) / 2; - - if (key > m_data[mid].rec.key) { - min = mid + 1; - } else { - max = mid; - } - } - - return min; - } - - bool covered_by(struct wirs_node* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < 
m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key; - } - - bool intersects(struct wirs_node* node, const K& lower_key, const K& upper_key) { - auto low_index = node->low * m_group_size; - auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1); - return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key; - } - - void build_wirs_structure() { - m_group_size = std::ceil(std::log(m_reccnt)); - size_t n_groups = std::ceil((double) m_reccnt / (double) m_group_size); - - // Fat point construction + low level alias.... - double sum_weight = 0.0; - std::vector weights; - std::vector group_norm_weight; - size_t i = 0; - size_t group_no = 0; - while (i < m_reccnt) { - double group_weight = 0.0; - group_norm_weight.clear(); - for (size_t k = 0; k < m_group_size && i < m_reccnt; ++k, ++i) { - auto w = m_data[i].rec.weight; - group_norm_weight.emplace_back(w); - group_weight += w; - sum_weight += w; - } - - for (auto& w: group_norm_weight) - if (group_weight) w /= group_weight; - else w = 1.0 / group_norm_weight.size(); - m_alias.emplace_back(new Alias(group_norm_weight)); - - - weights.emplace_back(group_weight); - } - - assert(weights.size() == n_groups); - - m_root = construct_wirs_node(weights, 0, n_groups-1); - } - - struct wirs_node* construct_wirs_node(const std::vector& weights, size_t low, size_t high) { - if (low == high) { - return new wirs_node{nullptr, nullptr, low, high, weights[low], new Alias({1.0})}; - } else if (low > high) return nullptr; - - std::vector node_weights; - W sum = 0; - for (size_t i = low; i < high; ++i) { - node_weights.emplace_back(weights[i]); - sum += weights[i]; - } - - for (auto& w: node_weights) - if (sum) w /= sum; - else w = 1.0 / node_weights.size(); - - m_node_cnt += 1; - size_t mid = (low + high) / 2; - return new wirs_node{construct_wirs_node(weights, low, mid), - construct_wirs_node(weights, mid + 1, high), - low, high, sum, new Alias(node_weights)}; - } - - void free_tree(struct wirs_node* node) { - if (node) { - delete node->alias; - free_tree(node->left); - free_tree(node->right); - delete node; - } - } - - Wrapped* m_data; - std::vector m_alias; - wirs_node* m_root; - W m_total_weight; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_group_size; - size_t m_alloc_size; - size_t m_node_cnt; - BloomFilter *m_bf; -}; - - -template -class WIRSQuery { -public: - - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=false; - - static void *get_query_state(WIRS *wirs, void *parms) { - auto res = new WIRSState(); - decltype(R::key) lower_key = ((wirs_query_parms *) parms)->lower_bound; - decltype(R::key) upper_key = ((wirs_query_parms *) parms)->upper_bound; - - // Simulate a stack to unfold recursion. 
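- // The loop below gathers the maximal subtrees fully covered by the query - // range, plus any intersecting leaves; their weights then seed the - // top-level Alias structure used to choose a node for each sample.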
- double total_weight = 0.0; - struct wirs_node* st[64] = {0}; - st[0] = wirs->m_root; - size_t top = 1; - while(top > 0) { - auto now = st[--top]; - if (wirs->covered_by(now, lower_key, upper_key) || - (now->left == nullptr && now->right == nullptr && wirs->intersects(now, lower_key, upper_key))) { - res->nodes.emplace_back(now); - total_weight += now->weight; - } else { - if (now->left && wirs->intersects(now->left, lower_key, upper_key)) st[top++] = now->left; - if (now->right && wirs->intersects(now->right, lower_key, upper_key)) st[top++] = now->right; - } - } - - std::vector weights; - for (const auto& node: res->nodes) { - weights.emplace_back(node->weight / total_weight); - } - res->total_weight = total_weight; - res->top_level_alias = new Alias(weights); - res->sample_size = 0; - - return res; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - WIRSBufferState *state = new WIRSBufferState(); - auto parameters = (wirs_query_parms*) parms; - if constexpr (Rejection) { - state->cutoff = buffer->get_record_count() - 1; - state->max_weight = buffer->get_max_weight(); - state->total_weight = buffer->get_total_weight(); - state->sample_size = 0; - return state; - } - - std::vector weights; - - state->cutoff = buffer->get_record_count() - 1; - double total_weight = 0.0; - - for (size_t i = 0; i <= state->cutoff; i++) { - auto rec = buffer->get_data() + i; - - if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) { - weights.push_back(rec->rec.weight); - state->records.push_back(*rec); - total_weight += rec->rec.weight; - } - } - - for (size_t i = 0; i < weights.size(); i++) { - weights[i] = weights[i] / total_weight; - } - - state->total_weight = total_weight; - state->alias = new Alias(weights); - state->sample_size = 0; - - return state; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, std::vector &buff_states) { - // FIXME: need to redo for the buffer vector interface - auto p = (wirs_query_parms *) query_parms; - - std::vector shard_sample_sizes(shard_states.size()+1, 0); - size_t buffer_sz = 0; - - decltype(R::weight) total_weight = 0; - std::vector weights; - for (auto &s : buff_states) { - auto state = (WIRSBufferState *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - for (auto &s : shard_states) { - auto state = (WIRSState *) s; - total_weight += state->total_weight; - weights.push_back(state->total_weight); - } - - std::vector normalized_weights; - for (auto w : weights) { - normalized_weights.push_back((double) w / (double) total_weight); - } - - auto shard_alias = Alias(normalized_weights); - for (size_t i=0; isample_size; i++) { - auto idx = shard_alias.get(p->rng); - if (idx == 0) { - buffer_sz++; - } else { - shard_sample_sizes[idx - 1]++; - } - } - - for (size_t i=0; i *) shard_states[i]; - state->sample_size = shard_sample_sizes[i+1]; - } - } - - - - static std::vector> query(WIRS *wirs, void *q_state, void *parms) { - auto lower_key = ((wirs_query_parms *) parms)->lower_bound; - auto upper_key = ((wirs_query_parms *) parms)->upper_bound; - auto rng = ((wirs_query_parms *) parms)->rng; - - auto state = (WIRSState *) q_state; - auto sample_size = state->sample_size; - - std::vector> result_set; - - if (sample_size == 0) { - return result_set; - } - // k -> sampling: three levels. 1. select a node -> select a fat point -> select a record. 
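- // Out-of-bounds draws are rejected without being retried, so the result - // set may contain fewer than sample_size records.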
- size_t cnt = 0; - size_t attempts = 0; - do { - ++attempts; - // first level.... - auto node = state->nodes[state->top_level_alias->get(rng)]; - // second level... - auto fat_point = node->low + node->alias->get(rng); - // third level... - size_t rec_offset = fat_point * wirs->m_group_size + wirs->m_alias[fat_point]->get(rng); - auto record = wirs->m_data + rec_offset; - - // bounds rejection - if (lower_key > record->rec.key || upper_key < record->rec.key) { - continue; - } - - result_set.emplace_back(*record); - cnt++; - } while (attempts < sample_size); - - return result_set; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - auto st = (WIRSBufferState *) state; - auto p = (wirs_query_parms *) parms; - - std::vector> result; - result.reserve(st->sample_size); - - if constexpr (Rejection) { - for (size_t i=0; isample_size; i++) { - auto idx = gsl_rng_uniform_int(p->rng, st->cutoff); - auto rec = buffer->get_data() + idx; - - auto test = gsl_rng_uniform(p->rng) * st->max_weight; - - if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { - result.emplace_back(*rec); - } - } - return result; - } - - for (size_t i=0; isample_size; i++) { - auto idx = st->alias->get(p->rng); - result.emplace_back(st->records[idx]); - } - - return result; - } - - static std::vector merge(std::vector>> &results, void *parms) { - std::vector output; - - for (size_t i=0; i *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (WIRSBufferState *) state; - delete s; - } - - - //{q.get_buffer_query_state(p, p)}; - //{q.buffer_query(p, p)}; - -}; - -} -- cgit v1.2.3 From 357cab549c2ed33970562b84ff6f83923742343d Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 7 Nov 2023 15:34:24 -0500 Subject: Comment and License updates --- include/shard/Alias.h | 6 +++++- include/shard/AugBTree.h | 6 +++++- include/shard/ISAMTree.h | 4 +++- include/shard/PGM.h | 6 +++++- include/shard/TrieSpline.h | 4 +++- include/shard/VPTree.h | 7 ++++++- 6 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h index a4a7d02..a3e8ad8 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -4,7 +4,11 @@ * Copyright (C) 2023 Douglas B. Rumbaugh * Dong Xie * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * A shard shim around the psudb::Alias Walker's Alias + * structure. Designed to be used along side the WSS + * query in include/query/wss.h * */ #pragma once diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h index e32ec64..be664ac 100644 --- a/include/shard/AugBTree.h +++ b/include/shard/AugBTree.h @@ -4,8 +4,12 @@ * Copyright (C) 2023 Dong Xie * Douglas B. Rumbaugh * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. * + * A shard shim around the alias augmented B-tree. Designed to be + * used along side the WIRS query in include/query/wirs.h, but + * also supports the necessary methods for other common query + * types. */ #pragma once diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index a610c09..e11c899 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -4,7 +4,9 @@ * Copyright (C) 2023 Douglas B. Rumbaugh * Dong Xie * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. 
+ * + * A shard shim around an in-memory ISAM tree. * */ #pragma once diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 6b66b7d..13db26a 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -2,8 +2,12 @@ * include/shard/PGM.h * * Copyright (C) 2023 Douglas B. Rumbaugh + * Dong Xie * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * A shard shim around the static version of the PGM learned + * index. * */ #pragma once diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index fdf8edb..56ec357 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -3,7 +3,9 @@ * * Copyright (C) 2023 Douglas B. Rumbaugh * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * A shard shim around the TrieSpline learned index. * */ #pragma once diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 978372b..2f5ebbb 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -3,7 +3,12 @@ * * Copyright (C) 2023 Douglas B. Rumbaugh * - * All rights reserved. Published under the Modified BSD License. + * Distributed under the Modified BSD License. + * + * A shard shim around the VPTree spatial index. + * + * FIXME: separate the KNN query class out into a standalone + * file in include/query . * */ #pragma once -- cgit v1.2.3 From 5a2c378aad3f1a9923db3191ffaa3fb807d392b2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 11 Jan 2024 15:29:51 -0500 Subject: Ported ISAMTree over to new buffer setup I may still play with the shard from shards constructor, and queries need some work yet too. --- include/shard/ISAMTree.h | 125 ++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 60 deletions(-) (limited to 'include/shard') diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index e11c899..6b2f6b5 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -13,8 +13,6 @@ #include #include -#include -#include #include "framework/ShardRequirements.h" @@ -27,52 +25,54 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; namespace de { -thread_local size_t mrun_cancelations = 0; - -template +template class ISAMTree { private: typedef decltype(R::key) K; typedef decltype(R::value) V; -constexpr static size_t inmem_isam_node_size = 256; -constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*)); +constexpr static size_t NODE_SZ = 256; +constexpr static size_t INTERNAL_FANOUT = NODE_SZ / (sizeof(K) + sizeof(byte*)); struct InternalNode { - K keys[inmem_isam_fanout]; - char* child[inmem_isam_fanout]; + K keys[INTERNAL_FANOUT]; + byte* child[INTERNAL_FANOUT]; }; -constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R); -constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout; - -static_assert(sizeof(InternalNode) == inmem_isam_node_size, "node size does not match"); +static_assert(sizeof(InternalNode) == NODE_SZ, "node size does not match"); -public: - ISAMTree(MutableBuffer* buffer) - :m_reccnt(0), m_tombstone_cnt(0), m_isam_nodes(nullptr), m_deleted_cnt(0) { +constexpr static size_t LEAF_FANOUT = NODE_SZ / sizeof(R); - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - 
(buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); +public: + ISAMTree(BufferView buffer) + : m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + , m_isam_nodes(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_internal_node_cnt(0) + , m_deleted_cnt(0) + , m_alloc_size(0) + , m_data(nullptr) + { TIMER_INIT(); - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped), (byte**) &m_data); TIMER_START(); + auto temp_buffer = (Wrapped *) psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped)); + buffer.copy_to_buffer((byte *) temp_buffer); + + auto base = temp_buffer; + auto stop = base + buffer.get_record_count(); std::sort(base, stop, std::less>()); TIMER_STOP(); + auto sort_time = TIMER_RESULT(); TIMER_START(); @@ -80,7 +80,6 @@ public: if (!base->is_tombstone() && (base + 1 < stop) && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { base += 2; - mrun_cancelations++; continue; } else if (base->is_deleted()) { base += 1; @@ -109,10 +108,21 @@ public: } TIMER_STOP(); auto level_time = TIMER_RESULT(); + + free(temp_buffer); } ISAMTree(ISAMTree** runs, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_deleted_cnt(0), m_isam_nodes(nullptr) { + : m_bf(nullptr) + , m_isam_nodes(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_internal_node_cnt(0) + , m_deleted_cnt(0) + , m_alloc_size(0) + , m_data(nullptr) + { std::vector>> cursors; cursors.reserve(len); @@ -139,8 +149,6 @@ public: assert(m_alloc_size % CACHELINE_SIZE == 0); m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - size_t offset = 0; - while (pq.size()) { auto now = pq.peek(); auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; @@ -173,9 +181,9 @@ public: } ~ISAMTree() { - if (m_data) free(m_data); - if (m_isam_nodes) free(m_isam_nodes); - if (m_bf) delete m_bf; + free(m_data); + free(m_isam_nodes); + delete m_bf; } Wrapped *point_lookup(const R &rec, bool filter=false) { @@ -214,25 +222,25 @@ public: } size_t get_memory_usage() { - return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size; + return m_alloc_size; } size_t get_aux_memory_usage() { - return 0; + return m_bf->memory_usage(); } size_t get_lower_bound(const K& key) const { const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast(now))) { + while (!is_leaf(reinterpret_cast(now))) { const InternalNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { if (now->child[i + 1] == nullptr || key <= now->keys[i]) { next = reinterpret_cast(now->child[i]); break; } } - now = next ? next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); + now = next ? 
next : reinterpret_cast(now->child[INTERNAL_FANOUT - 1]); } const Wrapped* pos = reinterpret_cast*>(now); @@ -243,16 +251,16 @@ public: size_t get_upper_bound(const K& key) const { const InternalNode* now = m_root; - while (!is_leaf(reinterpret_cast(now))) { + while (!is_leaf(reinterpret_cast(now))) { const InternalNode* next = nullptr; - for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) { + for (size_t i = 0; i < INTERNAL_FANOUT - 1; ++i) { if (now->child[i + 1] == nullptr || key < now->keys[i]) { next = reinterpret_cast(now->child[i]); break; } } - now = next ? next : reinterpret_cast(now->child[inmem_isam_fanout - 1]); + now = next ? next : reinterpret_cast(now->child[INTERNAL_FANOUT - 1]); } const Wrapped* pos = reinterpret_cast*>(now); @@ -264,20 +272,17 @@ public: private: void build_internal_levels() { - size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0); + size_t n_leaf_nodes = m_reccnt / LEAF_FANOUT + (m_reccnt % LEAF_FANOUT != 0); + size_t level_node_cnt = n_leaf_nodes; size_t node_cnt = 0; do { - level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0); + level_node_cnt = level_node_cnt / INTERNAL_FANOUT + (level_node_cnt % INTERNAL_FANOUT != 0); node_cnt += level_node_cnt; } while (level_node_cnt > 1); - m_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - - m_isam_nodes = (InternalNode*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + m_alloc_size += psudb::sf_aligned_calloc(CACHELINE_SIZE, node_cnt, NODE_SZ, (byte**) &m_isam_nodes); m_internal_node_cnt = node_cnt; - memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size); InternalNode* current_node = m_isam_nodes; @@ -285,16 +290,16 @@ private: const Wrapped* leaf_stop = m_data + m_reccnt; while (leaf_base < leaf_stop) { size_t fanout = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { - auto rec_ptr = leaf_base + inmem_isam_leaf_fanout * i; + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { + auto rec_ptr = leaf_base + LEAF_FANOUT * i; if (rec_ptr >= leaf_stop) break; - const Wrapped* sep_key = std::min(rec_ptr + inmem_isam_leaf_fanout - 1, leaf_stop - 1); + const Wrapped* sep_key = std::min(rec_ptr + LEAF_FANOUT - 1, leaf_stop - 1); current_node->keys[i] = sep_key->rec.key; - current_node->child[i] = (char*)rec_ptr; + current_node->child[i] = (byte*)rec_ptr; ++fanout; } current_node++; - leaf_base += fanout * inmem_isam_leaf_fanout; + leaf_base += fanout * LEAF_FANOUT; } auto level_start = m_isam_nodes; @@ -304,12 +309,12 @@ private: auto now = level_start; while (now < level_stop) { size_t child_cnt = 0; - for (size_t i = 0; i < inmem_isam_fanout; ++i) { + for (size_t i = 0; i < INTERNAL_FANOUT; ++i) { auto node_ptr = now + i; ++child_cnt; if (node_ptr >= level_stop) break; - current_node->keys[i] = node_ptr->keys[inmem_isam_fanout - 1]; - current_node->child[i] = (char*)node_ptr; + current_node->keys[i] = node_ptr->keys[INTERNAL_FANOUT - 1]; + current_node->child[i] = (byte*)node_ptr; } now += child_cnt; current_node++; @@ -323,12 +328,10 @@ private: m_root = level_start; } - bool is_leaf(const char* ptr) const { - return ptr >= (const char*)m_data && ptr < (const char*)(m_data + m_reccnt); + bool is_leaf(const byte* ptr) const { + return ptr >= (const byte*)m_data && ptr < (const byte*)(m_data + m_reccnt); } - // Members: sorted data, internal ISAM levels, reccnt; - Wrapped* m_data; psudb::BloomFilter *m_bf; 
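// m_isam_nodes holds every internal level in a single flat allocation, // built bottom-up from the sorted leaf data; m_root points to the topmost // node, and is_leaf() distinguishes leaf pointers by address range.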
InternalNode* m_isam_nodes; InternalNode* m_root; @@ -337,5 +340,7 @@ private: size_t m_internal_node_cnt; size_t m_deleted_cnt; size_t m_alloc_size; + + Wrapped* m_data; }; } -- cgit v1.2.3 From 138c793b0a58577713d98c98bb140cf1d9c79bee Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 17 Jan 2024 18:22:00 -0500 Subject: Multiple concurrency bug fixes A poorly organized commit with fixes for a variety of bugs that were causing missing records. The core problems all appear to be fixed, though there is an outstanding problem with tombstones not being completely canceled. A very small number are appearing in the wrong order during the static structure test. --- include/shard/ISAMTree.h | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'include/shard') diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 6b2f6b5..932e767 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -62,10 +62,13 @@ public: { TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped), (byte**) &m_data); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); TIMER_START(); - auto temp_buffer = (Wrapped *) psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped)); + auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); buffer.copy_to_buffer((byte *) temp_buffer); auto base = temp_buffer; @@ -99,6 +102,7 @@ public: base++; } + TIMER_STOP(); auto copy_time = TIMER_RESULT(); @@ -112,7 +116,7 @@ public: free(temp_buffer); } - ISAMTree(ISAMTree** runs, size_t len) + ISAMTree(std::vector &shards) : m_bf(nullptr) , m_isam_nodes(nullptr) , m_root(nullptr) @@ -124,19 +128,19 @@ public: , m_data(nullptr) { std::vector>> cursors; - cursors.reserve(len); + cursors.reserve(shards.size()); - PriorityQueue> pq(len); + PriorityQueue> pq(shards.size()); size_t attemp_reccnt = 0; size_t tombstone_count = 0; - for (size_t i = 0; i < len; ++i) { - if (runs[i]) { - auto base = runs[i]->get_data(); - cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()}); - attemp_reccnt += runs[i]->get_record_count(); - tombstone_count += runs[i]->get_tombstone_count(); + for (size_t i = 0; i < shards.size(); ++i) { + if (shards[i]) { + auto base = shards[i]->get_data(); + cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); + attemp_reccnt += shards[i]->get_record_count(); + tombstone_count += shards[i]->get_tombstone_count(); pq.push(cursors[i].ptr, i); } else { cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); @@ -144,10 +148,9 @@ public: } m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); while (pq.size()) { auto now = pq.peek(); @@ -165,6 +168,8 @@ public: if (!cursor.ptr->is_deleted()) { m_data[m_reccnt++] = *cursor.ptr; if (cursor.ptr->is_tombstone()) { + //fprintf(stderr, "ISAM: Tombstone from shard %ld next record from shard %ld\n", + //now.version, next.version); 
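+ // (debug hook retained for the outstanding issue described in the commit + // message: a small number of tombstones survive cancellation and appear + // out of order in the static structure test)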
++m_tombstone_cnt; m_bf->insert(cursor.ptr->rec); } -- cgit v1.2.3 From 51a85013236f4b2bd596caf179d90e67c848963c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Jan 2024 15:31:34 -0500 Subject: TrieSpline + tests --- include/shard/TrieSpline.h | 151 +++++++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 61 deletions(-) (limited to 'include/shard') diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 56ec357..8142a67 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -12,10 +12,6 @@ #include -#include -#include -#include -#include #include "framework/ShardRequirements.h" #include "ts/builder.h" @@ -23,62 +19,64 @@ #include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "psu-util/timer.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; namespace de { -template +template class TrieSpline { private: typedef decltype(R::key) K; typedef decltype(R::value) V; public: - TrieSpline(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - + TrieSpline(BufferView buffer) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + TIMER_INIT(); + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + + TIMER_START(); + auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); + buffer.copy_to_buffer((byte *) temp_buffer); + + auto base = temp_buffer; + auto stop = base + buffer.get_record_count(); std::sort(base, stop, std::less>()); K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; + K max_key = (stop-1)->rec.key; + TIMER_STOP(); - auto bldr = ts::Builder(min_key, max_key, E); + auto sort_time = TIMER_RESULT(); + TIMER_START(); + auto bldr = ts::Builder(min_key, max_key, E); while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } + if (!base->is_tombstone() && (base + 1 < stop) + && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { + base += 2; + continue; } else if (base->is_deleted()) { base += 1; continue; } - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; - } - // FIXME: this shouldn't be necessary, but the tagged record // bypass doesn't seem to be working on this code-path, so this // ensures that tagged records from the buffer are able to be @@ -86,37 +84,67 @@ public: base->header &= 3; m_data[m_reccnt++] = *base; bldr.AddKey(base->rec.key); - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; + ++m_tombstone_cnt; m_bf->insert(base->rec); } - + + /* + * 
determine the "true" min/max keys based on the scan. This is + * to avoid situations where the min/max in the input array + * are deleted and don't survive into the structure itself. + */ + if (m_reccnt == 0) { + m_max_key = m_min_key = base->rec.key; + } else if (base->rec.key > m_max_key) { + m_max_key = base->rec.key; + } else if (base->rec.key < m_min_key) { + m_min_key = base->rec.key; + } + base++; } + TIMER_STOP(); + auto copy_time = TIMER_RESULT(); + + TIMER_START(); if (m_reccnt > 0) { m_ts = bldr.Finalize(); } + TIMER_STOP(); + auto level_time = TIMER_RESULT(); + + free(temp_buffer); } - TrieSpline(TrieSpline** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { + TrieSpline(std::vector &shards) + : m_data(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_max_key(0) + , m_min_key(0) + , m_bf(nullptr) + { + std::vector>> cursors; - cursors.reserve(len); + cursors.reserve(shards.size()); - PriorityQueue> pq(len); + PriorityQueue> pq(shards.size()); size_t attemp_reccnt = 0; size_t tombstone_count = 0; - // initialize m_max_key and m_min_key using the values from the - // first shard. These will later be updated when building - // the initial priority queue to their true values. + /* + * Initialize m_max_key and m_min_key using the values from the + * first shard. These will later be updated when building + * the initial priority queue to their true values. + */ m_max_key = shards[0]->m_max_key; m_min_key = shards[0]->m_min_key; - for (size_t i = 0; i < len; ++i) { + for (size_t i = 0; i < shards.size(); ++i) { if (shards[i]) { auto base = shards[i]->get_data(); cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); @@ -137,12 +165,11 @@ public: } m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); - auto bldr = ts::Builder(m_min_key, m_max_key, E); - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); + auto bldr = ts::Builder(m_min_key, m_max_key, E); while (pq.size()) { auto now = pq.peek(); auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; @@ -152,33 +179,32 @@ public: pq.pop(); pq.pop(); auto& cursor1 = cursors[now.version]; auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); + if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); + if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); } else { auto& cursor = cursors[now.version]; if (!cursor.ptr->is_deleted()) { m_data[m_reccnt++] = *cursor.ptr; bldr.AddKey(cursor.ptr->rec.key); - if (m_bf && cursor.ptr->is_tombstone()) { + if (cursor.ptr->is_tombstone()) { ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); + m_bf->insert(cursor.ptr->rec); } } pq.pop(); - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); + if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); } } if (m_reccnt > 0) { m_ts = bldr.Finalize(); } - } + } ~TrieSpline() { - if (m_data) free(m_data); - if (m_bf) delete m_bf; - + free(m_data); + delete m_bf; } Wrapped *point_lookup(const R &rec, bool filter=false) { @@ -253,14 +279,17 @@ public: max = mid; } } + } + if (idx == m_reccnt) { + return m_reccnt; } if (m_data[idx].rec.key > key && idx > 0 && m_data[idx-1].rec.key <= key) { return idx-1; } - return (m_data[idx].rec.key <= key) ? idx : m_reccnt; + return idx; } private: -- cgit v1.2.3 From 2c5d549b3618b9ea72e6eece4cb4f3da5a6811a8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 13:42:34 -0500 Subject: Fully realized shard concept interface --- include/shard/Alias.h | 15 ++++++--------- include/shard/ISAMTree.h | 8 +++++--- include/shard/TrieSpline.h | 1 + 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h index a3e8ad8..a234575 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -15,9 +15,6 @@ #include #include -#include -#include -#include #include "framework/ShardRequirements.h" @@ -34,7 +31,7 @@ using psudb::queue_record; namespace de { -thread_local size_t wss_cancelations = 0; +static thread_local size_t wss_cancelations = 0; template class Alias { @@ -44,7 +41,7 @@ private: typedef decltype(R::weight) W; public: - Alias(MutableBuffer* buffer) + Alias(BufferView* buffer) : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); @@ -96,17 +93,17 @@ public: } } - Alias(Alias** shards, size_t len) + Alias(std::vector &shards) : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { std::vector>> cursors; - cursors.reserve(len); + cursors.reserve(shards.size()); - PriorityQueue> pq(len); + PriorityQueue> pq(shards.size()); size_t attemp_reccnt = 0; size_t tombstone_count = 0; - for (size_t i = 0; i < len; ++i) { + for (size_t i = 0; i < shards.size(); ++i) { if (shards[i]) { auto base = shards[i]->get_data(); cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 932e767..7de9cb1 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -25,6 +25,7 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; +using psudb::byte; namespace de { @@ -222,9 +223,6 @@ public: return 
m_tombstone_cnt; } - const Wrapped* get_record_at(size_t idx) const { - return (idx < m_reccnt) ? m_data + idx : nullptr; - } size_t get_memory_usage() { return m_alloc_size; @@ -234,6 +232,7 @@ public: return m_bf->memory_usage(); } + /* SortedShardInterface methods */ size_t get_lower_bound(const K& key) const { const InternalNode* now = m_root; while (!is_leaf(reinterpret_cast(now))) { @@ -274,6 +273,9 @@ public: return pos - m_data; } + const Wrapped* get_record_at(size_t idx) const { + return (idx < m_reccnt) ? m_data + idx : nullptr; + } private: void build_internal_levels() { diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 8142a67..9473177 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -25,6 +25,7 @@ using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; +using psudb::byte; namespace de { -- cgit v1.2.3 From bd74e27b28bd95267ce50d2e4b6f12b51d9b6aae Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 7 Feb 2024 17:23:23 -0500 Subject: Cleaned up shard files (except VPTree) Cleaned up shard implementations, fixed a few bugs, and set up some tests. There's still some work to be done in creating tests for the weighted sampling operations for the alias and aug btree shards. --- include/shard/Alias.h | 155 +++++++++++++++------------------------------ include/shard/AugBTree.h | 150 ++++++++++++------------------------------- include/shard/ISAMTree.h | 105 ++++-------------------------- include/shard/PGM.h | 140 +++++++++++++--------------------------- include/shard/TrieSpline.h | 149 +++++++------------------------------------ 5 files changed, 171 insertions(+), 528 deletions(-) (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h index a234575..f0d1d59 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -14,20 +14,19 @@ #pragma once #include -#include #include "framework/ShardRequirements.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; +using psudb::byte; namespace de { @@ -41,126 +40,73 @@ private: typedef decltype(R::weight) W; public: - Alias(BufferView* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - std::vector weights; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wss_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } + Alias(BufferView buffer) + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { + + + m_alloc_size = 
psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - weights.push_back(base->rec.weight); - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); + if (m_reccnt > 0) { + std::vector weights; + for (size_t i=0; i 0) { build_alias_structure(weights); } } Alias(std::vector &shards) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) { - std::vector>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue> pq(shards.size()); + : m_data(nullptr) + , m_alias(nullptr) + , m_total_weight(0) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) + , m_bf(nullptr) { size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector weights; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - weights.push_back(cursor.ptr->rec.weight); - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + std::vector weights; + for (size_t i=0; i *point_lookup(const R &rec, bool filter=false) { @@ -173,7 +119,7 @@ public: return nullptr; } - while (idx < m_reccnt && m_data[idx].rec < rec) ++idx; + while (idx < (m_reccnt-1) && m_data[idx].rec < rec) ++idx; if (m_data[idx].rec == rec) { return m_data + idx; @@ -205,7 +151,7 @@ public: } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? m_bf->memory_usage() : 0; } W get_total_weight() { @@ -254,7 +200,6 @@ private: W m_total_weight; size_t m_reccnt; size_t m_tombstone_cnt; - size_t m_group_size; size_t m_alloc_size; BloomFilter *m_bf; }; diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h index be664ac..58bd098 100644 --- a/include/shard/AugBTree.h +++ b/include/shard/AugBTree.h @@ -16,28 +16,21 @@ #include #include -#include -#include -#include #include "framework/ShardRequirements.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/Alias.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; -using psudb::PriorityQueue; -using psudb::queue_record; using psudb::Alias; +using psudb::byte; namespace de { -thread_local size_t wirs_cancelations = 0; - template struct AugBTreeNode { struct AugBTreeNode *left, *right; @@ -54,108 +47,52 @@ private: typedef decltype(R::weight) W; public: - AugBTree(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - m_bf = new BloomFilter(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS); - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); - - std::sort(base, stop, std::less>()); - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - wirs_cancelations++; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. 
It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - m_total_weight+= base->rec.weight; - - if (m_bf && base->is_tombstone()) { - m_tombstone_cnt++; - m_bf->insert(base->rec); - } - - base++; - } + AugBTree(BufferView buffer) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + { + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_wirs_structure(); } } - AugBTree(AugBTree** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) { - std::vector>> cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - + AugBTree(std::vector shards) + : m_data(nullptr) + , m_root(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_group_size(0) + , m_alloc_size(0) + , m_node_cnt(0) + , m_bf(nullptr) + { size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - m_total_weight += cursor.ptr->rec.weight; - if (m_bf && cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - if (m_bf) m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_wirs_structure(); @@ -163,13 +100,12 @@ public: } ~AugBTree() { - if (m_data) free(m_data); + free(m_data); for (size_t i=0; i>); } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? 
m_bf->memory_usage() : 0; } size_t get_lower_bound(const K& key) const { @@ -364,7 +299,6 @@ private: Wrapped* m_data; std::vector m_alias; AugBTreeNode* m_root; - W m_total_weight; size_t m_reccnt; size_t m_tombstone_cnt; size_t m_group_size; diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 7de9cb1..33ba82f 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -17,9 +17,8 @@ #include "framework/ShardRequirements.h" #include "util/bf_config.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-util/timer.h" +#include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; @@ -61,60 +60,18 @@ public: , m_alloc_size(0) , m_data(nullptr) { - TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped), (byte**) &m_data); - TIMER_START(); - auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); - buffer.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + buffer.get_record_count(); - std::sort(base, stop, std::less>()); - TIMER_STOP(); - - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } - - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } - - base++; - } - - TIMER_STOP(); - auto copy_time = TIMER_RESULT(); + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - TIMER_START(); if (m_reccnt > 0) { build_internal_levels(); } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - - free(temp_buffer); } ISAMTree(std::vector &shards) @@ -128,58 +85,18 @@ public: , m_alloc_size(0) , m_data(nullptr) { - std::vector>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue> pq(shards.size()); - size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped), (byte **) &m_data); - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? 
pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - if (cursor.ptr->is_tombstone()) { - //fprintf(stderr, "ISAM: Tombstone from shard %ld next record from shard %ld\n", - //now.version, next.version); - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { build_internal_levels(); @@ -225,11 +142,11 @@ public: size_t get_memory_usage() { - return m_alloc_size; + return m_alloc_size + m_internal_node_cnt * NODE_SZ; } size_t get_aux_memory_usage() { - return m_bf->memory_usage(); + return (m_bf) ? m_bf->memory_usage() : 0; } /* SortedShardInterface methods */ diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 13db26a..8031870 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -14,24 +14,19 @@ #include -#include -#include -#include -#include #include "framework/ShardRequirements.h" #include "pgm/pgm_index.hpp" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" +#include "util/SortedMerge.h" #include "util/bf_config.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { @@ -41,111 +36,65 @@ private: typedef decltype(R::key) K; typedef decltype(R::value) V; - public: - PGM(MutableBuffer* buffer) - : m_reccnt(0), m_tombstone_cnt(0) { - - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - std::vector keys; - - size_t offset = 0; - m_reccnt = 0; - auto base = buffer->get_data(); - auto stop = base + buffer->get_record_count(); + PGM(BufferView buffer) + : m_data(nullptr) + , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - std::sort(base, stop, std::less>()); - - K min_key = base->rec.key; - K max_key = (stop - 1)->rec.key; - - while (base < stop) { - if (!(base->is_tombstone()) && (base + 1) < stop) { - if (base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } - } else if (base->is_deleted()) { - base += 1; - continue; + if (m_reccnt > 0) { + std::vector keys; + for (size_t i=0; iheader &= 3; - m_data[m_reccnt++] = *base; - keys.emplace_back(base->rec.key); - base++; - } - - if (m_reccnt > 0) { m_pgm = pgm::PGMIndex(keys); } } - PGM(PGM** shards, size_t len) - : m_reccnt(0), m_tombstone_cnt(0) { - std::vector>> 
cursors; - cursors.reserve(len); - - PriorityQueue> pq(len); - + PGM(std::vector shards) + : m_data(nullptr) + , m_bf(nullptr) + , m_reccnt(0) + , m_tombstone_cnt(0) + , m_alloc_size(0) { + size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - for (size_t i = 0; i < len; ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - - std::vector keys; - - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor>(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor>(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - keys.emplace_back(cursor.ptr->rec.key); - } - pq.pop(); - - if (advance_cursor>(cursor)) pq.push(cursor.ptr, now.version); - } - } + m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); + + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + std::vector keys; + for (size_t i=0; i(keys); } } ~PGM() { - if (m_data) free(m_data); + free(m_data); + delete m_bf; } Wrapped *point_lookup(const R &rec, bool filter=false) { @@ -186,7 +135,7 @@ public: } size_t get_aux_memory_usage() { - return 0; + return (m_bf) ? 
m_bf->memory_usage() : 0; } size_t get_lower_bound(const K& key) const { @@ -228,6 +177,7 @@ public: private: Wrapped* m_data; + BloomFilter *m_bf; size_t m_reccnt; size_t m_tombstone_cnt; size_t m_alloc_size; diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index 9473177..f9fb3cb 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -15,11 +15,9 @@ #include "framework/ShardRequirements.h" #include "ts/builder.h" -#include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" #include "psu-ds/BloomFilter.h" #include "util/bf_config.h" -#include "psu-util/timer.h" +#include "util/SortedMerge.h" using psudb::CACHELINE_SIZE; using psudb::BloomFilter; @@ -45,78 +43,26 @@ public: , m_min_key(0) , m_bf(new BloomFilter(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS)) { - TIMER_INIT(); - m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, buffer.get_record_count() * sizeof(Wrapped), (byte**) &m_data); - TIMER_START(); - auto temp_buffer = (Wrapped *) psudb::sf_aligned_calloc(CACHELINE_SIZE, buffer.get_record_count(), sizeof(Wrapped)); - buffer.copy_to_buffer((byte *) temp_buffer); - - auto base = temp_buffer; - auto stop = base + buffer.get_record_count(); - std::sort(base, stop, std::less>()); - - K min_key = base->rec.key; - K max_key = (stop-1)->rec.key; - TIMER_STOP(); - - auto sort_time = TIMER_RESULT(); - - TIMER_START(); - auto bldr = ts::Builder(min_key, max_key, E); - while (base < stop) { - if (!base->is_tombstone() && (base + 1 < stop) - && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) { - base += 2; - continue; - } else if (base->is_deleted()) { - base += 1; - continue; - } + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; - // FIXME: this shouldn't be necessary, but the tagged record - // bypass doesn't seem to be working on this code-path, so this - // ensures that tagged records from the buffer are able to be - // dropped, eventually. It should only need to be &= 1 - base->header &= 3; - m_data[m_reccnt++] = *base; - bldr.AddKey(base->rec.key); - if (m_bf && base->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(base->rec); - } + if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; - /* - * determine the "true" min/max keys based on the scan. This is - * to avoid situations where the min/max in the input array - * are deleted and don't survive into the structure itself. - */ - if (m_reccnt == 0) { - m_max_key = m_min_key = base->rec.key; - } else if (base->rec.key > m_max_key) { - m_max_key = base->rec.key; - } else if (base->rec.key < m_min_key) { - m_min_key = base->rec.key; + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 0) { m_ts = bldr.Finalize(); } - TIMER_STOP(); - auto level_time = TIMER_RESULT(); - - free(temp_buffer); } TrieSpline(std::vector &shards) @@ -128,77 +74,28 @@ public: , m_min_key(0) , m_bf(nullptr) { - - std::vector>> cursors; - cursors.reserve(shards.size()); - - PriorityQueue> pq(shards.size()); - size_t attemp_reccnt = 0; size_t tombstone_count = 0; - - /* - * Initialize m_max_key and m_min_key using the values from the - * first shard. These will later be updated when building - * the initial priority queue to their true values. 
- */ - m_max_key = shards[0]->m_max_key; - m_min_key = shards[0]->m_min_key; + auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count); - for (size_t i = 0; i < shards.size(); ++i) { - if (shards[i]) { - auto base = shards[i]->get_data(); - cursors.emplace_back(Cursor{base, base + shards[i]->get_record_count(), 0, shards[i]->get_record_count()}); - attemp_reccnt += shards[i]->get_record_count(); - tombstone_count += shards[i]->get_tombstone_count(); - pq.push(cursors[i].ptr, i); - - if (shards[i]->m_max_key > m_max_key) { - m_max_key = shards[i]->m_max_key; - } - - if (shards[i]->m_min_key < m_min_key) { - m_min_key = shards[i]->m_min_key; - } - } else { - cursors.emplace_back(Cursor>{nullptr, nullptr, 0, 0}); - } - } - m_bf = new BloomFilter(BF_FPR, tombstone_count, BF_HASH_FUNCS); m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, attemp_reccnt * sizeof(Wrapped), (byte **) &m_data); - auto bldr = ts::Builder(m_min_key, m_max_key, E); - while (pq.size()) { - auto now = pq.peek(); - auto next = pq.size() > 1 ? pq.peek(1) : queue_record>{nullptr, 0}; - if (!now.data->is_tombstone() && next.data != nullptr && - now.data->rec == next.data->rec && next.data->is_tombstone()) { - - pq.pop(); pq.pop(); - auto& cursor1 = cursors[now.version]; - auto& cursor2 = cursors[next.version]; - if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version); - if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version); - } else { - auto& cursor = cursors[now.version]; - if (!cursor.ptr->is_deleted()) { - m_data[m_reccnt++] = *cursor.ptr; - bldr.AddKey(cursor.ptr->rec.key); - if (cursor.ptr->is_tombstone()) { - ++m_tombstone_cnt; - m_bf->insert(cursor.ptr->rec); - } - } - pq.pop(); - - if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version); - } - } + auto res = sorted_array_merge(cursors, m_data, m_bf); + m_reccnt = res.record_count; + m_tombstone_cnt = res.tombstone_count; if (m_reccnt > 0) { + m_min_key = m_data[0].rec.key; + m_max_key = m_data[m_reccnt-1].rec.key; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; imemory_usage() : 0; } size_t get_lower_bound(const K& key) const { -- cgit v1.2.3 From 0c4a80b90e1f25b42e00a8af57131040203d2f89 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 8 Feb 2024 13:10:29 -0500 Subject: Added compiler fence to block reordering I'm reasonably certain that this is a compiler bug... --- include/shard/ISAMTree.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/shard') diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 33ba82f..9458b1f 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -65,6 +65,12 @@ public: sizeof(Wrapped), (byte**) &m_data); + /* + * without this, gcc seems to hoist the building of the array + * _above_ its allocation under -O3, resulting in memfaults. 
+ */ + asm volatile ("" ::: "memory"); + auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf); m_reccnt = res.record_count; m_tombstone_cnt = res.tombstone_count; -- cgit v1.2.3 From 711769574e647839677739192698e400529efe75 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 8 Feb 2024 16:38:44 -0500 Subject: Updated VPTree to new shard/query interfaces --- include/shard/VPTree.h | 282 +++++++++---------------------------------------- 1 file changed, 50 insertions(+), 232 deletions(-) (limited to 'include/shard') diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index 2f5ebbb..ba13a87 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -5,98 +5,27 @@ * * Distributed under the Modified BSD License. * - * A shard shim around the VPTree spatial index. + * A shard shim around a VPTree for high-dimensional metric similarity + * search. * - * FIXME: separate the KNN query class out into a standalone - * file in include/query . + * FIXME: Does not yet support the tombstone delete policy. * */ #pragma once #include -#include -#include -#include -#include -#include -#include -#include +#include #include "framework/ShardRequirements.h" - #include "psu-ds/PriorityQueue.h" -#include "util/Cursor.h" -#include "psu-ds/BloomFilter.h" -#include "util/bf_config.h" using psudb::CACHELINE_SIZE; -using psudb::BloomFilter; using psudb::PriorityQueue; using psudb::queue_record; -using psudb::Alias; +using psudb::byte; namespace de { -template -struct KNNQueryParms { - R point; - size_t k; -}; - -template -class KNNQuery; - -template -struct KNNState { - size_t k; - - KNNState() { - k = 0; - } -}; - -template -struct KNNBufferState { - -}; - - -template -class KNNDistCmpMax { -public: - KNNDistCmpMax(R *baseline) : P(baseline) {} - - inline bool operator()(const R *a, const R *b) requires WrappedInterface { - return a->rec.calc_distance(P->rec) > b->rec.calc_distance(P->rec); - } - - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface){ - return a->calc_distance(*P) > b->calc_distance(*P); - } - -private: - R *P; -}; - -template -class KNNDistCmpMin { -public: - KNNDistCmpMin(R *baseline) : P(baseline) {} - - inline bool operator()(const R *a, const R *b) requires WrappedInterface { - return a->rec.calc_distance(P->rec) < b->rec.calc_distance(P->rec); - } - - inline bool operator()(const R *a, const R *b) requires (!WrappedInterface){ - return a->calc_distance(*P) < b->calc_distance(*P); - } - -private: - R *P; -}; - - - template class VPTree { private: @@ -117,16 +46,19 @@ private: } }; -public: - friend class KNNQuery; - VPTree(MutableBuffer* buffer) + +public: + VPTree(BufferView buffer) : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { - m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); - m_ptrs = new Wrapped*[buffer->get_record_count()]; + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + buffer.get_record_count() * + sizeof(Wrapped), + (byte**) &m_data); + + m_ptrs = new Wrapped*[buffer.get_record_count()]; size_t offset = 0; m_reccnt = 0; @@ -135,8 +67,8 @@ public: // this one will likely require the multi-pass // approach, as otherwise we'll need to sort the // records repeatedly on each reconstruction. 
- for (size_t i=0; iget_record_count(); i++) { - auto rec = buffer->get_data() + i; + for (size_t i=0; iis_deleted()) { continue; @@ -154,25 +86,24 @@ public: } } - VPTree(VPTree** shards, size_t len) + VPTree(std::vector shards) : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) { size_t attemp_reccnt = 0; - - for (size_t i=0; iget_record_count(); } - - m_alloc_size = (attemp_reccnt * sizeof(Wrapped)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped)) % CACHELINE_SIZE); - assert(m_alloc_size % CACHELINE_SIZE == 0); - m_data = (Wrapped*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size); + + m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE, + attemp_reccnt * sizeof(Wrapped), + (byte **) &m_data); m_ptrs = new Wrapped*[attemp_reccnt]; // FIXME: will eventually need to figure out tombstones // this one will likely require the multi-pass // approach, as otherwise we'll need to sort the // records repeatedly on each reconstruction. - for (size_t i=0; iget_record_count(); j++) { if (shards[i]->get_record_at(j)->is_deleted()) { continue; @@ -191,9 +122,9 @@ public: } ~VPTree() { - if (m_data) free(m_data); - if (m_root) delete m_root; - if (m_ptrs) delete[] m_ptrs; + free(m_data); + delete m_root; + delete[] m_ptrs; } Wrapped *point_lookup(const R &rec, bool filter=false) { @@ -248,11 +179,27 @@ public: } size_t get_aux_memory_usage() { + // FIXME: need to return the size of the unordered_map return 0; } + void search(const R &point, size_t k, PriorityQueue, + DistCmpMax>> &pq) { + double farthest = std::numeric_limits::max(); + + internal_search(m_root, point, k, pq, &farthest); + } private: + Wrapped* m_data; + Wrapped** m_ptrs; + std::unordered_map> m_lookup_map; + size_t m_reccnt; + size_t m_tombstone_cnt; + size_t m_node_cnt; + size_t m_alloc_size; + + vpnode *m_root; vpnode *build_vptree() { if (m_reccnt == 0) { @@ -332,7 +279,6 @@ private: return node; } - void quickselect(size_t start, size_t stop, size_t k, Wrapped *p, gsl_rng *rng) { if (start == stop) return; @@ -345,7 +291,6 @@ private: } } - size_t partition(size_t start, size_t stop, Wrapped *p, gsl_rng *rng) { auto pivot = start + gsl_rng_uniform_int(rng, stop - start); double pivot_dist = p->rec.calc_distance(m_ptrs[pivot]->rec); @@ -364,15 +309,15 @@ private: return j; } - void swap(size_t idx1, size_t idx2) { auto tmp = m_ptrs[idx1]; m_ptrs[idx1] = m_ptrs[idx2]; m_ptrs[idx2] = tmp; } + void internal_search(vpnode *node, const R &point, size_t k, PriorityQueue, + DistCmpMax>> &pq, double *farthest) { - void search(vpnode *node, const R &point, size_t k, PriorityQueue, KNNDistCmpMax>> &pq, double *farthest) { if (node == nullptr) return; if (node->leaf) { @@ -408,151 +353,24 @@ private: if (d < node->radius) { if (d - (*farthest) <= node->radius) { - search(node->inside, point, k, pq, farthest); + internal_search(node->inside, point, k, pq, farthest); } if (d + (*farthest) >= node->radius) { - search(node->outside, point, k, pq, farthest); + internal_search(node->outside, point, k, pq, farthest); } } else { if (d + (*farthest) >= node->radius) { - search(node->outside, point, k, pq, farthest); + internal_search(node->outside, point, k, pq, farthest); } if (d - (*farthest) <= node->radius) { - search(node->inside, point, k, pq, farthest); + internal_search(node->inside, point, k, pq, farthest); } } } - Wrapped* m_data; - Wrapped** m_ptrs; - std::unordered_map> m_lookup_map; - size_t m_reccnt; - size_t m_tombstone_cnt; - size_t m_node_cnt; - size_t m_alloc_size; - - vpnode *m_root; -}; - - -template -class 
KNNQuery { -public: - constexpr static bool EARLY_ABORT=false; - constexpr static bool SKIP_DELETE_FILTER=true; - - static void *get_query_state(VPTree *wss, void *parms) { - return nullptr; - } - - static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { - return nullptr; - } - - static void process_query_states(void *query_parms, std::vector &shard_states, void *buff_state) { - return; - } - - static std::vector> query(VPTree *wss, void *q_state, void *parms) { - std::vector> results; - KNNQueryParms *p = (KNNQueryParms *) parms; - Wrapped wrec; - wrec.rec = p->point; - wrec.header = 0; - - PriorityQueue, KNNDistCmpMax>> pq(p->k, &wrec); - - double farthest = std::numeric_limits::max(); - - wss->search(wss->m_root, p->point, p->k, pq, &farthest); - - while (pq.size() > 0) { - results.emplace_back(*pq.peek().data); - pq.pop(); - } - - return results; - } - - static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { - KNNQueryParms *p = (KNNQueryParms *) parms; - Wrapped wrec; - wrec.rec = p->point; - wrec.header = 0; - - size_t k = p->k; - - PriorityQueue, KNNDistCmpMax>> pq(k, &wrec); - for (size_t i=0; iget_record_count(); i++) { - // Skip over deleted records (under tagging) - if ((buffer->get_data())[i].is_deleted()) { - continue; - } - - if (pq.size() < k) { - pq.push(buffer->get_data() + i); - } else { - double head_dist = pq.peek().data->rec.calc_distance(wrec.rec); - double cur_dist = (buffer->get_data() + i)->rec.calc_distance(wrec.rec); - - if (cur_dist < head_dist) { - pq.pop(); - pq.push(buffer->get_data() + i); - } - } - } - - std::vector> results; - while (pq.size() > 0) { - results.emplace_back(*(pq.peek().data)); - pq.pop(); - } - - return results; - } - - static std::vector merge(std::vector>> &results, void *parms) { - KNNQueryParms *p = (KNNQueryParms *) parms; - R rec = p->point; - size_t k = p->k; - - PriorityQueue> pq(k, &rec); - for (size_t i=0; icalc_distance(rec); - double cur_dist = results[i][j].rec.calc_distance(rec); - if (cur_dist < head_dist) { - pq.pop(); - pq.push(&results[i][j].rec); - } - } - } - } - - std::vector output; - while (pq.size() > 0) { - output.emplace_back(*pq.peek().data); - pq.pop(); - } - - return output; - } - - static void delete_query_state(void *state) { - auto s = (KNNState *) state; - delete s; - } - - static void delete_buffer_query_state(void *state) { - auto s = (KNNBufferState *) state; - delete s; - } -}; + }; } -- cgit v1.2.3 From 402fc269c0aaa671d84a6d15918735ad4b90e6b2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 9 Feb 2024 12:30:21 -0500 Subject: Comment updates/fixes --- include/shard/Alias.h | 1 + include/shard/AugBTree.h | 2 ++ include/shard/ISAMTree.h | 1 + include/shard/PGM.h | 1 + include/shard/TrieSpline.h | 1 + include/shard/VPTree.h | 39 +++++++++++++++++++++++---------------- 6 files changed, 29 insertions(+), 16 deletions(-) (limited to 'include/shard') diff --git a/include/shard/Alias.h b/include/shard/Alias.h index f0d1d59..9275952 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -10,6 +10,7 @@ * structure. Designed to be used along side the WSS * query in include/query/wss.h * + * TODO: The code in this file is very poorly commented. 
*/ #pragma once diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h index 58bd098..54931bd 100644 --- a/include/shard/AugBTree.h +++ b/include/shard/AugBTree.h @@ -10,6 +10,8 @@ * used along side the WIRS query in include/query/wirs.h, but * also supports the necessary methods for other common query * types. + * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 9458b1f..3763271 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -8,6 +8,7 @@ * * A shard shim around an in-memory ISAM tree. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 8031870..e2752ef 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -9,6 +9,7 @@ * A shard shim around the static version of the PGM learned * index. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index f9fb3cb..2a432e8 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -7,6 +7,7 @@ * * A shard shim around the TrieSpline learned index. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index ba13a87..b342fe6 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -9,7 +9,7 @@ * search. * * FIXME: Does not yet support the tombstone delete policy. - * + * TODO: The code in this file is very poorly commented. */ #pragma once @@ -234,13 +234,15 @@ private: } vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) { - // base-case: sometimes happens (probably because of the +1 and -1 - // in the first recursive call) + /* + * base-case: sometimes happens (probably because of the +1 and -1 + * in the first recursive call) + */ if (start > stop) { return nullptr; } - // base-case: create a leaf node + /* base-case: create a leaf node */ if (stop - start <= LEAFSZ) { vpnode *node = new vpnode(); node->start = start; @@ -251,26 +253,30 @@ private: return node; } - // select a random element to be the root of the - // subtree + /* + * select a random element to be the root of the + * subtree + */ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1); swap(start, i); - // partition elements based on their distance from the start, - // with those elements with distance falling below the median - // distance going into the left sub-array and those above - // the median in the right. This is easily done using QuickSelect. + /* + * partition elements based on their distance from the start, + * with those elements with distance falling below the median + * distance going into the left sub-array and those above + * the median in the right. This is easily done using QuickSelect. + */ auto mid = (start + 1 + stop) / 2; quickselect(start + 1, stop, mid, m_ptrs[start], rng); - // Create a new node based on this partitioning + /* Create a new node based on this partitioning */ vpnode *node = new vpnode(); node->start = start; - // store the radius of the circle used for partitioning the node. + /* store the radius of the circle used for partitioning the node. 
*/ node->radius = m_ptrs[start]->rec.calc_distance(m_ptrs[mid]->rec); - // recursively construct the left and right subtrees + /* recursively construct the left and right subtrees */ node->inside = build_subtree(start + 1, mid-1, rng); node->outside = build_subtree(mid, stop, rng); @@ -279,6 +285,8 @@ private: return node; } + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. void quickselect(size_t start, size_t stop, size_t k, Wrapped *p, gsl_rng *rng) { if (start == stop) return; @@ -291,6 +299,8 @@ private: } } + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. size_t partition(size_t start, size_t stop, Wrapped *p, gsl_rng *rng) { auto pivot = start + gsl_rng_uniform_int(rng, stop - start); double pivot_dist = p->rec.calc_distance(m_ptrs[pivot]->rec); @@ -369,8 +379,5 @@ private: } } } - - }; - } -- cgit v1.2.3
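The cleanup pass above (bd74e27) leaves every sorted shard with the same construction skeleton: one cache-aligned, worst-case-sized allocation, then a call into the shared helpers from util/SortedMerge.h. The following is a minimal sketch of that skeleton, not code from the repository: the helper names and result fields (record_count, tombstone_count) are taken from the diffs, while the template parameter lists, which this log's rendering strips, are reconstructed here as plain typename parameters.

template <typename R>
class ExampleShard {
public:
    /* build from a snapshot of the mutable buffer */
    ExampleShard(BufferView<R> buffer)
        : m_data(nullptr), m_reccnt(0), m_tombstone_cnt(0), m_alloc_size(0)
        , m_bf(new BloomFilter<R>(BF_FPR, buffer.get_tombstone_count(), BF_HASH_FUNCS))
    {
        /* one worst-case allocation; tombstone cancellation only shrinks m_reccnt */
        m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
                                               buffer.get_record_count() * sizeof(Wrapped<R>),
                                               (psudb::byte **) &m_data);

        /* sort, cancel tombstone pairs, and populate the bloom filter in one pass */
        auto res = sorted_array_from_bufferview(std::move(buffer), m_data, m_bf);
        m_reccnt = res.record_count;
        m_tombstone_cnt = res.tombstone_count;

        /* any shard-specific index (ISAM levels, alias table, spline, ...)
         * would be built over m_data[0 .. m_reccnt) here */
    }

    /* build by merging existing shards during reconstruction */
    ExampleShard(std::vector<ExampleShard *> &shards)
        : m_data(nullptr), m_reccnt(0), m_tombstone_cnt(0), m_alloc_size(0), m_bf(nullptr)
    {
        size_t attemp_reccnt = 0;
        size_t tombstone_count = 0;
        auto cursors = build_cursor_vec(shards, &attemp_reccnt, &tombstone_count);

        m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
        m_alloc_size = psudb::sf_aligned_alloc(CACHELINE_SIZE,
                                               attemp_reccnt * sizeof(Wrapped<R>),
                                               (psudb::byte **) &m_data);

        /* k-way, tombstone-cancelling merge over the shard cursors */
        auto res = sorted_array_merge(cursors, m_data, m_bf);
        m_reccnt = res.record_count;
        m_tombstone_cnt = res.tombstone_count;
    }

    ~ExampleShard() {
        free(m_data);
        delete m_bf;
    }

private:
    Wrapped<R> *m_data;
    size_t m_reccnt;
    size_t m_tombstone_cnt;
    size_t m_alloc_size;
    BloomFilter<R> *m_bf;
};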
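The fence introduced in 0c4a80b is the standard GCC-style compiler barrier: the empty extended-asm statement emits no machine code, but its "memory" clobber tells the optimizer that any memory location may be read or written at that point, so loads and stores cannot be moved across it. A self-contained illustration of the idiom (the function and variable names here are hypothetical, not from the framework; GCC/Clang extended asm is required):

#include <cstdlib>
#include <cstddef>

int *make_table(size_t n) {
    int *buf = (int *) malloc(n * sizeof(int));

    /*
     * Compiler barrier: compiles to nothing, but the "memory"
     * clobber forces the optimizer to assume all memory may be
     * read or written here, so the stores below cannot be
     * hoisted above this point (nor above the allocation).
     */
    asm volatile ("" ::: "memory");

    for (size_t i = 0; i < n; i++) {
        buf[i] = (int) i;
    }

    return buf;
}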
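With KNNQuery removed (7117695), a caller now drives a k-NN search by handing VPTree::search() a bounded max-heap keyed on distance to the query point, just as the deleted query() and buffer_query() bodies did. A hedged sketch of such a caller follows; it assumes DistCmpMax is the surviving counterpart of the removed KNNDistCmpMax comparator, visible via ShardRequirements.h, and again reconstructs the stripped template argument lists.

template <typename R>
std::vector<R> knn_over_shard(de::VPTree<R> &shard, const R &point, size_t k) {
    /* the comparator keys off a baseline record, as in the removed KNNQuery */
    de::Wrapped<R> wrec;
    wrec.rec = point;
    wrec.header = 0;

    /* bounded max-heap: holds the k closest records seen so far */
    psudb::PriorityQueue<de::Wrapped<R>, de::DistCmpMax<de::Wrapped<R>>> pq(k, &wrec);
    shard.search(point, k, pq);

    /* drain the heap; records pop farthest-first, matching the old code */
    std::vector<R> results;
    while (pq.size() > 0) {
        results.emplace_back(pq.peek().data->rec);
        pq.pop();
    }

    return results;
}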