author    Douglas Rumbaugh <dbr4@psu.edu>  2025-09-25 14:42:44 -0400
committer Douglas Rumbaugh <dbr4@psu.edu>  2025-09-25 14:42:44 -0400
commit    cf5f3bbb0cb58430ed68ad3ebfcefc009e553d71 (patch)
tree      4c17bc3169ee195c236cea9c9efda0aef7488e3c /include/shard/FSTrie.h
parent    826c1fff5accbaa6b415acc176a5acbeb5f691b6 (diff)
download  dynamic-extension-cf5f3bbb0cb58430ed68ad3ebfcefc009e553d71.tar.gz
Code reformatting
Diffstat (limited to 'include/shard/FSTrie.h')
-rw-r--r--  include/shard/FSTrie.h  282
1 file changed, 135 insertions(+), 147 deletions(-)
diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h
index 59ff116..31db40b 100644
--- a/include/shard/FSTrie.h
+++ b/include/shard/FSTrie.h
@@ -9,194 +9,182 @@
*/
#pragma once
-
#include <vector>
#include "framework/ShardRequirements.h"
#include "fst.hpp"
#include "util/SortedMerge.h"
-using psudb::CACHELINE_SIZE;
using psudb::BloomFilter;
+using psudb::byte;
+using psudb::CACHELINE_SIZE;
using psudb::PriorityQueue;
using psudb::queue_record;
-using psudb::byte;
namespace de {
-template <KVPInterface R>
-class FSTrie {
+template <KVPInterface R> class FSTrie {
public:
- typedef R RECORD;
-private:
+ typedef R RECORD;
- typedef decltype(R::key) K;
- typedef decltype(R::value) V;
- static_assert(std::is_same_v<K, const char*>, "FST requires const char* keys.");
+private:
+ typedef decltype(R::key) K;
+ typedef decltype(R::value) V;
+ static_assert(std::is_same_v<K, const char *>,
+ "FST requires const char* keys.");
public:
- FSTrie(BufferView<R> buffer)
- : m_data(nullptr)
- , m_reccnt(0)
- , m_alloc_size(0)
- {
- m_data = new Wrapped<R>[buffer.get_record_count()]();
- m_alloc_size = sizeof(Wrapped<R>) * buffer.get_record_count();
-
- size_t cnt = 0;
- std::vector<std::string> keys;
- keys.reserve(buffer.get_record_count());
-
- /*
- * Copy the contents of the buffer view into a temporary buffer, and
- * sort them. We still need to iterate over these temporary records to
- * apply tombstone/deleted record filtering, as well as any possible
- * per-record processing that is required by the shard being built.
- */
- auto temp_buffer = new Wrapped<R>[buffer.get_record_count()]();
- for (size_t i=0; i<buffer.get_record_count(); i++) {
- temp_buffer[i] = *(buffer.get(i));
- }
-
- auto base = temp_buffer;
- auto stop = base + buffer.get_record_count();
- std::sort(base, stop, std::less<Wrapped<R>>());
+ FSTrie(BufferView<R> buffer) : m_data(nullptr), m_reccnt(0), m_alloc_size(0) {
+ m_data = new Wrapped<R>[buffer.get_record_count()]();
+ m_alloc_size = sizeof(Wrapped<R>) * buffer.get_record_count();
+
+ size_t cnt = 0;
+ std::vector<std::string> keys;
+ keys.reserve(buffer.get_record_count());
+
+ /*
+ * Copy the contents of the buffer view into a temporary buffer, and
+ * sort them. We still need to iterate over these temporary records to
+ * apply tombstone/deleted record filtering, as well as any possible
+ * per-record processing that is required by the shard being built.
+ */
+ auto temp_buffer = new Wrapped<R>[buffer.get_record_count()]();
+ for (size_t i = 0; i < buffer.get_record_count(); i++) {
+ temp_buffer[i] = *(buffer.get(i));
+ }
- for (size_t i=0; i<buffer.get_record_count(); i++) {
- if (temp_buffer[i].is_deleted() || !temp_buffer[i].is_visible() || temp_buffer[i].rec.key[0] != '\0') {
- continue;
- }
+ auto base = temp_buffer;
+ auto stop = base + buffer.get_record_count();
+ std::sort(base, stop, std::less<Wrapped<R>>());
- m_data[cnt] = temp_buffer[i];
- m_data[cnt].clear_timestamp();
+ for (size_t i = 0; i < buffer.get_record_count(); i++) {
+ if (temp_buffer[i].is_deleted() || !temp_buffer[i].is_visible() ||
+ temp_buffer[i].rec.key[0] != '\0') {
+ continue;
+ }
- keys.push_back(std::string(m_data[cnt].rec.key));
- cnt++;
- }
+ m_data[cnt] = temp_buffer[i];
+ m_data[cnt].clear_timestamp();
- m_reccnt = cnt;
- if (m_reccnt > 0) {
- m_fst = new fst::Trie(keys, true, 1);
- }
+ keys.push_back(std::string(m_data[cnt].rec.key));
+ cnt++;
+ }
- delete[] temp_buffer;
+ m_reccnt = cnt;
+ if (m_reccnt > 0) {
+ m_fst = new fst::Trie(keys, true, 1);
}
- FSTrie(std::vector<const FSTrie*> const &shards)
- : m_data(nullptr)
- , m_reccnt(0)
- , m_alloc_size(0)
- {
- size_t attemp_reccnt = 0;
- size_t tombstone_count = 0;
- auto cursors = build_cursor_vec<R, FSTrie>(shards, &attemp_reccnt, &tombstone_count);
-
- m_data = new Wrapped<R>[attemp_reccnt]();
- m_alloc_size = attemp_reccnt * sizeof(Wrapped<R>);
-
- std::vector<std::string> keys;
- keys.reserve(attemp_reccnt);
-
- // FIXME: For smaller cursor arrays, it may be more efficient to skip
- // the priority queue and just do a scan.
- PriorityQueue<Wrapped<R>> pq(cursors.size());
- for (size_t i=0; i<cursors.size(); i++) {
- pq.push(cursors[i].ptr, i);
- }
+ delete[] temp_buffer;
+ }
- while (pq.size()) {
- auto now = pq.peek();
- auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
- /*
- * if the current record is not a tombstone, and the next record is
- * a tombstone that matches the current one, then the current one
- * has been deleted, and both it and its tombstone can be skipped
- * over.
- */
- if (!now.data->is_tombstone() && next.data != nullptr &&
- now.data->rec == next.data->rec && next.data->is_tombstone()) {
-
- pq.pop(); pq.pop();
- auto& cursor1 = cursors[now.version];
- auto& cursor2 = cursors[next.version];
- if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version);
- if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version);
- } else {
- auto& cursor = cursors[now.version];
- /* skip over records that have been deleted via tagging */
- if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') {
- m_data[m_reccnt] = *cursor.ptr;
- keys.push_back(std::string(m_data[m_reccnt].rec.key));
-
- m_reccnt++;
- }
- pq.pop();
-
- if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version);
- }
- }
+ FSTrie(std::vector<const FSTrie *> const &shards)
+ : m_data(nullptr), m_reccnt(0), m_alloc_size(0) {
+ size_t attemp_reccnt = 0;
+ size_t tombstone_count = 0;
+ auto cursors =
+ build_cursor_vec<R, FSTrie>(shards, &attemp_reccnt, &tombstone_count);
- if (m_reccnt > 0) {
- m_fst = new fst::Trie(keys, true, 1);
- }
+ m_data = new Wrapped<R>[attemp_reccnt]();
+ m_alloc_size = attemp_reccnt * sizeof(Wrapped<R>);
+
+ std::vector<std::string> keys;
+ keys.reserve(attemp_reccnt);
+
+ // FIXME: For smaller cursor arrays, it may be more efficient to skip
+ // the priority queue and just do a scan.
+ PriorityQueue<Wrapped<R>> pq(cursors.size());
+ for (size_t i = 0; i < cursors.size(); i++) {
+ pq.push(cursors[i].ptr, i);
}
- ~FSTrie() {
- delete[] m_data;
- delete m_fst;
+ while (pq.size()) {
+ auto now = pq.peek();
+ auto next =
+ pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+ /*
+ * if the current record is not a tombstone, and the next record is
+ * a tombstone that matches the current one, then the current one
+ * has been deleted, and both it and its tombstone can be skipped
+ * over.
+ */
+ if (!now.data->is_tombstone() && next.data != nullptr &&
+ now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+ pq.pop();
+ pq.pop();
+ auto &cursor1 = cursors[now.version];
+ auto &cursor2 = cursors[next.version];
+ if (advance_cursor(cursor1))
+ pq.push(cursor1.ptr, now.version);
+ if (advance_cursor(cursor2))
+ pq.push(cursor2.ptr, next.version);
+ } else {
+ auto &cursor = cursors[now.version];
+ /* skip over records that have been deleted via tagging */
+ if (!cursor.ptr->is_deleted() && cursor.ptr->rec.key[0] != '\0') {
+ m_data[m_reccnt] = *cursor.ptr;
+ keys.push_back(std::string(m_data[m_reccnt].rec.key));
+
+ m_reccnt++;
+ }
+ pq.pop();
+
+ if (advance_cursor(cursor))
+ pq.push(cursor.ptr, now.version);
+ }
}
- Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
+ if (m_reccnt > 0) {
+ m_fst = new fst::Trie(keys, true, 1);
+ }
+ }
- auto idx = m_fst->exactSearch(rec.key);
+ ~FSTrie() {
+ delete[] m_data;
+ delete m_fst;
+ }
- if (idx == fst::kNotFound) {
- return nullptr;
- }
+ Wrapped<R> *point_lookup(const R &rec, bool filter = false) {
- // FIXME: for convenience, I'm treating this Trie as a unique index
- // for now, so no need to scan forward and/or check values. This
- // also makes the point lookup query class a lot easier to make.
- // Ultimately, though, we can support non-unique indexes with some
- // extra work.
+ auto idx = m_fst->exactSearch(rec.key);
- return m_data + idx;
+ if (idx == fst::kNotFound) {
+ return nullptr;
}
- Wrapped<R>* get_data() const {
- return m_data;
- }
-
- size_t get_record_count() const {
- return m_reccnt;
- }
+ // FIXME: for convenience, I'm treating this Trie as a unique index
+ // for now, so no need to scan forward and/or check values. This
+ // also makes the point lookup query class a lot easier to make.
+ // Ultimately, though, we can support non-unique indexes with some
+ // extra work.
- size_t get_tombstone_count() const {
- return 0;
- }
+ return m_data + idx;
+ }
- const Wrapped<R>* get_record_at(size_t idx) const {
- if (idx >= m_reccnt) return nullptr;
- return m_data + idx;
- }
+ Wrapped<R> *get_data() const { return m_data; }
+ size_t get_record_count() const { return m_reccnt; }
- size_t get_memory_usage() const {
- return m_fst->getMemoryUsage();
- }
+ size_t get_tombstone_count() const { return 0; }
- size_t get_aux_memory_usage() const {
- return m_alloc_size;
- }
+ const Wrapped<R> *get_record_at(size_t idx) const {
+ if (idx >= m_reccnt)
+ return nullptr;
+ return m_data + idx;
+ }
- size_t get_lower_bound(R &rec) {return 0;}
- size_t get_upper_bound(R &rec) {return 0;}
+ size_t get_memory_usage() const { return m_fst->getMemoryUsage(); }
-private:
+ size_t get_aux_memory_usage() const { return m_alloc_size; }
+
+ size_t get_lower_bound(R &rec) { return 0; }
+ size_t get_upper_bound(R &rec) { return 0; }
- Wrapped<R>* m_data;
- size_t m_reccnt;
- size_t m_alloc_size;
- fst::Trie *m_fst;
+private:
+ Wrapped<R> *m_data;
+ size_t m_reccnt;
+ size_t m_alloc_size;
+ fst::Trie *m_fst;
};
-}
+} // namespace de
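
Two illustrations follow for readers skimming the diff above.

First, the shard-merge constructor cancels records against their tombstones while merging sorted runs through a priority queue. Below is a minimal standalone sketch of that cancellation rule, using only the standard library; the names (Rec, merge_cancel) and the simplification of dropping unmatched tombstones are illustrative assumptions, not the framework's actual types.

#include <cstddef>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

/* Simplified stand-in for Wrapped<R>: a key plus a tombstone flag. */
struct Rec {
  std::string key;
  bool tombstone;
};

/* Merge several key-sorted runs. When a record is immediately followed by a
 * matching tombstone, both are skipped -- the same cancellation the FSTrie
 * merge constructor performs with its PriorityQueue over shard cursors. For
 * simplicity, this sketch also drops tombstones that find no match. */
std::vector<Rec> merge_cancel(const std::vector<std::vector<Rec>> &runs) {
  using Cur = std::pair<size_t, size_t>; /* (run index, offset into run) */
  auto cmp = [&](const Cur &a, const Cur &b) {
    const Rec &ra = runs[a.first][a.second];
    const Rec &rb = runs[b.first][b.second];
    if (ra.key != rb.key)
      return ra.key > rb.key;           /* min-heap ordered by key */
    return ra.tombstone > rb.tombstone; /* records surface before tombstones */
  };
  std::priority_queue<Cur, std::vector<Cur>, decltype(cmp)> pq(cmp);
  for (size_t i = 0; i < runs.size(); i++)
    if (!runs[i].empty())
      pq.push({i, 0});

  std::vector<Rec> out;
  while (!pq.empty()) {
    Cur now = pq.top();
    pq.pop();
    const Rec &r = runs[now.first][now.second];
    bool cancelled = false;
    if (!r.tombstone && !pq.empty()) {
      Cur next = pq.top();
      const Rec &t = runs[next.first][next.second];
      if (t.tombstone && t.key == r.key) {
        /* record + tombstone annihilate: consume both cursors */
        pq.pop();
        if (next.second + 1 < runs[next.first].size())
          pq.push({next.first, next.second + 1});
        cancelled = true;
      }
    }
    if (!cancelled && !r.tombstone)
      out.push_back(r);
    if (now.second + 1 < runs[now.first].size())
      pq.push({now.first, now.second + 1});
  }
  return out;
}

int main() {
  std::vector<std::vector<Rec>> runs = {
      {{"apple", false}, {"cherry", false}},
      {{"banana", false}, {"cherry", true}}}; /* tombstone deletes cherry */
  for (const Rec &r : merge_cancel(runs))
    std::cout << r.key << "\n"; /* prints apple, then banana */
}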
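Second, point_lookup leans on an invariant set up by both constructors: keys are inserted into the fst::Trie in sorted order, in lockstep with m_data, so the index returned by exactSearch doubles as an offset into the record array. A rough usage sketch, assuming the fst.hpp API behaves the way the shard code above exercises it (construction from a sorted key vector plus two tuning parameters, and exactSearch returning an index or fst::kNotFound):

#include <string>
#include <vector>
#include "fst.hpp"

int main() {
  /* Keys must be sorted (and are treated as unique, per the FIXME in
   * point_lookup); the shard constructors guarantee the ordering by sorting
   * the wrapped records before collecting their keys. */
  std::vector<std::string> keys = {"apple", "banana", "cherry"};

  /* The trailing arguments mirror the shard code's fst::Trie(keys, true, 1);
   * their exact semantics are an assumption about fst.hpp. */
  fst::Trie trie(keys, true, 1);

  auto idx = trie.exactSearch("banana");
  if (idx != fst::kNotFound) {
    /* idx == 1: the key's rank, which FSTrie uses as m_data + idx. */
  }
}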