| author | Douglas Rumbaugh <dbr4@psu.edu> | 2023-11-13 11:44:09 -0500 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2023-11-13 11:44:09 -0500 |
| commit | 90bb0614fc1d8f1a185a778e31aaf9027c01aeb8 | |
| tree | 22a865978e398cef2fdcef763ab893c2da769a4a | /include/framework/structure |
| parent | 83486744600e8be338c75c2e3d2339452a392a9d | |
| download | dynamic-extension-90bb0614fc1d8f1a185a778e31aaf9027c01aeb8.tar.gz | |
Tombstone Compaction: re-enabled tombstone compaction
Currently, proactive buffer tombstone compaction is disabled by forcing
the buffer's tombstone capacity to match its record capacity: because a
tombstone in the buffer is itself a record, the tombstone count can never
reach that cap before the buffer fills, so the ordinary capacity-driven
flush always fires first. It isn't clear how best to handle proactive
buffer compactions in an environment where new buffers are spawned anyway.
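To make that mechanism concrete, here is a minimal sketch of why pinning the tombstone cap to the record capacity neutralizes the proactive trigger. The counter and cap names mirror those in `MutableBuffer.h` below; the trigger check itself is hypothetical and not the framework's actual flush logic.

```cpp
#include <cassert>
#include <cstddef>

/*
 * Hypothetical trigger check (illustration only, not the framework's
 * actual flush logic). Every tombstone in the buffer is itself a
 * record, so m_tombstonecnt <= m_reccnt <= m_cap always holds. With
 * m_tombstone_cap == m_cap, this condition can only become true once
 * the buffer is already full, at which point the capacity-driven
 * flush fires instead.
 */
struct BufferState {
    size_t m_cap;            // record capacity
    size_t m_tombstone_cap;  // tombstone capacity (== m_cap after this commit)
    size_t m_reccnt;         // records currently buffered
    size_t m_tombstonecnt;   // tombstones currently buffered
};

bool proactive_compaction_needed(const BufferState &b) {
    assert(b.m_tombstonecnt <= b.m_reccnt && b.m_reccnt <= b.m_cap);
    return b.m_tombstonecnt >= b.m_tombstone_cap;
}
```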
Diffstat (limited to 'include/framework/structure')
| -rw-r--r-- | include/framework/structure/ExtensionStructure.h | 51 |
|---|---|---|
| -rw-r--r-- | include/framework/structure/MutableBuffer.h | 2 |
2 files changed, 52 insertions(+), 1 deletion(-)
```diff
diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h
index 74cede6..a174805 100644
--- a/include/framework/structure/ExtensionStructure.h
+++ b/include/framework/structure/ExtensionStructure.h
@@ -201,6 +201,57 @@ public:
         return m_levels;
     }
 
+    std::vector<MergeTask> get_compaction_tasks() {
+        std::vector<MergeTask> tasks;
+
+        /* if the tombstone/delete invariant is satisfied, no need for compactions */
+        if (validate_tombstone_proportion()) {
+            return tasks;
+        }
+
+        /* locate the first level to violate the invariant */
+        level_index violation_idx = -1;
+        for (level_index i=0; i<m_levels.size(); i++) {
+            if (!validate_tombstone_proportion(i)) {
+                violation_idx = i;
+                break;
+            }
+        }
+
+        assert(violation_idx != -1);
+
+        level_index merge_base_level = find_mergable_level(violation_idx);
+        if (merge_base_level == -1) {
+            merge_base_level = grow();
+        }
+
+        for (level_index i=merge_base_level; i>0; i--) {
+            MergeTask task = {i-1, i};
+
+            /*
+             * The amount of storage required for the merge accounts
+             * for the cost of storing the new records, along with the
+             * cost of retaining the old records during the process
+             * (hence the 2x multiplier).
+             *
+             * FIXME: currently does not account for the *actual* size
+             * of the shards, only the storage for the records
+             * themselves.
+             */
+            size_t reccnt = m_levels[i-1]->get_record_count();
+            if constexpr (L == LayoutPolicy::LEVELING) {
+                if (can_merge_with(i, reccnt)) {
+                    reccnt += m_levels[i]->get_record_count();
+                }
+            }
+            //task.m_size = 2* reccnt * sizeof(R);
+
+            tasks.push_back(task);
+        }
+
+        return tasks;
+    }
+
     /*
      *
      */
diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h
index 671824f..8b17091 100644
--- a/include/framework/structure/MutableBuffer.h
+++ b/include/framework/structure/MutableBuffer.h
@@ -32,7 +32,7 @@ template <RecordInterface R> class MutableBuffer {
 public:
     MutableBuffer(size_t capacity, size_t max_tombstone_cap)
-    : m_cap(capacity), m_tombstone_cap(max_tombstone_cap), m_reccnt(0)
+    : m_cap(capacity), m_tombstone_cap(capacity), m_reccnt(0)
     , m_tombstonecnt(0), m_weight(0), m_max_weight(0), m_tail(0)
     {
         m_data = (Wrapped<R>*) psudb::sf_aligned_alloc(CACHELINE_SIZE, capacity*sizeof(Wrapped<R>));
         m_merge_data = (Wrapped<R>*) psudb::sf_aligned_alloc(CACHELINE_SIZE, capacity*sizeof(Wrapped<R>));
```
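For context on how the new routine might slot into the framework, the sketch below shows a driver loop consuming the returned tasks. Only `get_compaction_tasks()` and the `{source, target}` shape of `MergeTask` come from the diff above; the `MergeTask` mirror and the `run_compactions` driver are assumptions for illustration.

```cpp
#include <cstdio>
#include <vector>

/* Hypothetical mirror of the framework's MergeTask: a {source, target}
 * level pair, matching the {i-1, i} initializer in get_compaction_tasks()
 * above. */
struct MergeTask {
    int source_level;
    int target_level;
};

/*
 * Illustrative driver (not part of the commit): tasks arrive ordered
 * from the merge base level upward ({base-1, base}, {base-2, base-1},
 * ...), so executing them in order first frees space at the deepest
 * level, then cascades each shallower level into the slot vacated
 * beneath it.
 */
void run_compactions(const std::vector<MergeTask> &tasks) {
    for (const auto &task : tasks) {
        /* a real scheduler would invoke the framework's merge here */
        std::printf("merge level %d into level %d\n",
                    task.source_level, task.target_level);
    }
}
```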