From 63929187f2c1d0c95719d8435794a5136cb1cc73 Mon Sep 17 00:00:00 2001
From: Douglas Rumbaugh
Date: Wed, 26 Jul 2023 09:44:44 -0400
Subject: Adjusted calculation for index size

---
 benchmarks/alex_rq_bench.cpp   |  2 +-
 benchmarks/btree_irs_bench.cpp |  1 +
 benchmarks/btree_rq_bench.cpp  |  1 +
 include/shard/MemISAM.h        | 21 +++++++++++----------
 include/shard/PGM.h            | 15 ++++++++-------
 include/shard/TrieSpline.h     | 15 ++++++++-------
 include/shard/VPTree.h         | 15 ++++++++-------
 include/shard/WIRS.h           | 18 ++++++++++--------
 include/shard/WSS.h            | 15 ++++++++-------
 9 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/benchmarks/alex_rq_bench.cpp b/benchmarks/alex_rq_bench.cpp
index 00e0002..54df024 100644
--- a/benchmarks/alex_rq_bench.cpp
+++ b/benchmarks/alex_rq_bench.cpp
@@ -203,7 +203,7 @@ int main(int argc, char **argv)
     size_t insert_cnt = record_count - warmup_cnt;
 
     alex_rq_insert(alex, datafile, insert_cnt, delete_prop, to_delete, true);
-    size_t memory_usage = alex.model_size();
+    size_t memory_usage = alex.model_size() + alex.data_size();
     fprintf(stdout, "%ld\t", memory_usage);
 
     alex_rq_bench(alex, queries);
diff --git a/benchmarks/btree_irs_bench.cpp b/benchmarks/btree_irs_bench.cpp
index c64df8a..862fc6b 100644
--- a/benchmarks/btree_irs_bench.cpp
+++ b/benchmarks/btree_irs_bench.cpp
@@ -77,6 +77,7 @@ int main(int argc, char **argv)
     insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true);
 
     size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits<key_type, btree_record>::inner_slots * (sizeof(key_type) + sizeof(void*));
+    memory_usage += btree.get_stats().leaves * tlx::btree_default_traits<key_type, btree_record>::leaf_slots * sizeof(btree_record);
     fprintf(stdout, "%ld\t", memory_usage);
 
     btree_sample_bench(btree, queries);
diff --git a/benchmarks/btree_rq_bench.cpp b/benchmarks/btree_rq_bench.cpp
index 818e6f4..d92b45d 100644
--- a/benchmarks/btree_rq_bench.cpp
+++ b/benchmarks/btree_rq_bench.cpp
@@ -76,6 +76,7 @@ int main(int argc, char **argv)
     insert_tput_bench(btree, datafile, insert_cnt, delete_prop, to_delete, true);
 
     size_t memory_usage = btree.get_stats().inner_nodes * tlx::btree_default_traits<key_type, btree_record>::inner_slots * (sizeof(key_type) + sizeof(void*));
+    memory_usage += btree.get_stats().leaves * tlx::btree_default_traits<key_type, btree_record>::leaf_slots * sizeof(btree_record);
     fprintf(stdout, "%ld\t", memory_usage);
 
     btree_rq_bench(btree, queries);
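
The two B-tree benchmarks above previously charged only inner-node capacity; they now add leaf capacity as well, and the ALEX benchmark likewise reports model_size() plus data_size(), so every structure's figure covers record storage rather than index overhead alone. Below is a minimal sketch of the same capacity-based estimate; key_type, btree_record, and the slot counts are hypothetical stand-ins for the benchmarks' own typedefs and for the values tlx::btree_default_traits would supply.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <utility>

    using key_type     = uint64_t;                       // assumed benchmark typedef
    using btree_record = std::pair<uint64_t, uint64_t>;  // assumed benchmark typedef

    struct tree_stats {      // stand-in for the result of btree.get_stats()
        size_t inner_nodes;
        size_t leaves;
    };

    // Capacity-based estimate: every slot of every node is charged, whether
    // occupied or not. Inner nodes hold keys and child pointers; leaves hold
    // full records.
    size_t btree_memory_estimate(const tree_stats &s,
                                 size_t inner_slots, size_t leaf_slots) {
        size_t usage = s.inner_nodes * inner_slots
                     * (sizeof(key_type) + sizeof(void *));
        usage += s.leaves * leaf_slots * sizeof(btree_record);
        return usage;
    }

    int main() {
        tree_stats s{100, 5000};  // hypothetical node counts
        printf("%zu\n", btree_memory_estimate(s, 16, 16));
        return 0;
    }

Since unoccupied slots are charged too, this is an upper bound on the tree's live footprint rather than an exact measurement.
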
diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h
index 4680cb9..3e3215f 100644
--- a/include/shard/MemISAM.h
+++ b/include/shard/MemISAM.h
@@ -99,9 +99,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
 
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         TIMER_INIT();
@@ -175,9 +175,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         size_t offset = 0;
@@ -254,7 +254,7 @@ public:
     }
 
     size_t get_memory_usage() {
-        return m_internal_node_cnt * inmem_isam_node_size;
+        return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size;
     }
 
 private:
@@ -307,10 +307,10 @@ private:
             node_cnt += level_node_cnt;
         } while (level_node_cnt > 1);
 
-        size_t alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
+        m_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
 
-        m_isam_nodes = (InMemISAMNode*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_isam_nodes = (InMemISAMNode*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
         m_internal_node_cnt = node_cnt;
 
         memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size);
@@ -371,6 +371,7 @@ private:
     size_t m_tombstone_cnt;
     size_t m_internal_node_cnt;
     size_t m_deleted_cnt;
+    size_t m_alloc_size;
 };
 
 template
diff --git a/include/shard/PGM.h b/include/shard/PGM.h
index d72ad55..0d4268a 100644
--- a/include/shard/PGM.h
+++ b/include/shard/PGM.h
@@ -67,9 +67,9 @@ public:
 
     PGM(MutableBuffer<R>* buffer) : m_reccnt(0), m_tombstone_cnt(0) {
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         std::vector<K> keys;
 
         m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
@@ -141,9 +141,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         std::vector<K> keys;
@@ -223,7 +223,7 @@
 
     size_t get_memory_usage() {
-        return m_pgm.size_in_bytes();
+        return m_pgm.size_in_bytes() + m_alloc_size;
     }
 
     size_t get_lower_bound(const K& key) const {
@@ -267,6 +267,7 @@ private:
     Wrapped<R>* m_data;
     size_t m_reccnt;
    size_t m_tombstone_cnt;
+    size_t m_alloc_size;
     K m_max_key;
     K m_min_key;
     pgm::PGMIndex<K> m_pgm;
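
Every shard constructor in this patch rounds its record-array request up to a CACHELINE_SIZE multiple before calling std::aligned_alloc, and the rounded figure is now kept in the new m_alloc_size member instead of a local, so get_memory_usage() can report the actual reservation. A small sketch of that rounding idiom, assuming a 64-byte cacheline as a stand-in for the framework's CACHELINE_SIZE constant:

    #include <cassert>
    #include <cstddef>
    #include <cstdlib>

    constexpr size_t CACHELINE_SIZE = 64;  // assumed value

    // Mirrors the patch's expression: bytes + (C - bytes % C). Note the edge
    // case it inherits: when bytes is already a multiple of CACHELINE_SIZE,
    // a full extra cacheline is added rather than zero bytes.
    size_t cacheline_aligned_size(size_t bytes) {
        return bytes + (CACHELINE_SIZE - bytes % CACHELINE_SIZE);
    }

    int main() {
        size_t sz = cacheline_aligned_size(1000 * 24);  // e.g. 1000 24-byte records
        assert(sz % CACHELINE_SIZE == 0);
        // std::aligned_alloc requires the size to be a multiple of the
        // alignment, which is exactly what the assert above (and in the
        // patch) guarantees.
        void *data = std::aligned_alloc(CACHELINE_SIZE, sz);
        std::free(data);
        return 0;
    }
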
diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h
index d09d2a6..f06756f 100644
--- a/include/shard/TrieSpline.h
+++ b/include/shard/TrieSpline.h
@@ -67,9 +67,9 @@ public:
 
     TrieSpline(MutableBuffer<R>* buffer) : m_reccnt(0), m_tombstone_cnt(0) {
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
@@ -164,9 +164,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
         auto bldr = ts::Builder<K>(m_min_key, m_max_key, E);
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         while (pq.size()) {
             auto now = pq.peek();
@@ -244,7 +244,7 @@
 
     size_t get_memory_usage() {
-        return 0;
+        return m_ts.GetSize() + m_alloc_size;
     }
 
 private:
@@ -289,6 +289,7 @@ private:
     Wrapped<R>* m_data;
     size_t m_reccnt;
     size_t m_tombstone_cnt;
+    size_t m_alloc_size;
     K m_max_key;
     K m_min_key;
     ts::TrieSpline<K> m_ts;
diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h
index 6f0423b..5f740dc 100644
--- a/include/shard/VPTree.h
+++ b/include/shard/VPTree.h
@@ -112,9 +112,9 @@ public:
 
     VPTree(MutableBuffer<R>* buffer) : m_reccnt(0), m_tombstone_cnt(0), m_root(nullptr), m_node_cnt(0) {
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
         m_ptrs = new Wrapped<R>*[buffer->get_record_count()];
 
         size_t offset = 0;
@@ -152,9 +152,9 @@ public:
             attemp_reccnt += shards[i]->get_record_count();
         }
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
         m_ptrs = new Wrapped<R>*[attemp_reccnt];
 
         // FIXME: will eventually need to figure out tombstones
@@ -233,7 +233,7 @@ public:
     }
 
     size_t get_memory_usage() {
-        return m_node_cnt * sizeof(vpnode) + m_reccnt * sizeof(R*);
+        return m_node_cnt * sizeof(vpnode) + m_reccnt * sizeof(R*) + m_alloc_size;
     }
 
 private:
@@ -415,6 +415,7 @@ private:
     size_t m_reccnt;
     size_t m_tombstone_cnt;
     size_t m_node_cnt;
+    size_t m_alloc_size;
     vpnode *m_root;
 };
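
TrieSpline and VPTree get the same treatment as the other shards: the allocation size is recorded once at build time, and get_memory_usage() sums index overhead, any pointer table, and record storage. A reduced, hypothetical shard showing just that bookkeeping (the member names mirror the diff; everything else is invented for illustration):

    #include <cstddef>
    #include <cstdlib>

    struct ShardSketch {
        struct node { node *left, *right; };  // stand-in index node

        void  *m_data       = nullptr;
        size_t m_alloc_size = 0;  // bytes reserved for the record array
        size_t m_node_cnt   = 0;  // index nodes allocated
        size_t m_reccnt     = 0;  // records tracked through a pointer table

        // Reserve a cacheline-aligned record array, remembering its size.
        void allocate_records(size_t bytes, size_t cacheline = 64) {
            m_alloc_size = bytes + (cacheline - bytes % cacheline);
            m_data = std::aligned_alloc(cacheline, m_alloc_size);
        }

        // Index overhead + pointer table + record storage, the same shape
        // as the patched VPTree::get_memory_usage().
        size_t get_memory_usage() const {
            return m_node_cnt * sizeof(node)
                 + m_reccnt * sizeof(void *)
                 + m_alloc_size;
        }
    };

    int main() {
        ShardSketch s;
        s.allocate_records(1000 * sizeof(double));
        size_t usage = s.get_memory_usage();
        (void)usage;
        std::free(s.m_data);
        return 0;
    }
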
diff --git a/include/shard/WIRS.h b/include/shard/WIRS.h
index 9760443..1a63092 100644
--- a/include/shard/WIRS.h
+++ b/include/shard/WIRS.h
@@ -98,9 +98,9 @@ public:
 
     WIRS(MutableBuffer<R>* buffer) : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) {
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
@@ -168,9 +168,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         while (pq.size()) {
             auto now = pq.peek();
@@ -253,7 +253,7 @@
 
     size_t get_memory_usage() {
-        return 0;
+        return m_alloc_size + m_node_cnt * sizeof(wirs_node<R>);
    }
 
 private:
@@ -338,7 +338,7 @@ private:
 
         if (sum) w /= sum;
         else w = 1.0 / node_weights.size();
-
+        m_node_cnt += 1;
         size_t mid = (low + high) / 2;
         return new wirs_node<R>{construct_wirs_node(weights, low, mid),
                                 construct_wirs_node(weights, mid + 1, high),
@@ -361,6 +361,8 @@ private:
     size_t m_reccnt;
     size_t m_tombstone_cnt;
     size_t m_group_size;
+    size_t m_alloc_size;
+    size_t m_node_cnt;
     BloomFilter<R> *m_bf;
 };
diff --git a/include/shard/WSS.h b/include/shard/WSS.h
index 94c7ad3..17e9eb9 100644
--- a/include/shard/WSS.h
+++ b/include/shard/WSS.h
@@ -79,9 +79,9 @@ public:
 
     WSS(MutableBuffer<R>* buffer) : m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) {
-        size_t alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
@@ -152,9 +152,9 @@ public:
 
         m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
 
-        size_t alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, alloc_size);
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
 
         std::vector<W> weights;
@@ -236,7 +236,7 @@
 
     size_t get_memory_usage() {
-        return 0;
+        return m_alloc_size;
     }
 
 private:
@@ -278,6 +278,7 @@ private:
     size_t m_reccnt;
     size_t m_tombstone_cnt;
     size_t m_group_size;
+    size_t m_alloc_size;
     BloomFilter<R> *m_bf;
 };
-- 
cgit v1.2.3
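
WIRS previously reported zero here; the patch adds m_node_cnt, bumps it inside construct_wirs_node(), and charges m_node_cnt * sizeof(wirs_node<R>) on top of the record array. A minimal sketch of that bookkeeping, not the WIRS code itself, with an invented node type and base case:

    #include <cstddef>
    #include <vector>

    struct node {          // stand-in; the real wirs_node carries more fields
        node *left, *right;
        double weight;
    };

    // Count nodes as the recursive builder allocates them, mirroring the
    // patch's `m_node_cnt += 1`, so the index can later be charged as
    // node_cnt * sizeof(node).
    node *build(const std::vector<double> &weights, size_t low, size_t high,
                size_t &node_cnt) {
        if (low >= high) return nullptr;  // hypothetical base case
        node_cnt += 1;
        size_t mid = (low + high) / 2;
        return new node{build(weights, low, mid, node_cnt),
                        build(weights, mid + 1, high, node_cnt),
                        weights[mid]};
    }

    void destroy(node *n) {
        if (n == nullptr) return;
        destroy(n->left);
        destroy(n->right);
        delete n;
    }

    int main() {
        std::vector<double> w{1.0, 2.0, 3.0, 4.0};
        size_t node_cnt = 0;
        node *root = build(w, 0, w.size(), node_cnt);
        size_t index_bytes = node_cnt * sizeof(node);  // the new accounting term
        (void)index_bytes;
        destroy(root);
        return 0;
    }

Counting during construction keeps get_memory_usage() O(1); walking the tree on every call would report the same figure at higher cost.
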