From 90194cb5be5c7b80bfa21a353a75920e932d54d8 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 12 Feb 2024 10:39:52 -0500 Subject: Added structure state vector w/ scratch version for reconstruction This approach should allow us to "simulate" a reconstruction to monitor the future state of the structure. The idea being that we can then add pre-emptive reconstructions to load balance and further smooth the tail latency curve. If a given reconstruction is significantly smaller than the next one will be, we can move some of the next one's work preemptively into the current one. The next phase is to do the simulation within the scratch_vector and then do a second pass examining the state of that reconstruction. In principle, we could look arbitrarily far ahead using this technique. --- include/framework/structure/ExtensionStructure.h | 144 ++++++++++++++--------- 1 file changed, 89 insertions(+), 55 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 4802bc1..823a66b 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -27,6 +27,16 @@ class ExtensionStructure { typedef S Shard; typedef BufferView BuffView; + typedef struct { + size_t reccnt; + size_t reccap; + + size_t shardcnt; + size_t shardcap; + } level_state; + + typedef std::vector state_vector; + public: ExtensionStructure(size_t buffer_size, size_t scale_factor, double max_delete_prop) : m_scale_factor(scale_factor) @@ -59,6 +69,7 @@ public: } new_struct->m_refcnt = 0; + new_struct->m_current_state = m_current_state; return new_struct; } @@ -95,8 +106,8 @@ public: * takes a structure as input. */ inline bool flush_buffer(BuffView buffer) { - assert(can_reconstruct_with(0, buffer.get_record_count())); - + state_vector tmp = m_current_state; + assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); flush_buffer_into_l0(std::move(buffer)); return true; @@ -200,8 +211,16 @@ public: return m_levels; } + /* + * NOTE: This cannot be simulated, because tombstone cancellation is not + * cheaply predictable. It is possible that the worst case number could + * be used instead, to allow for prediction, but compaction isn't a + * major concern outside of sampling; at least for now. So I'm not + * going to focus too much time on it at the moment. + */ std::vector get_compaction_tasks() { std::vector tasks; + state_vector scratch_state = m_current_state; /* if the tombstone/delete invariant is satisfied, no need for compactions */ if (validate_tombstone_proportion()) { @@ -219,9 +238,9 @@ public: assert(violation_idx != -1); - level_index base_level = find_reconstruction_target(violation_idx); + level_index base_level = find_reconstruction_target(violation_idx, scratch_state); if (base_level == -1) { - base_level = grow(); + base_level = grow(scratch_state); } for (level_index i=base_level; i>0; i--) { @@ -239,7 +258,7 @@ public: */ size_t reccnt = m_levels[i - 1]->get_record_count(); if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt)) { + if (can_reconstruct_with(i, reccnt, scratch_state)) { reccnt += m_levels[i]->get_record_count(); } } @@ -254,46 +273,28 @@ public: /* * */ - std::vector get_reconstruction_tasks(size_t buffer_reccnt) { - std::vector reconstructions; + std::vector get_reconstruction_tasks(size_t buffer_reccnt, + state_vector scratch_state={}) { + /* + * If no scratch state vector is provided, use a copy of the + * current one. The only time an empty vector could be used as + * *real* input to this function is when the current state is also + * empty, so this should would even in that case. + */ + if (scratch_state.size() == 0) { + scratch_state = m_current_state; + } /* * The buffer flush is not included so if that can be done without any * other change, just return an empty list. */ - if (can_reconstruct_with(0, buffer_reccnt)) { + std::vector reconstructions; + if (can_reconstruct_with(0, buffer_reccnt, scratch_state)) { return std::move(reconstructions); } - level_index base_level = find_reconstruction_target(0); - if (base_level == -1) { - base_level = grow(); - } - - for (level_index i=base_level; i>0; i--) { - ReconstructionTask task = {i-1, i}; - - /* - * The amount of storage required for the reconstruction accounts - * for the cost of storing the new records, along with the - * cost of retaining the old records during the process - * (hence the 2x multiplier). - * - * FIXME: currently does not account for the *actual* size - * of the shards, only the storage for the records - * themselves. - */ - size_t reccnt = m_levels[i-1]->get_record_count(); - if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt)) { - reccnt += m_levels[i]->get_record_count(); - } - } - //task.m_size = 2* reccnt * sizeof(R); - - reconstructions.push_back(task); - } - + reconstructions = get_reconstruction_tasks_from_level(0, scratch_state); return std::move(reconstructions); } @@ -301,12 +302,12 @@ public: /* * */ - std::vector get_reconstruction_tasks_from_level(level_index source_level) { + std::vector get_reconstruction_tasks_from_level(level_index source_level, state_vector &scratch_state) { std::vector reconstructions; - level_index base_level = find_reconstruction_target(source_level); + level_index base_level = find_reconstruction_target(source_level, scratch_state); if (base_level == -1) { - base_level = grow(); + base_level = grow(scratch_state); } for (level_index i=base_level; i>source_level; i--) { @@ -323,7 +324,7 @@ public: */ size_t reccnt = m_levels[i-1]->get_record_count(); if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt)) { + if (can_reconstruct_with(i, reccnt, scratch_state)) { reccnt += m_levels[i]->get_record_count(); } } @@ -332,7 +333,7 @@ public: reconstructions.push_back(task); } - return reconstructions; + return std::move(reconstructions); } /* @@ -342,7 +343,10 @@ public: * tombstone ordering invariant may be violated. */ inline void reconstruction(level_index base_level, level_index incoming_level) { + + size_t shard_capacity; if constexpr (L == LayoutPolicy::LEVELING) { + shard_capacity = 1; /* if the base level has a shard, merge the base and incoming together to make a new one */ if (m_levels[base_level]->get_shard_count() > 0) { m_levels[base_level] = InternalLevel::reconstruction(m_levels[base_level].get(), m_levels[incoming_level].get()); @@ -350,13 +354,23 @@ public: } else { m_levels[base_level] = m_levels[incoming_level]; } + } else { + shard_capacity = m_scale_factor; m_levels[base_level]->append_level(m_levels[incoming_level].get()); m_levels[base_level]->finalize(); } /* place a new, empty level where the incoming level used to be */ m_levels[incoming_level] = std::shared_ptr>(new InternalLevel(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); + + /* + * Update the state vector to match the *real* stage following + * the reconstruction + */ + m_current_state[base_level] = {m_levels[base_level]->get_record_count(), + calc_level_record_capacity(base_level), m_levels[base_level]->get_shard_count(), shard_capacity}; + m_current_state[incoming_level] = {0, calc_level_record_capacity(incoming_level), 0, shard_capacity}; } bool take_reference() { @@ -393,14 +407,26 @@ private: std::vector>> m_levels; + /* + * A pair of for each level in the + * structure. Record counts may be slightly inaccurate due to + * deletes. + */ + state_vector m_current_state; + /* * Add a new level to the structure and return its index. */ - inline level_index grow() { + inline level_index grow(state_vector &scratch_state) { level_index new_idx = m_levels.size(); - size_t new_shard_cnt = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + m_levels.emplace_back(std::shared_ptr>(new InternalLevel(new_idx, new_shard_cap))); - m_levels.emplace_back(std::shared_ptr>(new InternalLevel(new_idx, new_shard_cnt))); + m_current_state.push_back({0, calc_level_record_capacity(new_idx), + 0, new_shard_cap}); + scratch_state.push_back({0, calc_level_record_capacity(new_idx), + 0, new_shard_cap}); return new_idx; } @@ -411,17 +437,21 @@ private: * returns -1 if idx==0, and no such level exists, to simplify * the logic of the first buffer flush. */ - inline level_index find_reconstruction_target(level_index idx) { + inline level_index find_reconstruction_target(level_index idx, state_vector &state) { - if (idx == 0 && m_levels.size() == 0) return -1; + /* + * this handles the very first buffer flush, when the state vector + * is empty. + */ + if (idx == 0 && state.size() == 0) return -1; - size_t incoming_rec_cnt = get_level_record_count(idx); - for (level_index i=idx+1; iappend_buffer(std::move(buffer)); } + + /* update the state vector */ + m_current_state[0].reccnt = m_levels[0]->get_record_count(); + m_current_state[0].shardcnt = m_levels[0]->get_shard_count(); } /* @@ -475,15 +509,15 @@ private: * Determines if a level can sustain a reconstruction with incoming_rec_cnt * additional records without exceeding its capacity. */ - inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt) { - if (idx >= m_levels.size() || !m_levels[idx]) { + inline bool can_reconstruct_with(level_index idx, size_t incoming_rec_cnt, state_vector &state) { + if (idx >= state.size()) { return false; } if (L == LayoutPolicy::LEVELING) { - return m_levels[idx]->get_record_count() + incoming_rec_cnt <= calc_level_record_capacity(idx); + return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; } else { - return m_levels[idx]->get_shard_count() < m_scale_factor; + return state[idx].shardcnt < state[idx].shardcap; } /* unreachable */ -- cgit v1.2.3 From 5c229093f9af21514b17cf778dbec7ac657e31d2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 12 Feb 2024 11:42:18 -0500 Subject: Refactored Reconstruction Tasks Added a ReconVector type to make it easier to do load balancing by shifting tasks around, and clean up a few interfaces. --- include/framework/structure/ExtensionStructure.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 823a66b..f1c7535 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -218,8 +218,8 @@ public: * major concern outside of sampling; at least for now. So I'm not * going to focus too much time on it at the moment. */ - std::vector get_compaction_tasks() { - std::vector tasks; + ReconstructionVector get_compaction_tasks() { + ReconstructionVector tasks; state_vector scratch_state = m_current_state; /* if the tombstone/delete invariant is satisfied, no need for compactions */ @@ -244,8 +244,6 @@ public: } for (level_index i=base_level; i>0; i--) { - ReconstructionTask task = {i-1, i}; - /* * The amount of storage required for the reconstruction accounts * for the cost of storing the new records, along with the @@ -262,9 +260,7 @@ public: reccnt += m_levels[i]->get_record_count(); } } - //task.m_size = 2* reccnt * sizeof(R); - - tasks.push_back(task); + tasks.add_reconstruction(i-i, i, reccnt); } return tasks; @@ -273,7 +269,7 @@ public: /* * */ - std::vector get_reconstruction_tasks(size_t buffer_reccnt, + ReconstructionVector get_reconstruction_tasks(size_t buffer_reccnt, state_vector scratch_state={}) { /* * If no scratch state vector is provided, use a copy of the @@ -289,7 +285,7 @@ public: * The buffer flush is not included so if that can be done without any * other change, just return an empty list. */ - std::vector reconstructions; + ReconstructionVector reconstructions; if (can_reconstruct_with(0, buffer_reccnt, scratch_state)) { return std::move(reconstructions); } @@ -302,8 +298,8 @@ public: /* * */ - std::vector get_reconstruction_tasks_from_level(level_index source_level, state_vector &scratch_state) { - std::vector reconstructions; + ReconstructionVector get_reconstruction_tasks_from_level(level_index source_level, state_vector &scratch_state) { + ReconstructionVector reconstructions; level_index base_level = find_reconstruction_target(source_level, scratch_state); if (base_level == -1) { @@ -311,7 +307,6 @@ public: } for (level_index i=base_level; i>source_level; i--) { - ReconstructionTask task = {i - 1, i}; /* * The amount of storage required for the reconstruction accounts * for the cost of storing the new records, along with the @@ -328,9 +323,7 @@ public: reccnt += m_levels[i]->get_record_count(); } } -// task.m_size = 2* reccnt * sizeof(R); - - reconstructions.push_back(task); + reconstructions.add_reconstruction(i-1, i, reccnt); } return std::move(reconstructions); -- cgit v1.2.3 From 61f8bd422aed3301850c4502e9f5b7f33dff2e12 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 12 Feb 2024 12:38:29 -0500 Subject: MutableBuffer: Allow hwm to equal lwm The high watermark and low watermark can now be equal, to allow for blocking reconstruction without requiring odd buffer sizes. --- include/framework/structure/MutableBuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h index 415c95a..c0c87cc 100644 --- a/include/framework/structure/MutableBuffer.h +++ b/include/framework/structure/MutableBuffer.h @@ -57,7 +57,7 @@ public: , m_active_head_advance(false) { assert(m_cap > m_hwm); - assert(m_hwm > m_lwm); + assert(m_hwm >= m_lwm); } ~MutableBuffer() { -- cgit v1.2.3 From 6d7067b4cef51e92d8cb54bb502d6133269728a9 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 12 Feb 2024 12:52:12 -0500 Subject: ExtensionStructure: Added simulated reconstruction lookahead The reconstruction task procedure can now simulate future reconstructions to a specified depth. --- include/framework/structure/ExtensionStructure.h | 116 +++++++++++++++++------ 1 file changed, 88 insertions(+), 28 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index f1c7535..0e2118e 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -107,6 +107,11 @@ public: */ inline bool flush_buffer(BuffView buffer) { state_vector tmp = m_current_state; + + if (tmp.size() == 0) { + grow(tmp); + } + assert(can_reconstruct_with(0, buffer.get_record_count(), tmp)); flush_buffer_into_l0(std::move(buffer)); @@ -281,16 +286,34 @@ public: scratch_state = m_current_state; } - /* - * The buffer flush is not included so if that can be done without any - * other change, just return an empty list. - */ ReconstructionVector reconstructions; - if (can_reconstruct_with(0, buffer_reccnt, scratch_state)) { - return std::move(reconstructions); + size_t LOOKAHEAD = 2; + for (size_t i=0; isource_level; i--) { + size_t recon_reccnt = scratch_state[i-1].reccnt; + size_t base_reccnt = recon_reccnt; + /* - * The amount of storage required for the reconstruction accounts - * for the cost of storing the new records, along with the - * cost of retaining the old records during the process - * (hence the 2x multiplier). - * - * FIXME: currently does not account for the *actual* size - * of the shards, only the storage for the records - * themselves. + * If using Leveling, the total reconstruction size will be the + * records in *both* base and target, because they will need to + * be merged (assuming that target isn't empty). */ - size_t reccnt = m_levels[i-1]->get_record_count(); if constexpr (L == LayoutPolicy::LEVELING) { - if (can_reconstruct_with(i, reccnt, scratch_state)) { - reccnt += m_levels[i]->get_record_count(); + if (can_reconstruct_with(i, base_reccnt, scratch_state)) { + recon_reccnt += scratch_state[i].reccnt; } } - reconstructions.add_reconstruction(i-1, i, reccnt); + reconstructions.add_reconstruction(i-1, i, recon_reccnt); + + /* + * The base level will be emptied and its records moved to + * the target. + */ + scratch_state[i-1].reccnt = 0; + scratch_state[i-1].shardcnt = 0; + + /* + * The target level will have the records from the base level + * added to it, and potentially gain a shard if the LayoutPolicy + * is tiering or the level currently lacks any shards at all. + */ + scratch_state[i].reccnt += base_reccnt; + if (L == LayoutPolicy::TEIRING || scratch_state[i].shardcnt == 0) { + scratch_state[i].shardcnt += 1; + } } return std::move(reconstructions); @@ -336,10 +382,16 @@ public: * tombstone ordering invariant may be violated. */ inline void reconstruction(level_index base_level, level_index incoming_level) { + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + if (base_level >= m_levels.size()) { + m_levels.emplace_back(std::shared_ptr>(new InternalLevel(base_level, shard_capacity))); + + m_current_state.push_back({0, calc_level_record_capacity(base_level), + 0, shard_capacity}); + } - size_t shard_capacity; if constexpr (L == LayoutPolicy::LEVELING) { - shard_capacity = 1; /* if the base level has a shard, merge the base and incoming together to make a new one */ if (m_levels[base_level]->get_shard_count() > 0) { m_levels[base_level] = InternalLevel::reconstruction(m_levels[base_level].get(), m_levels[incoming_level].get()); @@ -349,7 +401,6 @@ public: } } else { - shard_capacity = m_scale_factor; m_levels[base_level]->append_level(m_levels[incoming_level].get()); m_levels[base_level]->finalize(); } @@ -408,16 +459,17 @@ private: state_vector m_current_state; /* - * Add a new level to the structure and return its index. + * Add a new level to the scratch state and return its index. + * + * IMPORTANT: This does _not_ add a level to the extension structure + * anymore. This is handled by the appropriate reconstruction and flush + * methods as needed. This function is for use in "simulated" + * reconstructions. */ inline level_index grow(state_vector &scratch_state) { level_index new_idx = m_levels.size(); size_t new_shard_cap = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; - m_levels.emplace_back(std::shared_ptr>(new InternalLevel(new_idx, new_shard_cap))); - - m_current_state.push_back({0, calc_level_record_capacity(new_idx), - 0, new_shard_cap}); scratch_state.push_back({0, calc_level_record_capacity(new_idx), 0, new_shard_cap}); return new_idx; @@ -451,7 +503,15 @@ private: } inline void flush_buffer_into_l0(BuffView buffer) { - assert(m_levels[0]); + size_t shard_capacity = (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor; + + if (m_levels.size() == 0) { + m_levels.emplace_back(std::shared_ptr>(new InternalLevel(0, shard_capacity))); + + m_current_state.push_back({0, calc_level_record_capacity(0), + 0, shard_capacity}); + } + if constexpr (L == LayoutPolicy::LEVELING) { // FIXME: Kludgey implementation due to interface constraints. auto old_level = m_levels[0].get(); -- cgit v1.2.3 From e4644fec1acc429a540f358b4e7e15af75aa71a9 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 12 Feb 2024 13:09:10 -0500 Subject: ExtensionStructure: first basic test of lookahead task stealing --- include/framework/structure/ExtensionStructure.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 0e2118e..dbfb543 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -275,7 +275,7 @@ public: * */ ReconstructionVector get_reconstruction_tasks(size_t buffer_reccnt, - state_vector scratch_state={}) { + state_vector scratch_state={}) { /* * If no scratch state vector is provided, use a copy of the * current one. The only time an empty vector could be used as @@ -304,6 +304,14 @@ public: */ if (i == 0) { reconstructions = local_recon; + /* + * Quick sanity test of idea: if the next reconstruction + * would be larger than this one, steal the largest + * task from it and run it now instead. + */ + } else if (local_recon.get_total_reccnt() > reconstructions.get_total_reccnt()) { + auto t = local_recon.remove_reconstruction(0); + reconstructions.add_reconstruction(t); } } @@ -312,6 +320,7 @@ public: if (L == LayoutPolicy::TEIRING || scratch_state[0].shardcnt == 0) { scratch_state[0].shardcnt += 1; } + } return std::move(reconstructions); -- cgit v1.2.3 From 9fe190f5d500e22b0894095e7c917f9c652e0a64 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 20 Mar 2024 17:30:14 -0400 Subject: Updates/progress towards succinct trie support --- include/framework/structure/MutableBuffer.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h index c0c87cc..1ed0200 100644 --- a/include/framework/structure/MutableBuffer.h +++ b/include/framework/structure/MutableBuffer.h @@ -50,7 +50,8 @@ public: , m_tail(0) , m_head({0, 0}) , m_old_head({high_watermark, 0}) - , m_data((Wrapped *) psudb::sf_aligned_alloc(CACHELINE_SIZE, m_cap * sizeof(Wrapped))) + //, m_data((Wrapped *) psudb::sf_aligned_alloc(CACHELINE_SIZE, m_cap * sizeof(Wrapped))) + , m_data(new Wrapped[m_cap]()) , m_tombstone_filter(new psudb::BloomFilter(BF_FPR, m_hwm, BF_HASH_FUNCS)) , m_tscnt(0) , m_old_tscnt(0) @@ -61,7 +62,7 @@ public: } ~MutableBuffer() { - free(m_data); + delete[] m_data; delete m_tombstone_filter; } -- cgit v1.2.3 From 9c4884c69486cfc78801ffe4e7cd1c581e0cdb87 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 22 Mar 2024 14:03:00 -0400 Subject: Disabled lookahead for paper revision --- include/framework/structure/ExtensionStructure.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index dbfb543..ffba1c1 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -287,7 +287,7 @@ public: } ReconstructionVector reconstructions; - size_t LOOKAHEAD = 2; + size_t LOOKAHEAD = 1; for (size_t i=0; i Date: Fri, 22 Mar 2024 14:04:58 -0400 Subject: MutableBuffer: added visibility flag to records and refactored timestamp --- include/framework/structure/MutableBuffer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/MutableBuffer.h b/include/framework/structure/MutableBuffer.h index 1ed0200..7db3980 100644 --- a/include/framework/structure/MutableBuffer.h +++ b/include/framework/structure/MutableBuffer.h @@ -77,16 +77,20 @@ public: wrec.header = 0; if (tombstone) wrec.set_tombstone(); + // FIXME: because of the mod, it isn't correct to use `pos` + // as the ordering timestamp in the header anymore. size_t pos = tail % m_cap; m_data[pos] = wrec; - m_data[pos].header |= (pos << 2); + m_data[pos].set_timestamp(pos); if (tombstone) { m_tscnt.fetch_add(1); if (m_tombstone_filter) m_tombstone_filter->insert(rec); } + m_data[pos].set_visible(); + return 1; } -- cgit v1.2.3 From b1b5ab106122e6917f6b34452be95e617506f05d Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Mon, 25 Mar 2024 12:54:17 -0400 Subject: Updates for build on OpenBSD Necessary updates to get the codebase building under OpenBSD 7.5 with clang. This is a minimal set of changes to get building to work, which includes disabling several things that aren't directly compatable. More work will be necessary to get full functionality. In particular, Triespline, PGM, and the reference M-tree do not currently build on OpenBSD with clang due to GNU dependencies or other gcc specific features. --- include/framework/structure/BufferView.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index 9e0872b..44a2044 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -20,7 +20,7 @@ namespace de { -typedef std::_Bind ReleaseFunction; +typedef std::function ReleaseFunction; template class BufferView { -- cgit v1.2.3 From 7c2f43ff039795576bc0014c367b893fbbaceca4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 14:39:33 -0400 Subject: Benchmark updates --- include/framework/structure/ExtensionStructure.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index ffba1c1..6ad7aad 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -161,9 +161,11 @@ public: size_t get_memory_usage() { size_t cnt = 0; for (size_t i=0; iget_memory_usage()); if (m_levels[i]) cnt += m_levels[i]->get_memory_usage(); } + fprintf(stderr, "%ld\n", cnt); return cnt; } -- cgit v1.2.3 From e677947ed6f40d7847e9989d1226e1e8e0b3e03c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 19 Apr 2024 15:03:45 -0400 Subject: Removed debug print statements --- include/framework/structure/ExtensionStructure.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 6ad7aad..ffba1c1 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -161,11 +161,9 @@ public: size_t get_memory_usage() { size_t cnt = 0; for (size_t i=0; iget_memory_usage()); if (m_levels[i]) cnt += m_levels[i]->get_memory_usage(); } - fprintf(stderr, "%ld\n", cnt); return cnt; } -- cgit v1.2.3 From 96faedaeb92776fd9cc2ed8d8b0878ebc9300cbe Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 18:51:41 -0400 Subject: Added a Bentley-Saxe layout policy --- include/framework/structure/ExtensionStructure.h | 60 ++++++++++++++++++++++-- include/framework/structure/InternalLevel.h | 19 ++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index ffba1c1..b83674b 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -343,6 +343,30 @@ public: base_level = grow(scratch_state); } + if constexpr (L == LayoutPolicy::BSM) { + if (base_level == 0) { + return std::move(reconstructions); + } + + ReconstructionTask task; + task.target = base_level; + + size_t base_reccnt = 0; + for (level_index i=base_level; i>source_level; i--) { + auto recon_reccnt = scratch_state[i-1].reccnt; + base_reccnt += recon_reccnt; + scratch_state[i-1].reccnt = 0; + scratch_state[i-1].shardcnt = 0; + task.add_source(i-1, recon_reccnt); + } + + reconstructions.add_reconstruction(task); + scratch_state[base_level].reccnt = base_reccnt; + scratch_state[base_level].shardcnt = 1; + + return std::move(reconstructions); + } + /* * Determine the full set of reconstructions necessary to open up * space in the source level. @@ -384,6 +408,33 @@ public: return std::move(reconstructions); } + inline void reconstruction(ReconstructionTask task) { + static_assert(L == LayoutPolicy::BSM); + std::vector*> levels(task.sources.size()); + for (size_t i=0; i::reconstruction(levels, task.target); + if (task.target >= m_levels.size()) { + m_current_state.push_back({new_level->get_record_count(), calc_level_record_capacity(task.target), + 1, 1}); + m_levels.emplace_back(new_level); + } else { + m_current_state[task.target] = {new_level->get_record_count(), calc_level_record_capacity(task.target), + 1, 1}; + m_levels[task.target] = new_level; + } + + /* remove all of the levels that have been flattened */ + for (size_t i=0; i>(new InternalLevel(task.sources[i], 1)); + m_current_state[task.sources[i]] = {0, calc_level_record_capacity(task.target), 0, 1}; + } + + return; + } + /* * Combine incoming_level with base_level and reconstruct the shard, * placing it in base_level. The two levels should be sequential--i.e. no @@ -395,7 +446,6 @@ public: if (base_level >= m_levels.size()) { m_levels.emplace_back(std::shared_ptr>(new InternalLevel(base_level, shard_capacity))); - m_current_state.push_back({0, calc_level_record_capacity(base_level), 0, shard_capacity}); } @@ -418,7 +468,7 @@ public: m_levels[incoming_level] = std::shared_ptr>(new InternalLevel(incoming_level, (L == LayoutPolicy::LEVELING) ? 1 : m_scale_factor)); /* - * Update the state vector to match the *real* stage following + * Update the state vector to match the *real* state following * the reconstruction */ m_current_state[base_level] = {m_levels[base_level]->get_record_count(), @@ -576,9 +626,11 @@ private: return false; } - if (L == LayoutPolicy::LEVELING) { + if constexpr (L == LayoutPolicy::LEVELING) { return state[idx].reccnt + incoming_rec_cnt <= state[idx].reccap; - } else { + } else if constexpr (L == LayoutPolicy::BSM) { + return state[idx].reccnt == 0; + } else { return state[idx].shardcnt < state[idx].shardcap; } diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h index db38946..b962dcc 100644 --- a/include/framework/structure/InternalLevel.h +++ b/include/framework/structure/InternalLevel.h @@ -64,6 +64,21 @@ public: return std::shared_ptr(res); } + static std::shared_ptr reconstruction(std::vector levels, size_t level_idx) { + std::vector shards; + for (auto level : levels) { + for (auto shard : level->m_shards) { + if (shard) shards.emplace_back(shard.get()); + } + } + + auto res = new InternalLevel(level_idx, 1); + res->m_shard_cnt = 1; + res->m_shards[0] = std::make_shared(shards); + + return std::shared_ptr(res); + } + /* * Create a new shard combining the records from all of * the shards in level, and append this new shard into @@ -185,6 +200,10 @@ public: } Shard* get_shard(size_t idx) { + if (idx >= m_shard_cnt) { + return nullptr; + } + return m_shards[idx].get(); } -- cgit v1.2.3 From a23bc3341923509be9b2f587ece8cd5a650f6386 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 8 May 2024 13:20:44 -0400 Subject: TSParmsweep: enabled forcing a full buffer scan --- include/framework/structure/BufferView.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index 44a2044..11b8337 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -112,6 +112,10 @@ public: size_t get_record_count() { return m_tail - m_head; } + + size_t get_capacity() { + return m_cap; + } /* * NOTE: This function returns an upper bound on the number @@ -123,7 +127,7 @@ public: } Wrapped *get(size_t i) { - assert(i < get_record_count()); + //assert(i < get_record_count()); return m_data + to_idx(i); } -- cgit v1.2.3 From 64444e7ea14368d4e6ed40e89b62ff335b87d65b Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Thu, 9 May 2024 15:51:43 -0400 Subject: Fixed arithmetic bug --- include/framework/structure/BufferView.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/framework/structure') diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index 11b8337..e95a799 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -164,7 +164,7 @@ private: bool m_active; size_t to_idx(size_t i) { - size_t idx = (m_start + i >= m_cap) ? i = (m_cap - m_start) + size_t idx = (m_start + i >= m_cap) ? i - (m_cap - m_start) : m_start + i; assert(idx < m_cap); return idx; -- cgit v1.2.3