From fc8b4c14bd2814447b5d3180c4ecf3742196c6bf Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 23 Jul 2023 14:17:38 -0400 Subject: Benchmarking updates --- include/shard/MemISAM.h | 126 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 2 deletions(-) (limited to 'include/shard/MemISAM.h') diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index aa31962..897193c 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -40,6 +40,7 @@ struct IRSState { size_t lower_bound; size_t upper_bound; size_t sample_size; + size_t total_weight; }; template @@ -49,12 +50,32 @@ struct IRSBufferState { size_t sample_size; }; +template +struct ISAMRangeQueryParms { + decltype(R::key) lower_bound; + decltype(R::key) upper_bound; +}; + +template +class ISAMRangeQuery; + +template +struct ISAMRangeQueryState { + size_t start_idx; + size_t stop_idx; +}; + +template +struct RangeQueryBufferState { + size_t cutoff; +}; template class MemISAM { private: friend class IRSQuery; friend class IRSQuery; + friend class ISAMRangeQuery; typedef decltype(R::key) K; typedef decltype(R::value) V; @@ -233,7 +254,7 @@ public: } size_t get_memory_usage() { - return m_reccnt * sizeof(R) + m_internal_node_cnt * inmem_isam_node_size; + return m_internal_node_cnt * inmem_isam_node_size; } private: @@ -404,7 +425,7 @@ public: weights.push_back(bs->records.size()); } - decltype(R::weight) total_weight = 0; + size_t total_weight = 0; for (auto &s : shard_states) { auto state = (IRSState *) s; total_weight += state->upper_bound - state->lower_bound; @@ -509,4 +530,105 @@ public: } }; + +template +class ISAMRangeQuery { +public: + static void *get_query_state(MemISAM *ts, void *parms) { + auto res = new ISAMRangeQueryState(); + auto p = (ISAMRangeQueryParms *) parms; + + res->start_idx = ts->get_lower_bound(p->lower_bound); + res->stop_idx = ts->get_record_count(); + + return res; + } + + static void* get_buffer_query_state(MutableBuffer *buffer, void *parms) { + auto res = new RangeQueryBufferState(); + res->cutoff = buffer->get_record_count(); + + return res; + } + + static void process_query_states(void *query_parms, std::vector shard_states, void *buff_state) { + return; + } + + static std::vector> query(MemISAM *ts, void *q_state, void *parms) { + std::vector> records; + auto p = (ISAMRangeQueryParms *) parms; + auto s = (ISAMRangeQueryState *) q_state; + + // if the returned index is one past the end of the + // records for the PGM, then there are not records + // in the index falling into the specified range. + if (s->start_idx == ts->get_record_count()) { + return records; + } + + auto ptr = ts->get_record_at(s->start_idx); + + // roll the pointer forward to the first record that is + // greater than or equal to the lower bound. + while(ptr->rec.key < p->lower_bound) { + ptr++; + } + + while (ptr->rec.key <= p->upper_bound && ptr < ts->m_data + s->stop_idx) { + records.emplace_back(*ptr); + ptr++; + } + + return records; + } + + static std::vector> buffer_query(MutableBuffer *buffer, void *state, void *parms) { + auto p = (ISAMRangeQueryParms *) parms; + auto s = (RangeQueryBufferState *) state; + + std::vector> records; + for (size_t i=0; icutoff; i++) { + auto rec = buffer->get_data() + i; + if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) { + records.emplace_back(*rec); + } + } + + return records; + } + + static std::vector merge(std::vector> &results, void *parms) { + size_t total = 0; + for (size_t i=0; i(); + } + + std::vector output; + output.reserve(total); + + for (size_t i=0; i *) state; + delete s; + } + + static void delete_buffer_query_state(void *state) { + auto s = (RangeQueryBufferState *) state; + delete s; + } +}; + + + } -- cgit v1.2.3