From 5f6dd8bbc12f981c69d01d9e2c2057bfc97d429c Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 24 Jul 2023 10:56:30 -0400 Subject: ISAM IRS query bugfixes --- include/shard/MemISAM.h | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h index 897193c..4680cb9 100644 --- a/include/shard/MemISAM.h +++ b/include/shard/MemISAM.h @@ -384,8 +384,14 @@ public: res->lower_bound = isam->get_lower_bound(lower_key); res->upper_bound = isam->get_upper_bound(upper_key); - res->sample_size = 0; + if (res->lower_bound == isam->get_record_count()) { + res->total_weight = 0; + } else { + res->total_weight = res->upper_bound - res->lower_bound; + } + + res->sample_size = 0; return res; } @@ -413,25 +419,36 @@ public: static void process_query_states(void *query_parms, std::vector shard_states, void *buff_state) { auto p = (irs_query_parms *) query_parms; - auto bs = (IRSBufferState *) buff_state; + auto bs = (buff_state) ? (IRSBufferState *) buff_state : nullptr; std::vector shard_sample_sizes(shard_states.size()+1, 0); size_t buffer_sz = 0; std::vector weights; - if (Rejection) { - weights.push_back(bs->cutoff); + if constexpr (Rejection) { + weights.push_back((bs) ? bs->cutoff : 0); } else { - weights.push_back(bs->records.size()); + weights.push_back((bs) ? bs->records.size() : 0); } size_t total_weight = 0; for (auto &s : shard_states) { auto state = (IRSState *) s; - total_weight += state->upper_bound - state->lower_bound; + total_weight += state->total_weight; weights.push_back(state->total_weight); } + // if no valid records fall within the query range, just + // set all of the sample sizes to 0 and bail out. + if (total_weight == 0) { + for (size_t i=0; i *) shard_states[i]; + state->sample_size = 0; + } + + return; + } + std::vector normalized_weights; for (auto w : weights) { normalized_weights.push_back((double) w / (double) total_weight); @@ -447,7 +464,9 @@ public: } } - bs->sample_size = buffer_sz; + if (bs) { + bs->sample_size = buffer_sz; + } for (size_t i=0; i *) shard_states[i]; state->sample_size = shard_sample_sizes[i+1]; @@ -464,7 +483,7 @@ public: std::vector> result_set; - if (sample_sz == 0) { + if (sample_sz == 0 || state->lower_bound == isam->get_record_count()) { return result_set; } @@ -472,7 +491,7 @@ public: size_t range_length = state->upper_bound - state->lower_bound; do { attempts++; - size_t idx = gsl_rng_uniform_int(rng, range_length); + size_t idx = (range_length > 0) ? gsl_rng_uniform_int(rng, range_length) : 0; result_set.emplace_back(*isam->get_record_at(state->lower_bound + idx)); } while (attempts < sample_sz); -- cgit v1.2.3