summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2023-11-07 13:44:21 -0500
committerDouglas Rumbaugh <dbr4@psu.edu>2023-11-07 13:45:00 -0500
commitdca44ff5be67c279ffec11224bb2be74a042be18 (patch)
tree168c7ccf50a4b69926b1d3b0086ef0c8b1d7995c
parent9fd6264122f09752b4278c9ff881b4cfe906bbc8 (diff)
parentcc415c7c100a17c4e944915aeab01be99b14adb9 (diff)
downloaddynamic-extension-dca44ff5be67c279ffec11224bb2be74a042be18.tar.gz
Merge branch 'query-refactor'
-rw-r--r--CMakeLists.txt16
-rw-r--r--include/framework/QueryRequirements.h17
-rw-r--r--include/framework/interface/Shard.h6
-rw-r--r--include/query/irs.h216
-rw-r--r--include/query/rangequery.h167
-rw-r--r--include/query/wirs.h240
-rw-r--r--include/query/wss.h204
-rw-r--r--include/shard/Alias.h (renamed from include/shard/WSS.h)225
-rw-r--r--include/shard/AugBTree.h (renamed from include/shard/WIRS.h)351
-rw-r--r--include/shard/ISAMTree.h339
-rw-r--r--include/shard/MemISAM.h702
-rw-r--r--include/shard/PGM.h267
-rw-r--r--include/shard/TrieSpline.h184
-rw-r--r--tests/alias_tests.cpp (renamed from tests/wss_tests.cpp)97
-rw-r--r--tests/augbtree_tests.cpp (renamed from tests/wirs_tests.cpp)67
-rw-r--r--tests/de_level_tag.cpp5
-rw-r--r--tests/de_level_tomb.cpp5
-rw-r--r--tests/de_tier_tag.cpp5
-rw-r--r--tests/de_tier_tomb.cpp5
-rw-r--r--tests/dynamic_extension_tests.inc211
-rw-r--r--tests/internal_level_tests.cpp13
-rw-r--r--tests/memisam_tests.cpp69
-rw-r--r--tests/pgm_tests.cpp33
-rw-r--r--tests/triespline_tests.cpp17
24 files changed, 1526 insertions, 1935 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2c16006..d0e14c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,9 +29,9 @@ if (tests)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/tests")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/tests/data")
- add_executable(wirs_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/wirs_tests.cpp)
- target_link_libraries(wirs_tests PUBLIC gsl check subunit pthread)
- target_include_directories(wirs_tests PRIVATE include external/psudb-common/cpp/include)
+ add_executable(augbtree_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/augbtree_tests.cpp)
+ target_link_libraries(augbtree_tests PUBLIC gsl check subunit pthread)
+ target_include_directories(augbtree_tests PRIVATE include external/psudb-common/cpp/include)
add_executable(internal_level_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/internal_level_tests.cpp)
target_link_libraries(internal_level_tests PUBLIC gsl check subunit pthread)
@@ -45,10 +45,6 @@ if (tests)
target_link_libraries(vptree_tests PUBLIC gsl check subunit pthread)
target_include_directories(vptree_tests PRIVATE include external/vptree external/psudb-common/cpp/include)
- #add_executable(dynamic_extension_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/dynamic_extension_tests.cpp)
- #target_link_libraries(dynamic_extension_tests PUBLIC gsl check subunit pthread)
- #target_include_directories(dynamic_extension_tests PRIVATE include)
-
add_executable(de_tier_tag ${CMAKE_CURRENT_SOURCE_DIR}/tests/de_tier_tag.cpp)
target_link_libraries(de_tier_tag PUBLIC gsl check subunit pthread)
target_include_directories(de_tier_tag PRIVATE include external/psudb-common/cpp/include external)
@@ -69,9 +65,9 @@ if (tests)
target_link_libraries(memisam_tests PUBLIC gsl check subunit pthread)
target_include_directories(memisam_tests PRIVATE include external/psudb-common/cpp/include)
- add_executable(wss_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/wss_tests.cpp)
- target_link_libraries(wss_tests PUBLIC gsl check subunit pthread)
- target_include_directories(wss_tests PRIVATE include external/psudb-common/cpp/include)
+ add_executable(alias_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/alias_tests.cpp)
+ target_link_libraries(alias_tests PUBLIC gsl check subunit pthread)
+ target_include_directories(alias_tests PRIVATE include external/psudb-common/cpp/include)
add_executable(triespline_tests ${CMAKE_CURRENT_SOURCE_DIR}/tests/triespline_tests.cpp)
target_link_libraries(triespline_tests PUBLIC gsl check subunit pthread)
diff --git a/include/framework/QueryRequirements.h b/include/framework/QueryRequirements.h
new file mode 100644
index 0000000..ff4eaff
--- /dev/null
+++ b/include/framework/QueryRequirements.h
@@ -0,0 +1,17 @@
+/*
+ * include/framework/QueryRequirements.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ * A header file containing the necessary includes for Query
+ * development.
+ *
+ */
+#pragma once
+
+#include "framework/structure/MutableBuffer.h"
+#include "framework/interface/Record.h"
+#include "framework/interface/Shard.h"
+#include "framework/interface/Query.h"
diff --git a/include/framework/interface/Shard.h b/include/framework/interface/Shard.h
index d3a6cf8..92cdca0 100644
--- a/include/framework/interface/Shard.h
+++ b/include/framework/interface/Shard.h
@@ -33,4 +33,10 @@ concept ShardInterface = requires(S s, S **spp, void *p, bool b, size_t i) {
{s.get_aux_memory_usage()} -> std::convertible_to<size_t>;
};
+template <typename S, typename R>
+concept SortedShardInterface = ShardInterface<S> && requires(S s, R r, R *rp) { // refines ShardInterface; rp is not used by any requirement below
+ {s.lower_bound(r)} -> std::convertible_to<size_t>; // s.lower_bound(record) must yield something convertible to size_t
+ {s.upper_bound(r)} -> std::convertible_to<size_t>; // same requirement for s.upper_bound(record)
+};
+
}
diff --git a/include/query/irs.h b/include/query/irs.h
new file mode 100644
index 0000000..4cb69b0
--- /dev/null
+++ b/include/query/irs.h
@@ -0,0 +1,216 @@
+/*
+ * include/query/irs.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "framework/QueryRequirements.h"
+
+namespace de { namespace irs {
+
+template <RecordInterface R>
+struct Parms { // caller-supplied parameters for one IRS query
+ decltype(R::key) lower_bound; // low end of the key range; buffered records qualify when key >= lower_bound
+ decltype(R::key) upper_bound; // high end of the key range; buffered records qualify when key <= upper_bound
+ size_t sample_size; // total number of draws that process_query_states splits between buffer and shards
+ gsl_rng *rng; // GSL generator used for every random draw made by the query
+};
+
+
+template <RecordInterface R>
+struct State { // per-shard state for one IRS query
+ size_t lower_bound; // index returned by shard->get_lower_bound() for the query's lower key
+ size_t upper_bound; // index returned by shard->get_upper_bound() for the query's upper key
+ size_t sample_size; // number of draws assigned to this shard by process_query_states
+ size_t total_weight; // upper_bound - lower_bound, or 0 when lower_bound equals the shard's record count
+};
+
+template <RecordInterface R>
+struct BufferState { // buffer-side state for one IRS query
+ size_t cutoff; // buffer->get_record_count() at the time the state was created
+ std::vector<Wrapped<R>> records; // copies of the in-range buffered records; only filled when Rejection is false
+ size_t sample_size; // number of draws assigned to the buffer by process_query_states
+};
+
+/*
+ * Independent range sampling (IRS) query. Draws Parms::sample_size
+ * records, with replacement, from the buffer and the shards.
+ *
+ * When Rejection is true the buffer is sampled over all of its records
+ * and draws whose key falls outside the query range are discarded, so
+ * fewer than the requested number of records may come back. When it is
+ * false, the in-range buffered records are copied into the buffer state
+ * up front and sampled directly.
+ */
+template <ShardInterface S, RecordInterface R, bool Rejection=true>
+class Query {
+public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
+    /*
+     * Allocate the per-shard state for a query.
+     *
+     * shard: shard to be sampled
+     * parms: pointer to a Parms<R>
+     *
+     * Returns a heap-allocated State<R> holding the index range reported
+     * by the shard and its weight (the length of that range). Release it
+     * with delete_query_state().
+     */
+    static void *get_query_state(S *shard, void *parms) {
+        auto res = new State<R>();
+        decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound;
+        decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound;
+
+        res->lower_bound = shard->get_lower_bound(lower_key);
+        res->upper_bound = shard->get_upper_bound(upper_key);
+
+        // a lower bound equal to the record count means that no record
+        // of this shard falls within the range
+        if (res->lower_bound == shard->get_record_count()) {
+            res->total_weight = 0;
+        } else {
+            res->total_weight = res->upper_bound - res->lower_bound;
+        }
+
+        res->sample_size = 0;
+        return res;
+    }
+
+    /*
+     * Allocate the buffer-side state for a query.
+     *
+     * buffer: mutable buffer to be sampled
+     * parms:  pointer to a Parms<R>
+     *
+     * Returns a heap-allocated BufferState<R>. Without rejection
+     * sampling, the in-range records are copied into it. Release it with
+     * delete_buffer_query_state().
+     */
+    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+        auto res = new BufferState<R>();
+
+        res->cutoff = buffer->get_record_count();
+        res->sample_size = 0;
+
+        if constexpr (!Rejection) {
+            auto lower_key = ((Parms<R> *) parms)->lower_bound;
+            auto upper_key = ((Parms<R> *) parms)->upper_bound;
+
+            for (size_t i=0; i<res->cutoff; i++) {
+                auto rec = buffer->get_data() + i;
+                if (rec->rec.key >= lower_key && rec->rec.key <= upper_key) {
+                    res->records.emplace_back(*rec);
+                }
+            }
+        }
+
+        return res;
+    }
+
+    /*
+     * Split the requested sample size between the buffer and the shards,
+     * proportionally to their weights, and store each share in the
+     * corresponding state's sample_size.
+     *
+     * query_parms:  pointer to a Parms<R>
+     * shard_states: State<R> pointers, one per shard
+     * buff_state:   BufferState<R> pointer, may be null
+     */
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
+        auto p = (Parms<R> *) query_parms;
+        auto bs = (BufferState<R> *) buff_state;
+
+        // slot 0 belongs to the buffer, slot i+1 to shard i
+        std::vector<size_t> weights;
+        weights.reserve(shard_states.size() + 1);
+        if constexpr (Rejection) {
+            weights.push_back((bs) ? bs->cutoff : 0);
+        } else {
+            weights.push_back((bs) ? bs->records.size() : 0);
+        }
+
+        // the buffer's weight is part of the total; leaving it out makes
+        // the normalized weights sum to more than one and causes a query
+        // whose only candidates are buffered to bail out below
+        size_t total_weight = weights[0];
+        for (auto &s : shard_states) {
+            auto state = (State<R> *) s;
+            total_weight += state->total_weight;
+            weights.push_back(state->total_weight);
+        }
+
+        // if no valid records fall within the query range, just
+        // set all of the sample sizes to 0 and bail out.
+        if (total_weight == 0) {
+            if (bs) {
+                bs->sample_size = 0;
+            }
+            for (auto &s : shard_states) {
+                ((State<R> *) s)->sample_size = 0;
+            }
+
+            return;
+        }
+
+        std::vector<double> normalized_weights;
+        normalized_weights.reserve(weights.size());
+        for (auto w : weights) {
+            normalized_weights.push_back((double) w / (double) total_weight);
+        }
+
+        // sample_sizes uses the same slot layout as weights, so the slot
+        // that is incremented here is the one that is read back below
+        std::vector<size_t> sample_sizes(weights.size(), 0);
+        auto shard_alias = psudb::Alias(normalized_weights);
+        for (size_t i=0; i<p->sample_size; i++) {
+            sample_sizes[shard_alias.get(p->rng)]++;
+        }
+
+        if (bs) {
+            bs->sample_size = sample_sizes[0];
+        }
+        for (size_t i=0; i<shard_states.size(); i++) {
+            auto state = (State<R> *) shard_states[i];
+            state->sample_size = sample_sizes[i+1];
+        }
+    }
+
+    /*
+     * Draw this shard's share of the sample.
+     *
+     * shard:   shard to sample from
+     * q_state: State<R> created by get_query_state() for this shard
+     * parms:   pointer to a Parms<R>
+     *
+     * Returns state->sample_size records, each picked at a random offset
+     * within [lower_bound, upper_bound), or nothing when the share is 0
+     * or the shard has no record in range.
+     */
+    static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) {
+        auto rng = ((Parms<R> *) parms)->rng;
+        auto state = (State<R> *) q_state;
+        auto sample_sz = state->sample_size;
+
+        std::vector<Wrapped<R>> result_set;
+
+        if (sample_sz == 0 || state->lower_bound == shard->get_record_count()) {
+            return result_set;
+        }
+
+        result_set.reserve(sample_sz);
+        size_t range_length = state->upper_bound - state->lower_bound;
+        for (size_t i=0; i<sample_sz; i++) {
+            // gsl_rng_uniform_int requires a non-zero bound
+            size_t idx = (range_length > 0) ? gsl_rng_uniform_int(rng, range_length) : 0;
+            result_set.emplace_back(*shard->get_record_at(state->lower_bound + idx));
+        }
+
+        return result_set;
+    }
+
+    /*
+     * Draw the buffer's share of the sample.
+     *
+     * buffer: buffer to sample from
+     * state:  BufferState<R> created by get_buffer_query_state()
+     * parms:  pointer to a Parms<R>
+     *
+     * Returns at most st->sample_size records; with rejection sampling,
+     * draws that land outside of the key range are dropped.
+     */
+    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+        auto st = (BufferState<R> *) state;
+        auto p = (Parms<R> *) parms;
+
+        std::vector<Wrapped<R>> result;
+        result.reserve(st->sample_size);
+
+        if constexpr (Rejection) {
+            for (size_t i=0; i<st->sample_size; i++) {
+                auto idx = gsl_rng_uniform_int(p->rng, st->cutoff);
+                auto rec = buffer->get_data() + idx;
+
+                if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) {
+                    result.emplace_back(*rec);
+                }
+            }
+        } else {
+            for (size_t i=0; i<st->sample_size; i++) {
+                auto idx = gsl_rng_uniform_int(p->rng, st->records.size());
+                result.emplace_back(st->records[idx]);
+            }
+        }
+
+        return result;
+    }
+
+    /*
+     * Concatenate the per-source results into a single vector of plain
+     * records, in the order in which the sources appear in results.
+     */
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<R> output;
+
+        for (auto &partial : results) {
+            for (auto &wrapped : partial) {
+                output.emplace_back(wrapped.rec);
+            }
+        }
+
+        return output;
+    }
+
+    /* Release a state allocated by get_query_state(). */
+    static void delete_query_state(void *state) {
+        auto s = (State<R> *) state;
+        delete s;
+    }
+
+    /* Release a state allocated by get_buffer_query_state(). */
+    static void delete_buffer_query_state(void *state) {
+        auto s = (BufferState<R> *) state;
+        delete s;
+    }
+};
+}}
diff --git a/include/query/rangequery.h b/include/query/rangequery.h
new file mode 100644
index 0000000..b9ac9db
--- /dev/null
+++ b/include/query/rangequery.h
@@ -0,0 +1,167 @@
+/*
+ * include/query/rangequery.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "framework/interface/Record.h"
+#include "framework/interface/Shard.h"
+#include "framework/structure/MutableBuffer.h"
+#include "psu-ds/PriorityQueue.h"
+#include "util/Cursor.h"
+
+namespace de { namespace rq {
+
+template <RecordInterface R>
+struct Parms { // caller-supplied parameters for one range query
+ decltype(R::key) lower_bound; // smallest key to return; records qualify when key >= lower_bound
+ decltype(R::key) upper_bound; // largest key to return; records qualify when key <= upper_bound
+};
+
+template <RecordInterface R>
+struct State { // per-shard state for one range query
+ size_t start_idx; // index returned by shard->get_lower_bound() for the query's lower key
+ size_t stop_idx; // shard->get_record_count(); the scan never goes past this index
+};
+
+template <RecordInterface R>
+struct BufferState { // buffer-side state for one range query
+ size_t cutoff; // buffer->get_record_count() at state creation; buffer_query scans indices [0, cutoff)
+};
+
+/*
+ * Range query: returns the records whose key lies within
+ * [Parms::lower_bound, Parms::upper_bound]. Per-source results are
+ * combined by merge(), which also cancels a record against a matching
+ * tombstone.
+ */
+template <ShardInterface S, RecordInterface R>
+class Query {
+public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=true;
+
+    /*
+     * Allocate the per-shard state: the index at which the scan starts
+     * and the index at which it must stop (the shard's record count).
+     * Release it with delete_query_state().
+     */
+    static void *get_query_state(S *shard, void *parms) {
+        auto res = new State<R>();
+        auto p = (Parms<R> *) parms;
+
+        res->start_idx = shard->get_lower_bound(p->lower_bound);
+        res->stop_idx = shard->get_record_count();
+
+        return res;
+    }
+
+    /*
+     * Allocate the buffer-side state, recording how many buffered
+     * records the query will look at. Release it with
+     * delete_buffer_query_state().
+     */
+    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+        auto res = new BufferState<R>();
+        res->cutoff = buffer->get_record_count();
+
+        return res;
+    }
+
+    /* Range queries need no coordination between the sources. */
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) {
+        return;
+    }
+
+    /*
+     * Collect the in-range records of one shard.
+     *
+     * shard:   shard to scan
+     * q_state: State<R> created by get_query_state() for this shard
+     * parms:   pointer to a Parms<R>
+     *
+     * Returns the records found between start_idx and stop_idx whose
+     * keys are within the query range, in shard order.
+     */
+    static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) {
+        std::vector<Wrapped<R>> records;
+        auto p = (Parms<R> *) parms;
+        auto s = (State<R> *) q_state;
+
+        // if the returned index is one past the end of the shard's
+        // records, then there are no records in the shard falling
+        // into the specified range.
+        if (s->start_idx == shard->get_record_count()) {
+            return records;
+        }
+
+        auto ptr = shard->get_record_at(s->start_idx);
+        auto end = shard->get_data() + s->stop_idx;
+
+        // roll the pointer forward to the first record that is
+        // greater than or equal to the lower bound. The end check
+        // comes first so that a record past the end is never read.
+        while (ptr < end && ptr->rec.key < p->lower_bound) {
+            ptr++;
+        }
+
+        while (ptr < end && ptr->rec.key <= p->upper_bound) {
+            records.emplace_back(*ptr);
+            ptr++;
+        }
+
+        return records;
+    }
+
+    /*
+     * Collect the in-range records among the first st->cutoff records
+     * of the buffer, in buffer order.
+     */
+    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+        auto p = (Parms<R> *) parms;
+        auto s = (BufferState<R> *) state;
+
+        std::vector<Wrapped<R>> records;
+        for (size_t i=0; i<s->cutoff; i++) {
+            auto rec = buffer->get_data() + i;
+            if (rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) {
+                records.emplace_back(*rec);
+            }
+        }
+
+        return records;
+    }
+
+    /*
+     * Merge the per-source result lists into one output vector using a
+     * priority queue over one cursor per list.
+     *
+     * A non-tombstone record that is immediately followed in the queue
+     * by a tombstone for an equal record is dropped along with that
+     * tombstone; remaining tombstones are skipped and every other record
+     * is appended to the output.
+     */
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<Cursor<Wrapped<R>>> cursors;
+        cursors.reserve(results.size());
+
+        psudb::PriorityQueue<Wrapped<R>> pq(results.size());
+        size_t total = 0;
+        size_t tmp_n = results.size();
+
+        // cursors[i] always corresponds to results[i]; empty lists get a
+        // placeholder so that the version <-> index mapping below holds
+        for (size_t i = 0; i < tmp_n; ++i) {
+            if (results[i].size() > 0){
+                auto base = results[i].data();
+                cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
+                assert(i == cursors.size() - 1);
+                total += results[i].size();
+                pq.push(cursors[i].ptr, tmp_n - i - 1);
+            } else {
+                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+            }
+        }
+
+        if (total == 0) {
+            return std::vector<R>();
+        }
+
+        std::vector<R> output;
+        output.reserve(total);
+
+        while (pq.size()) {
+            auto now = pq.peek();
+            auto next = pq.size() > 1 ? pq.peek(1) : psudb::queue_record<Wrapped<R>>{nullptr, 0};
+            if (!now.data->is_tombstone() && next.data != nullptr &&
+                now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+                // record and tombstone cancel each other out
+                pq.pop(); pq.pop();
+                auto& cursor1 = cursors[tmp_n - now.version - 1];
+                auto& cursor2 = cursors[tmp_n - next.version - 1];
+                if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
+                if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
+            } else {
+                auto& cursor = cursors[tmp_n - now.version - 1];
+                if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec);
+                pq.pop();
+
+                if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
+            }
+        }
+
+        return output;
+    }
+
+    /* Release a state allocated by get_query_state(). */
+    static void delete_query_state(void *state) {
+        auto s = (State<R> *) state;
+        delete s;
+    }
+
+    /* Release a state allocated by get_buffer_query_state(). */
+    static void delete_buffer_query_state(void *state) {
+        auto s = (BufferState<R> *) state;
+        delete s;
+    }
+};
+
+}}
diff --git a/include/query/wirs.h b/include/query/wirs.h
new file mode 100644
index 0000000..1113b1d
--- /dev/null
+++ b/include/query/wirs.h
@@ -0,0 +1,240 @@
+/*
+ * include/query/wirs.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "framework/interface/Record.h"
+#include "framework/interface/Shard.h"
+#include "framework/structure/MutableBuffer.h"
+#include "psu-ds/Alias.h"
+
+namespace de { namespace wirs {
+
+template <WeightedRecordInterface R>
+struct Parms { // caller-supplied parameters for one WIRS query
+ decltype(R::key) lower_bound; // low end of the key range; buffered records qualify when key >= lower_bound
+ decltype(R::key) upper_bound; // high end of the key range; buffered records qualify when key <= upper_bound
+ size_t sample_size; // total number of draws that process_query_states splits between buffers and shards
+ gsl_rng *rng; // GSL generator used for every random draw made by the query
+};
+
+template <WeightedRecordInterface R>
+struct State { // per-shard state for one WIRS query
+ decltype(R::weight) total_weight; // value returned by shard->find_covering_nodes() for the query range
+ std::vector<void*> nodes; // opaque node handles filled in by find_covering_nodes()
+ psudb::Alias* top_level_alias; // owned; alias structure built over the normalized node weights
+ size_t sample_size; // number of draws assigned to this shard; not set by the constructor
+
+ State() {
+ total_weight = 0;
+ top_level_alias = nullptr;
+ }
+
+ ~State() {
+ if (top_level_alias) delete top_level_alias; // the state owns the alias structure
+ }
+};
+
+/*
+ * Buffer-side state for one WIRS query.
+ *
+ * Every member has a default initializer so that the destructor never
+ * deletes an indeterminate pointer, regardless of how the object is
+ * created (new T, new T(), or as a local).
+ */
+template <RecordInterface R>
+struct BufferState {
+    // index of the last buffered record considered by the query
+    size_t cutoff = 0;
+    // owned; built only when sampling without rejection
+    psudb::Alias* alias = nullptr;
+    // copies of the qualifying records; filled only without rejection
+    std::vector<Wrapped<R>> records;
+    // largest record weight in the buffer; used as the rejection bound
+    decltype(R::weight) max_weight = 0;
+    // number of draws assigned to the buffer by process_query_states
+    size_t sample_size = 0;
+    // weight of the buffer when competing with the shards for draws
+    decltype(R::weight) total_weight = 0;
+
+    ~BufferState() {
+        delete alias;
+    }
+};
+
+/*
+ * Weighted independent range sampling (WIRS) query. Draws
+ * Parms::sample_size records from the buffers and shards, restricted to
+ * keys within [Parms::lower_bound, Parms::upper_bound].
+ *
+ * When Rejection is true the buffer is sampled by rejection against its
+ * maximum weight and the key range, so fewer records than requested may
+ * be returned. Otherwise an alias structure over the qualifying
+ * buffered records is built up front and sampled directly.
+ */
+template <ShardInterface S, RecordInterface R, bool Rejection=true>
+class Query {
+public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
+    /*
+     * Allocate the per-shard state: the nodes covering the key range,
+     * their combined weight, and an alias structure over the individual
+     * node weights. Release it with delete_query_state().
+     */
+    static void *get_query_state(S *shard, void *parms) {
+        auto res = new State<R>();
+        decltype(R::key) lower_key = ((Parms<R> *) parms)->lower_bound;
+        decltype(R::key) upper_key = ((Parms<R> *) parms)->upper_bound;
+
+        std::vector<decltype(R::weight)> weights;
+        res->total_weight = shard->find_covering_nodes(lower_key, upper_key, res->nodes, weights);
+
+        // convert before dividing: with an integral weight type the
+        // quotient would otherwise be truncated to 0
+        std::vector<double> normalized_weights;
+        normalized_weights.reserve(weights.size());
+        for (auto weight : weights) {
+            normalized_weights.emplace_back((double) weight / (double) res->total_weight);
+        }
+
+        res->top_level_alias = new psudb::Alias(normalized_weights);
+        res->sample_size = 0;
+
+        return res;
+    }
+
+    /*
+     * Allocate the buffer-side state.
+     *
+     * With rejection sampling only the buffer's record count, maximum
+     * weight and total weight are captured. Without it, the records that
+     * are in range and neither tombstones nor deleted are copied and an
+     * alias structure is built over their weights.
+     *
+     * Release the result with delete_buffer_query_state().
+     */
+    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+        BufferState<R> *state = new BufferState<R>();
+
+        // cutoff is the index of the last record; an empty buffer must
+        // not wrap it around to SIZE_MAX
+        size_t record_cnt = buffer->get_record_count();
+        state->cutoff = (record_cnt > 0) ? record_cnt - 1 : 0;
+        state->sample_size = 0;
+
+        if constexpr (Rejection) {
+            state->max_weight = buffer->get_max_weight();
+            state->total_weight = buffer->get_total_weight();
+            return state;
+        } else {
+            auto parameters = (Parms<R>*) parms;
+
+            std::vector<decltype(R::weight)> weights;
+            decltype(R::weight) total_weight = 0;
+
+            for (size_t i = 0; i < record_cnt; i++) {
+                auto rec = buffer->get_data() + i;
+
+                if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) {
+                    weights.push_back(rec->rec.weight);
+                    state->records.push_back(*rec);
+                    total_weight += rec->rec.weight;
+                }
+            }
+
+            std::vector<double> normalized_weights;
+            normalized_weights.reserve(weights.size());
+            for (size_t i = 0; i < weights.size(); i++) {
+                normalized_weights.push_back((double) weights[i] / (double) total_weight);
+            }
+
+            state->total_weight = total_weight;
+            state->alias = new psudb::Alias(normalized_weights);
+
+            return state;
+        }
+    }
+
+    /*
+     * Split the requested sample size between the buffers and shards,
+     * proportionally to their total weights, by incrementing the
+     * sample_size of whichever state each draw selects.
+     *
+     * query_parms:   pointer to a Parms<R>
+     * shard_states:  State<R> pointers, one per shard
+     * buffer_states: BufferState<R> pointers, one per buffer
+     */
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) {
+        auto p = (Parms<R> *) query_parms;
+
+        // buffers occupy the leading slots, shards follow
+        std::vector<decltype(R::weight)> weights;
+        weights.reserve(buffer_states.size() + shard_states.size());
+
+        decltype(R::weight) total_weight = 0;
+        for (auto &s : buffer_states) {
+            auto bs = (BufferState<R> *) s;
+            total_weight += bs->total_weight;
+            weights.push_back(bs->total_weight);
+        }
+
+        for (auto &s : shard_states) {
+            auto state = (State<R> *) s;
+            total_weight += state->total_weight;
+            weights.push_back(state->total_weight);
+        }
+
+        // nothing to sample from: leave every sample size untouched
+        // rather than normalizing by zero
+        if (total_weight == 0) {
+            return;
+        }
+
+        std::vector<double> normalized_weights;
+        normalized_weights.reserve(weights.size());
+        for (auto w : weights) {
+            normalized_weights.push_back((double) w / (double) total_weight);
+        }
+
+        auto shard_alias = psudb::Alias(normalized_weights);
+        for (size_t i=0; i<p->sample_size; i++) {
+            auto idx = shard_alias.get(p->rng);
+
+            if (idx < buffer_states.size()) {
+                auto state = (BufferState<R> *) buffer_states[idx];
+                state->sample_size++;
+            } else {
+                auto state = (State<R> *) shard_states[idx - buffer_states.size()];
+                state->sample_size++;
+            }
+        }
+    }
+
+    /*
+     * Draw this shard's share of the sample.
+     *
+     * For each draw a covering node is picked through the top-level
+     * alias structure and the shard is asked for a weighted sample from
+     * that node; draws for which the shard returns null are skipped.
+     */
+    static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) {
+        auto lower_key = ((Parms<R> *) parms)->lower_bound;
+        auto upper_key = ((Parms<R> *) parms)->upper_bound;
+        auto rng = ((Parms<R> *) parms)->rng;
+
+        auto state = (State<R> *) q_state;
+        auto sample_size = state->sample_size;
+
+        std::vector<Wrapped<R>> result_set;
+
+        if (sample_size == 0) {
+            return result_set;
+        }
+
+        for (size_t i=0; i<sample_size; i++) {
+            auto rec = shard->get_weighted_sample(lower_key, upper_key,
+                                                  state->nodes[state->top_level_alias->get(rng)],
+                                                  rng);
+            if (rec) {
+                result_set.emplace_back(*rec);
+            }
+        }
+
+        return result_set;
+    }
+
+    /*
+     * Draw the buffer's share of the sample.
+     *
+     * With rejection sampling a record index is drawn from the whole
+     * buffer and the record is kept only if it passes the weight test
+     * and lies within the key range. Otherwise records are picked
+     * through the alias structure built by get_buffer_query_state().
+     */
+    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+        auto st = (BufferState<R> *) state;
+        auto p = (Parms<R> *) parms;
+
+        std::vector<Wrapped<R>> result;
+        result.reserve(st->sample_size);
+
+        if constexpr (Rejection) {
+            for (size_t i=0; i<st->sample_size; i++) {
+                // gsl_rng_uniform_int(rng, n) draws from [0, n-1]; cutoff
+                // is the last valid index, so the bound must be cutoff+1
+                // for the final record to be reachable
+                auto idx = gsl_rng_uniform_int(p->rng, st->cutoff + 1);
+                auto rec = buffer->get_data() + idx;
+
+                auto test = gsl_rng_uniform(p->rng) * st->max_weight;
+
+                if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) {
+                    result.emplace_back(*rec);
+                }
+            }
+        } else {
+            for (size_t i=0; i<st->sample_size; i++) {
+                auto idx = st->alias->get(p->rng);
+                result.emplace_back(st->records[idx]);
+            }
+        }
+
+        return result;
+    }
+
+    /*
+     * Concatenate the per-source results into a single vector of plain
+     * records, in the order in which the sources appear in results.
+     */
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<R> output;
+
+        for (auto &partial : results) {
+            for (auto &wrapped : partial) {
+                output.emplace_back(wrapped.rec);
+            }
+        }
+
+        return output;
+    }
+
+    /* Release a state allocated by get_query_state(). */
+    static void delete_query_state(void *state) {
+        auto s = (State<R> *) state;
+        delete s;
+    }
+
+    /* Release a state allocated by get_buffer_query_state(). */
+    static void delete_buffer_query_state(void *state) {
+        auto s = (BufferState<R> *) state;
+        delete s;
+    }
+};
+}}
diff --git a/include/query/wss.h b/include/query/wss.h
new file mode 100644
index 0000000..794485c
--- /dev/null
+++ b/include/query/wss.h
@@ -0,0 +1,204 @@
+/*
+ * include/query/wss.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "framework/interface/Record.h"
+#include "framework/interface/Shard.h"
+#include "framework/structure/MutableBuffer.h"
+
+namespace de { namespace wss {
+
+template <WeightedRecordInterface R>
+struct Parms { // caller-supplied parameters for one WSS query
+ size_t sample_size; // total number of draws that process_query_states splits between buffers and shards
+ gsl_rng *rng; // GSL generator used for every random draw made by the query
+};
+
+template <WeightedRecordInterface R>
+struct State { // per-shard state for one WSS query
+ decltype(R::weight) total_weight; // shard->get_total_weight(), captured by get_query_state
+ size_t sample_size; // number of draws assigned to this shard; not set by the constructor
+
+ State() {
+ total_weight = 0;
+ }
+};
+
+/*
+ * Buffer-side state for one WSS query.
+ *
+ * Every member has a default initializer so that the destructor never
+ * deletes an indeterminate pointer, regardless of how the object is
+ * created (new T, new T(), or as a local).
+ */
+template <RecordInterface R>
+struct BufferState {
+    // index of the last buffered record considered by the query
+    size_t cutoff = 0;
+    // number of draws assigned to the buffer by process_query_states
+    size_t sample_size = 0;
+    // owned; built only when sampling without rejection
+    psudb::Alias *alias = nullptr;
+    // largest record weight in the buffer; used as the rejection bound
+    decltype(R::weight) max_weight = 0;
+    // weight of the buffer when competing with the shards for draws
+    decltype(R::weight) total_weight = 0;
+
+    ~BufferState() {
+        delete alias;
+    }
+};
+
+/*
+ * Weighted set sampling (WSS) query. Draws Parms::sample_size records
+ * from the buffers and shards according to the record weights.
+ *
+ * When Rejection is true the buffer is sampled by rejection against its
+ * maximum weight, so fewer records than requested may be returned.
+ * Otherwise an alias structure over all buffered records is built up
+ * front and sampled directly.
+ */
+template <ShardInterface S, RecordInterface R, bool Rejection=true>
+class Query {
+public:
+    constexpr static bool EARLY_ABORT=false;
+    constexpr static bool SKIP_DELETE_FILTER=false;
+
+    /*
+     * Allocate the per-shard state, capturing the shard's total weight.
+     * Release it with delete_query_state().
+     */
+    static void *get_query_state(S *shard, void *parms) {
+        auto res = new State<R>();
+        res->total_weight = shard->get_total_weight();
+        res->sample_size = 0;
+
+        return res;
+    }
+
+    /*
+     * Allocate the buffer-side state.
+     *
+     * With rejection sampling only the buffer's record count, maximum
+     * weight and total weight are captured. Without it, an alias
+     * structure is built over the weights of all buffered records.
+     *
+     * Release the result with delete_buffer_query_state().
+     */
+    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
+        BufferState<R> *state = new BufferState<R>();
+
+        // cutoff is the index of the last record; an empty buffer must
+        // not wrap it around to SIZE_MAX
+        size_t record_cnt = buffer->get_record_count();
+        state->cutoff = (record_cnt > 0) ? record_cnt - 1 : 0;
+        state->sample_size = 0;
+
+        if constexpr (Rejection) {
+            state->max_weight = buffer->get_max_weight();
+            state->total_weight = buffer->get_total_weight();
+            return state;
+        } else {
+            std::vector<double> weights;
+            weights.reserve(record_cnt);
+            double total_weight = 0.0;
+
+            for (size_t i = 0; i < record_cnt; i++) {
+                auto rec = buffer->get_data() + i;
+                weights.push_back(rec->rec.weight);
+                total_weight += rec->rec.weight;
+            }
+
+            // normalize in place
+            for (size_t i = 0; i < weights.size(); i++) {
+                weights[i] = weights[i] / total_weight;
+            }
+
+            state->alias = new psudb::Alias(weights);
+            state->total_weight = total_weight;
+
+            return state;
+        }
+    }
+
+    /*
+     * Split the requested sample size between the buffers and shards,
+     * proportionally to their total weights, by incrementing the
+     * sample_size of whichever state each draw selects.
+     *
+     * query_parms:   pointer to a Parms<R>
+     * shard_states:  State<R> pointers, one per shard
+     * buffer_states: BufferState<R> pointers, one per buffer
+     */
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) {
+        auto p = (Parms<R> *) query_parms;
+
+        // buffers occupy the leading slots, shards follow
+        std::vector<decltype(R::weight)> weights;
+        weights.reserve(buffer_states.size() + shard_states.size());
+
+        decltype(R::weight) total_weight = 0;
+        for (auto &s : buffer_states) {
+            auto bs = (BufferState<R> *) s;
+            total_weight += bs->total_weight;
+            weights.push_back(bs->total_weight);
+        }
+
+        for (auto &s : shard_states) {
+            auto state = (State<R> *) s;
+            total_weight += state->total_weight;
+            weights.push_back(state->total_weight);
+        }
+
+        // nothing to sample from: leave every sample size untouched
+        // rather than normalizing by zero
+        if (total_weight == 0) {
+            return;
+        }
+
+        std::vector<double> normalized_weights;
+        normalized_weights.reserve(weights.size());
+        for (auto w : weights) {
+            normalized_weights.push_back((double) w / (double) total_weight);
+        }
+
+        auto shard_alias = psudb::Alias(normalized_weights);
+        for (size_t i=0; i<p->sample_size; i++) {
+            auto idx = shard_alias.get(p->rng);
+
+            if (idx < buffer_states.size()) {
+                auto state = (BufferState<R> *) buffer_states[idx];
+                state->sample_size++;
+            } else {
+                auto state = (State<R> *) shard_states[idx - buffer_states.size()];
+                state->sample_size++;
+            }
+        }
+    }
+
+    /*
+     * Draw this shard's share of the sample: state->sample_size records,
+     * each located through the shard's own weighted sampling routine.
+     */
+    static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) {
+        auto rng = ((Parms<R> *) parms)->rng;
+
+        auto state = (State<R> *) q_state;
+        auto sample_size = state->sample_size;
+
+        std::vector<Wrapped<R>> result_set;
+
+        if (sample_size == 0) {
+            return result_set;
+        }
+
+        result_set.reserve(sample_size);
+        for (size_t i=0; i<sample_size; i++) {
+            size_t idx = shard->get_weighted_sample(rng);
+            result_set.emplace_back(*shard->get_record_at(idx));
+        }
+
+        return result_set;
+    }
+
+    /*
+     * Draw the buffer's share of the sample.
+     *
+     * With rejection sampling a record index is drawn from the whole
+     * buffer and the record is kept only if it passes the weight test.
+     * Otherwise records are picked through the alias structure built by
+     * get_buffer_query_state().
+     */
+    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
+        auto st = (BufferState<R> *) state;
+        auto p = (Parms<R> *) parms;
+
+        std::vector<Wrapped<R>> result;
+        result.reserve(st->sample_size);
+
+        if constexpr (Rejection) {
+            for (size_t i=0; i<st->sample_size; i++) {
+                // gsl_rng_uniform_int(rng, n) draws from [0, n-1]; cutoff
+                // is the last valid index, so the bound must be cutoff+1
+                // for the final record to be reachable
+                auto idx = gsl_rng_uniform_int(p->rng, st->cutoff + 1);
+                auto rec = buffer->get_data() + idx;
+
+                auto test = gsl_rng_uniform(p->rng) * st->max_weight;
+
+                if (test <= rec->rec.weight) {
+                    result.emplace_back(*rec);
+                }
+            }
+        } else {
+            for (size_t i=0; i<st->sample_size; i++) {
+                auto idx = st->alias->get(p->rng);
+                result.emplace_back(*(buffer->get_data() + idx));
+            }
+        }
+
+        return result;
+    }
+
+    /*
+     * Concatenate the per-source results into a single vector of plain
+     * records, in the order in which the sources appear in results.
+     */
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<R> output;
+
+        for (auto &partial : results) {
+            for (auto &wrapped : partial) {
+                output.emplace_back(wrapped.rec);
+            }
+        }
+
+        return output;
+    }
+
+    /* Release a state allocated by get_query_state(). */
+    static void delete_query_state(void *state) {
+        auto s = (State<R> *) state;
+        delete s;
+    }
+
+    /* Release a state allocated by get_buffer_query_state(). */
+    static void delete_buffer_query_state(void *state) {
+        auto s = (BufferState<R> *) state;
+        delete s;
+    }
+};
+
+}}
diff --git a/include/shard/WSS.h b/include/shard/Alias.h
index 4e3a326..a4a7d02 100644
--- a/include/shard/WSS.h
+++ b/include/shard/Alias.h
@@ -1,5 +1,5 @@
/*
- * include/shard/WSS.h
+ * include/shard/Alias.h
*
* Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
* Dong Xie <dongx@psu.edu>
@@ -9,7 +9,6 @@
*/
#pragma once
-
#include <vector>
#include <cassert>
#include <queue>
@@ -28,59 +27,20 @@ using psudb::CACHELINE_SIZE;
using psudb::BloomFilter;
using psudb::PriorityQueue;
using psudb::queue_record;
-using psudb::Alias;
namespace de {
thread_local size_t wss_cancelations = 0;
template <WeightedRecordInterface R>
-struct wss_query_parms {
- size_t sample_size;
- gsl_rng *rng;
-};
-
-template <WeightedRecordInterface R, bool Rejection>
-class WSSQuery;
-
-template <WeightedRecordInterface R>
-struct WSSState {
- decltype(R::weight) total_weight;
- size_t sample_size;
-
- WSSState() {
- total_weight = 0;
- }
-};
-
-template <WeightedRecordInterface R>
-struct WSSBufferState {
- size_t cutoff;
- size_t sample_size;
- Alias* alias;
- decltype(R::weight) max_weight;
- decltype(R::weight) total_weight;
-
- ~WSSBufferState() {
- delete alias;
- }
-
-};
-
-template <WeightedRecordInterface R>
-class WSS {
+class Alias {
private:
typedef decltype(R::key) K;
typedef decltype(R::value) V;
typedef decltype(R::weight) W;
public:
-
- // FIXME: there has to be a better way to do this
- friend class WSSQuery<R, true>;
- friend class WSSQuery<R, false>;
-
- WSS(MutableBuffer<R>* buffer)
+ Alias(MutableBuffer<R>* buffer)
: m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) {
m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
@@ -132,7 +92,7 @@ public:
}
}
- WSS(WSS** shards, size_t len)
+ Alias(Alias** shards, size_t len)
: m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_alias(nullptr), m_bf(nullptr) {
std::vector<Cursor<Wrapped<R>>> cursors;
cursors.reserve(len);
@@ -195,7 +155,7 @@ public:
}
}
- ~WSS() {
+ ~Alias() {
if (m_data) free(m_data);
if (m_alias) delete m_alias;
if (m_bf) delete m_bf;
@@ -247,7 +207,13 @@ public:
return 0;
}
-private:
+ // Returns the shard's m_total_weight. Declared const, like the other
+ // read-only accessors of this class, so it can be called on a const shard.
+ W get_total_weight() const {
+ return m_total_weight;
+ }
+
+ size_t get_weighted_sample(gsl_rng *rng) const { // draws one value from the shard's alias structure using rng
+ return m_alias->get(rng); // NOTE(review): m_alias starts as nullptr in the constructors; presumably always built before sampling - confirm
+ }
size_t get_lower_bound(const K& key) const {
size_t min = 0;
@@ -267,6 +233,8 @@ private:
return min;
}
+private:
+
void build_alias_structure(std::vector<W> &weights) {
// normalize the weights vector
@@ -277,11 +245,11 @@ private:
}
// build the alias structure
- m_alias = new Alias(norm_weights);
+ m_alias = new psudb::Alias(norm_weights);
}
Wrapped<R>* m_data;
- Alias *m_alias;
+ psudb::Alias *m_alias;
W m_total_weight;
size_t m_reccnt;
size_t m_tombstone_cnt;
@@ -289,165 +257,4 @@ private:
size_t m_alloc_size;
BloomFilter<R> *m_bf;
};
-
-
-template <WeightedRecordInterface R, bool Rejection=true>
-class WSSQuery {
-public:
-
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=false;
-
- static void *get_query_state(WSS<R> *wss, void *parms) {
- auto res = new WSSState<R>();
- res->total_weight = wss->m_total_weight;
- res->sample_size = 0;
-
- return res;
- }
-
- static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
- WSSBufferState<R> *state = new WSSBufferState<R>();
- auto parameters = (wss_query_parms<R>*) parms;
- if constexpr (Rejection) {
- state->cutoff = buffer->get_record_count() - 1;
- state->max_weight = buffer->get_max_weight();
- state->total_weight = buffer->get_total_weight();
- return state;
- }
-
- std::vector<double> weights;
-
- state->cutoff = buffer->get_record_count() - 1;
- double total_weight = 0.0;
-
- for (size_t i = 0; i <= state->cutoff; i++) {
- auto rec = buffer->get_data() + i;
- weights.push_back(rec->rec.weight);
- total_weight += rec->rec.weight;
- }
-
- for (size_t i = 0; i < weights.size(); i++) {
- weights[i] = weights[i] / total_weight;
- }
-
- state->alias = new Alias(weights);
- state->total_weight = total_weight;
-
- return state;
- }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
- auto p = (wss_query_parms<R> *) query_parms;
- auto bs = (WSSBufferState<R> *) buff_state;
-
- std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0);
- size_t buffer_sz = 0;
-
- std::vector<decltype(R::weight)> weights;
- weights.push_back(bs->total_weight);
-
- decltype(R::weight) total_weight = 0;
- for (auto &s : shard_states) {
- auto state = (WSSState<R> *) s;
- total_weight += state->total_weight;
- weights.push_back(state->total_weight);
- }
-
- std::vector<double> normalized_weights;
- for (auto w : weights) {
- normalized_weights.push_back((double) w / (double) total_weight);
- }
-
- auto shard_alias = Alias(normalized_weights);
- for (size_t i=0; i<p->sample_size; i++) {
- auto idx = shard_alias.get(p->rng);
- if (idx == 0) {
- buffer_sz++;
- } else {
- shard_sample_sizes[idx - 1]++;
- }
- }
-
-
- bs->sample_size = buffer_sz;
- for (size_t i=0; i<shard_states.size(); i++) {
- auto state = (WSSState<R> *) shard_states[i];
- state->sample_size = shard_sample_sizes[i+1];
- }
- }
-
- static std::vector<Wrapped<R>> query(WSS<R> *wss, void *q_state, void *parms) {
- auto rng = ((wss_query_parms<R> *) parms)->rng;
-
- auto state = (WSSState<R> *) q_state;
- auto sample_size = state->sample_size;
-
- std::vector<Wrapped<R>> result_set;
-
- if (sample_size == 0) {
- return result_set;
- }
- size_t attempts = 0;
- do {
- attempts++;
- size_t idx = wss->m_alias->get(rng);
- result_set.emplace_back(*wss->get_record_at(idx));
- } while (attempts < sample_size);
-
- return result_set;
- }
-
- static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
- auto st = (WSSBufferState<R> *) state;
- auto p = (wss_query_parms<R> *) parms;
-
- std::vector<Wrapped<R>> result;
- result.reserve(st->sample_size);
-
- if constexpr (Rejection) {
- for (size_t i=0; i<st->sample_size; i++) {
- auto idx = gsl_rng_uniform_int(p->rng, st->cutoff);
- auto rec = buffer->get_data() + idx;
-
- auto test = gsl_rng_uniform(p->rng) * st->max_weight;
-
- if (test <= rec->rec.weight) {
- result.emplace_back(*rec);
- }
- }
- return result;
- }
-
- for (size_t i=0; i<st->sample_size; i++) {
- auto idx = st->alias->get(p->rng);
- result.emplace_back(*(buffer->get_data() + idx));
- }
-
- return result;
- }
-
- static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
- std::vector<R> output;
-
- for (size_t i=0; i<results.size(); i++) {
- for (size_t j=0; j<results[i].size(); j++) {
- output.emplace_back(results[i][j].rec);
- }
- }
-
- return output;
- }
-
- static void delete_query_state(void *state) {
- auto s = (WSSState<R> *) state;
- delete s;
- }
-
- static void delete_buffer_query_state(void *state) {
- auto s = (WSSBufferState<R> *) state;
- delete s;
- }
-};
-
}
diff --git a/include/shard/WIRS.h b/include/shard/AugBTree.h
index bf29325..e32ec64 100644
--- a/include/shard/WIRS.h
+++ b/include/shard/AugBTree.h
@@ -1,5 +1,5 @@
/*
- * include/shard/WIRS.h
+ * include/shard/AugBTree.h
*
* Copyright (C) 2023 Dong Xie <dongx@psu.edu>
* Douglas B. Rumbaugh <drumbaugh@psu.edu>
@@ -35,73 +35,23 @@ namespace de {
thread_local size_t wirs_cancelations = 0;
template <WeightedRecordInterface R>
-struct wirs_query_parms {
- decltype(R::key) lower_bound;
- decltype(R::key) upper_bound;
- size_t sample_size;
- gsl_rng *rng;
-};
-
-template <WeightedRecordInterface R, bool Rejection>
-class WIRSQuery;
-
-template <WeightedRecordInterface R>
-struct wirs_node {
- struct wirs_node<R> *left, *right;
+struct AugBTreeNode {
+ struct AugBTreeNode<R> *left, *right;
decltype(R::key) low, high;
decltype(R::weight) weight;
Alias* alias;
};
template <WeightedRecordInterface R>
-struct WIRSState {
- decltype(R::weight) total_weight;
- std::vector<wirs_node<R>*> nodes;
- Alias* top_level_alias;
- size_t sample_size;
-
- WIRSState() {
- total_weight = 0;
- top_level_alias = nullptr;
- }
-
- ~WIRSState() {
- if (top_level_alias) delete top_level_alias;
- }
-};
-
-template <WeightedRecordInterface R>
-struct WIRSBufferState {
- size_t cutoff;
- Alias* alias;
- std::vector<Wrapped<R>> records;
- decltype(R::weight) max_weight;
- size_t sample_size;
- decltype(R::weight) total_weight;
-
- ~WIRSBufferState() {
- delete alias;
- }
-
-};
-
-template <WeightedRecordInterface R>
-class WIRS {
+class AugBTree {
private:
-
typedef decltype(R::key) K;
typedef decltype(R::value) V;
typedef decltype(R::weight) W;
public:
-
- // FIXME: there has to be a better way to do this
- friend class WIRSQuery<R, true>;
- friend class WIRSQuery<R, false>;
-
- WIRS(MutableBuffer<R>* buffer)
+ AugBTree(MutableBuffer<R>* buffer)
: m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) {
-
m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
assert(m_alloc_size % CACHELINE_SIZE == 0);
m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
@@ -148,7 +98,7 @@ public:
}
}
- WIRS(WIRS** shards, size_t len)
+ AugBTree(AugBTree** shards, size_t len)
: m_reccnt(0), m_tombstone_cnt(0), m_total_weight(0), m_root(nullptr) {
std::vector<Cursor<Wrapped<R>>> cursors;
cursors.reserve(len);
@@ -208,7 +158,7 @@ public:
}
}
- ~WIRS() {
+ ~AugBTree() {
if (m_data) free(m_data);
for (size_t i=0; i<m_alias.size(); i++) {
if (m_alias[i]) delete m_alias[i];
@@ -257,15 +207,13 @@ public:
size_t get_memory_usage() {
- return m_alloc_size + m_node_cnt * sizeof(wirs_node<Wrapped<R>>);
+ return m_alloc_size + m_node_cnt * sizeof(AugBTreeNode<Wrapped<R>>);
}
size_t get_aux_memory_usage() {
return 0;
}
-private:
-
size_t get_lower_bound(const K& key) const {
size_t min = 0;
size_t max = m_reccnt - 1;
@@ -284,13 +232,60 @@ private:
return min;
}
- bool covered_by(struct wirs_node<R>* node, const K& lower_key, const K& upper_key) {
+    /*
+     * Collects the tree nodes used to sample from the key range bounded by
+     * lower_key and upper_key.
+     *
+     * A node is selected when covered_by() accepts it, or when it has no
+     * children and intersects() the range; otherwise the search descends
+     * into whichever children intersect the range. For every selected node
+     * its pointer is appended to `nodes` and its weight to `weights`.
+     *
+     * Returns the sum of the selected nodes' weights. When the shard has
+     * no tree (m_root is nullptr) nothing is appended and 0 is returned.
+     */
+    W find_covering_nodes(K lower_key, K upper_key, std::vector<void *> &nodes, std::vector<W> &weights) {
+        W total_weight = 0;
+
+        /* m_root is initialised to nullptr by the constructors. */
+        if (m_root == nullptr) {
+            return total_weight;
+        }
+
+        /* Simulate a stack to unfold recursion. */
+        constexpr size_t stack_capacity = 64;
+        struct AugBTreeNode<R>* st[stack_capacity] = {0};
+        st[0] = m_root;
+        size_t top = 1;
+        while (top > 0) {
+            auto now = st[--top];
+            if (covered_by(now, lower_key, upper_key) ||
+                (now->left == nullptr && now->right == nullptr && intersects(now, lower_key, upper_key))) {
+                nodes.emplace_back(now);
+                weights.emplace_back(now->weight);
+                total_weight += now->weight;
+            } else {
+                if (now->left && intersects(now->left, lower_key, upper_key)) {
+                    assert(top < stack_capacity);
+                    st[top++] = now->left;
+                }
+                if (now->right && intersects(now->right, lower_key, upper_key)) {
+                    assert(top < stack_capacity);
+                    st[top++] = now->right;
+                }
+            }
+        }
+
+        return total_weight;
+    }
+
+    /*
+     * Draws one record in three steps: the caller has already chosen a
+     * node (internal_node), a group ("fat point") is drawn from that
+     * node's alias structure, and a record is drawn from the group's own
+     * alias structure.
+     *
+     * Returns a pointer to the drawn record, or nullptr when its key lies
+     * outside [lower_key, upper_key] (bounds rejection).
+     */
+    Wrapped<R> *get_weighted_sample(K lower_key, K upper_key, void *internal_node, gsl_rng *rng) {
+        /* level 1: the node handed in by the caller */
+        auto *chosen_node = static_cast<AugBTreeNode<R>*>(internal_node);
+
+        /* level 2: pick a group within the node */
+        auto group_idx = chosen_node->low + chosen_node->alias->get(rng);
+
+        /* level 3: pick a record within the group */
+        size_t data_offset = group_idx * m_group_size + m_alias[group_idx]->get(rng);
+        Wrapped<R> *candidate = m_data + data_offset;
+
+        const bool out_of_range = lower_key > candidate->rec.key || upper_key < candidate->rec.key;
+        return out_of_range ? nullptr : candidate;
+    }
+
+private:
+
+ bool covered_by(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) {
auto low_index = node->low * m_group_size;
auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1);
return lower_key < m_data[low_index].rec.key && m_data[high_index].rec.key < upper_key;
}
- bool intersects(struct wirs_node<R>* node, const K& lower_key, const K& upper_key) {
+ bool intersects(struct AugBTreeNode<R>* node, const K& lower_key, const K& upper_key) {
auto low_index = node->low * m_group_size;
auto high_index = std::min((node->high + 1) * m_group_size - 1, m_reccnt - 1);
return lower_key < m_data[high_index].rec.key && m_data[low_index].rec.key < upper_key;
@@ -327,12 +322,12 @@ private:
assert(weights.size() == n_groups);
- m_root = construct_wirs_node(weights, 0, n_groups-1);
+ m_root = construct_AugBTreeNode(weights, 0, n_groups-1);
}
- struct wirs_node<R>* construct_wirs_node(const std::vector<W>& weights, size_t low, size_t high) {
+ struct AugBTreeNode<R>* construct_AugBTreeNode(const std::vector<W>& weights, size_t low, size_t high) {
if (low == high) {
- return new wirs_node<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})};
+ return new AugBTreeNode<R>{nullptr, nullptr, low, high, weights[low], new Alias({1.0})};
} else if (low > high) return nullptr;
std::vector<double> node_weights;
@@ -348,12 +343,12 @@ private:
m_node_cnt += 1;
size_t mid = (low + high) / 2;
- return new wirs_node<R>{construct_wirs_node(weights, low, mid),
- construct_wirs_node(weights, mid + 1, high),
+ return new AugBTreeNode<R>{construct_AugBTreeNode(weights, low, mid),
+ construct_AugBTreeNode(weights, mid + 1, high),
low, high, sum, new Alias(node_weights)};
}
- void free_tree(struct wirs_node<R>* node) {
+ void free_tree(struct AugBTreeNode<R>* node) {
if (node) {
delete node->alias;
free_tree(node->left);
@@ -364,7 +359,7 @@ private:
Wrapped<R>* m_data;
std::vector<Alias *> m_alias;
- wirs_node<R>* m_root;
+ AugBTreeNode<R>* m_root;
W m_total_weight;
size_t m_reccnt;
size_t m_tombstone_cnt;
@@ -373,222 +368,4 @@ private:
size_t m_node_cnt;
BloomFilter<R> *m_bf;
};
-
-
-template <WeightedRecordInterface R, bool Rejection=true>
-class WIRSQuery {
-public:
-
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=false;
-
- static void *get_query_state(WIRS<R> *wirs, void *parms) {
- auto res = new WIRSState<R>();
- decltype(R::key) lower_key = ((wirs_query_parms<R> *) parms)->lower_bound;
- decltype(R::key) upper_key = ((wirs_query_parms<R> *) parms)->upper_bound;
-
- // Simulate a stack to unfold recursion.
- double total_weight = 0.0;
- struct wirs_node<R>* st[64] = {0};
- st[0] = wirs->m_root;
- size_t top = 1;
- while(top > 0) {
- auto now = st[--top];
- if (wirs->covered_by(now, lower_key, upper_key) ||
- (now->left == nullptr && now->right == nullptr && wirs->intersects(now, lower_key, upper_key))) {
- res->nodes.emplace_back(now);
- total_weight += now->weight;
- } else {
- if (now->left && wirs->intersects(now->left, lower_key, upper_key)) st[top++] = now->left;
- if (now->right && wirs->intersects(now->right, lower_key, upper_key)) st[top++] = now->right;
- }
- }
-
- std::vector<double> weights;
- for (const auto& node: res->nodes) {
- weights.emplace_back(node->weight / total_weight);
- }
- res->total_weight = total_weight;
- res->top_level_alias = new Alias(weights);
- res->sample_size = 0;
-
- return res;
- }
-
- static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
- WIRSBufferState<R> *state = new WIRSBufferState<R>();
- auto parameters = (wirs_query_parms<R>*) parms;
- if constexpr (Rejection) {
- state->cutoff = buffer->get_record_count() - 1;
- state->max_weight = buffer->get_max_weight();
- state->total_weight = buffer->get_total_weight();
- state->sample_size = 0;
- return state;
- }
-
- std::vector<double> weights;
-
- state->cutoff = buffer->get_record_count() - 1;
- double total_weight = 0.0;
-
- for (size_t i = 0; i <= state->cutoff; i++) {
- auto rec = buffer->get_data() + i;
-
- if (rec->rec.key >= parameters->lower_bound && rec->rec.key <= parameters->upper_bound && !rec->is_tombstone() && !rec->is_deleted()) {
- weights.push_back(rec->rec.weight);
- state->records.push_back(*rec);
- total_weight += rec->rec.weight;
- }
- }
-
- for (size_t i = 0; i < weights.size(); i++) {
- weights[i] = weights[i] / total_weight;
- }
-
- state->total_weight = total_weight;
- state->alias = new Alias(weights);
- state->sample_size = 0;
-
- return state;
- }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buff_states) {
- // FIXME: need to redo for the buffer vector interface
- auto p = (wirs_query_parms<R> *) query_parms;
-
- std::vector<size_t> shard_sample_sizes(shard_states.size()+1, 0);
- size_t buffer_sz = 0;
-
- decltype(R::weight) total_weight = 0;
- std::vector<decltype(R::weight)> weights;
- for (auto &s : buff_states) {
- auto state = (WIRSBufferState<R> *) s;
- total_weight += state->total_weight;
- weights.push_back(state->total_weight);
- }
-
- for (auto &s : shard_states) {
- auto state = (WIRSState<R> *) s;
- total_weight += state->total_weight;
- weights.push_back(state->total_weight);
- }
-
- std::vector<double> normalized_weights;
- for (auto w : weights) {
- normalized_weights.push_back((double) w / (double) total_weight);
- }
-
- auto shard_alias = Alias(normalized_weights);
- for (size_t i=0; i<p->sample_size; i++) {
- auto idx = shard_alias.get(p->rng);
- if (idx == 0) {
- buffer_sz++;
- } else {
- shard_sample_sizes[idx - 1]++;
- }
- }
-
- for (size_t i=0; i<shard_states.size(); i++) {
- auto state = (WIRSState<R> *) shard_states[i];
- state->sample_size = shard_sample_sizes[i+1];
- }
- }
-
-
-
- static std::vector<Wrapped<R>> query(WIRS<R> *wirs, void *q_state, void *parms) {
- auto lower_key = ((wirs_query_parms<R> *) parms)->lower_bound;
- auto upper_key = ((wirs_query_parms<R> *) parms)->upper_bound;
- auto rng = ((wirs_query_parms<R> *) parms)->rng;
-
- auto state = (WIRSState<R> *) q_state;
- auto sample_size = state->sample_size;
-
- std::vector<Wrapped<R>> result_set;
-
- if (sample_size == 0) {
- return result_set;
- }
- // k -> sampling: three levels. 1. select a node -> select a fat point -> select a record.
- size_t cnt = 0;
- size_t attempts = 0;
- do {
- ++attempts;
- // first level....
- auto node = state->nodes[state->top_level_alias->get(rng)];
- // second level...
- auto fat_point = node->low + node->alias->get(rng);
- // third level...
- size_t rec_offset = fat_point * wirs->m_group_size + wirs->m_alias[fat_point]->get(rng);
- auto record = wirs->m_data + rec_offset;
-
- // bounds rejection
- if (lower_key > record->rec.key || upper_key < record->rec.key) {
- continue;
- }
-
- result_set.emplace_back(*record);
- cnt++;
- } while (attempts < sample_size);
-
- return result_set;
- }
-
- static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
- auto st = (WIRSBufferState<R> *) state;
- auto p = (wirs_query_parms<R> *) parms;
-
- std::vector<Wrapped<R>> result;
- result.reserve(st->sample_size);
-
- if constexpr (Rejection) {
- for (size_t i=0; i<st->sample_size; i++) {
- auto idx = gsl_rng_uniform_int(p->rng, st->cutoff);
- auto rec = buffer->get_data() + idx;
-
- auto test = gsl_rng_uniform(p->rng) * st->max_weight;
-
- if (test <= rec->rec.weight && rec->rec.key >= p->lower_bound && rec->rec.key <= p->upper_bound) {
- result.emplace_back(*rec);
- }
- }
- return result;
- }
-
- for (size_t i=0; i<st->sample_size; i++) {
- auto idx = st->alias->get(p->rng);
- result.emplace_back(st->records[idx]);
- }
-
- return result;
- }
-
- static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
- std::vector<R> output;
-
- for (size_t i=0; i<results.size(); i++) {
- for (size_t j=0; j<results[i].size(); j++) {
- output.emplace_back(results[i][j].rec);
- }
- }
-
- return output;
- }
-
- static void delete_query_state(void *state) {
- auto s = (WIRSState<R> *) state;
- delete s;
- }
-
- static void delete_buffer_query_state(void *state) {
- auto s = (WIRSBufferState<R> *) state;
- delete s;
- }
-
-
- //{q.get_buffer_query_state(p, p)};
- //{q.buffer_query(p, p)};
-
-};
-
}
diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h
new file mode 100644
index 0000000..a610c09
--- /dev/null
+++ b/include/shard/ISAMTree.h
@@ -0,0 +1,339 @@
+/*
+ * include/shard/ISAMTree.h
+ *
+ * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <vector>
+#include <cassert>
+#include <queue>
+#include <memory>
+
+#include "framework/ShardRequirements.h"
+
+#include "util/bf_config.h"
+#include "psu-ds/PriorityQueue.h"
+#include "util/Cursor.h"
+#include "psu-util/timer.h"
+
+using psudb::CACHELINE_SIZE;
+using psudb::BloomFilter;
+using psudb::PriorityQueue;
+using psudb::queue_record;
+using psudb::Alias;
+
+namespace de {
+
+/*
+ * Per-thread count of record/tombstone pairs cancelled while building a
+ * shard from a buffer. Declared inline because this definition lives in a
+ * header: without it, every translation unit including the header would
+ * emit its own definition of the same external symbol.
+ */
+inline thread_local size_t mrun_cancelations = 0;
+
+template <RecordInterface R>
+class ISAMTree {
+private:
+
+/* Key and value types, taken from the record type R. */
+typedef decltype(R::key) K;
+typedef decltype(R::value) V;
+
+/* Size in bytes of one internal node (enforced by the static_assert below). */
+constexpr static size_t inmem_isam_node_size = 256;
+/* Number of (separator key, child pointer) pairs that fit in one node. */
+constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*));
+
+/*
+ * One internal node: keys[i] is the separator key stored for child[i].
+ * Child pointers are untyped (char*) because they refer either to another
+ * InternalNode or to a position in the record array; is_leaf() tells the
+ * two apart.
+ */
+struct InternalNode {
+ K keys[inmem_isam_fanout];
+ char* child[inmem_isam_fanout];
+};
+
+/*
+ * Number of records grouped under one leaf-level child pointer.
+ * NOTE(review): computed from sizeof(R), while the record array stores
+ * Wrapped<R> -- confirm this is the intended sizing.
+ */
+constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R);
+/* Byte size of the keys array within a node. */
+constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout;
+
+static_assert(sizeof(InternalNode) == inmem_isam_node_size, "node size does not match");
+
+public:
+    /*
+     * Builds a shard from the contents of a mutable buffer.
+     *
+     * The buffer's records are sorted in place, then copied into a newly
+     * allocated, cacheline-aligned array. While copying:
+     *   - a record immediately followed by a matching tombstone is dropped
+     *     together with that tombstone (counted in mrun_cancelations);
+     *   - records flagged as deleted are skipped;
+     *   - surviving tombstones are counted and added to the bloom filter.
+     * The internal levels are built only when at least one record survives.
+     *
+     * Side effects on `buffer`: its record array is reordered and the
+     * header of every copied record is masked.
+     *
+     * Every member is initialised, in declaration order, so that an empty
+     * shard has a null root and a zero internal node count instead of
+     * indeterminate values.
+     */
+    ISAMTree(MutableBuffer<R>* buffer)
+    : m_data(nullptr), m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr),
+      m_reccnt(0), m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0),
+      m_alloc_size(0) {
+
+        m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
+
+        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
+
+        TIMER_INIT();
+
+        auto base = buffer->get_data();
+        auto stop = base + buffer->get_record_count();
+
+        TIMER_START();
+        std::sort(base, stop, std::less<Wrapped<R>>());
+        TIMER_STOP();
+        auto sort_time = TIMER_RESULT();
+
+        TIMER_START();
+        while (base < stop) {
+            if (!base->is_tombstone() && (base + 1 < stop)
+                && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) {
+                /* record and its tombstone cancel each other out */
+                base += 2;
+                mrun_cancelations++;
+                continue;
+            } else if (base->is_deleted()) {
+                base += 1;
+                continue;
+            }
+
+            // FIXME: this shouldn't be necessary, but the tagged record
+            // bypass doesn't seem to be working on this code-path, so this
+            // ensures that tagged records from the buffer are able to be
+            // dropped, eventually. It should only need to be &= 1
+            base->header &= 3;
+            m_data[m_reccnt++] = *base;
+            if (m_bf && base->is_tombstone()) {
+                ++m_tombstone_cnt;
+                m_bf->insert(base->rec);
+            }
+
+            base++;
+        }
+        TIMER_STOP();
+        auto copy_time = TIMER_RESULT();
+
+        TIMER_START();
+        if (m_reccnt > 0) {
+            build_internal_levels();
+        }
+        TIMER_STOP();
+        auto level_time = TIMER_RESULT();
+    }
+
+    /*
+     * Builds a shard by merging `len` existing shards (entries of `runs`
+     * may be null).
+     *
+     * The inputs are merged in sorted order through a priority queue. A
+     * record directly followed by a matching tombstone is dropped together
+     * with that tombstone; records flagged as deleted are skipped;
+     * surviving tombstones are counted and added to the bloom filter. The
+     * internal levels are built only when at least one record survives.
+     *
+     * Every member is initialised, in declaration order, so that an empty
+     * result has a null root and a zero internal node count. Runs holding
+     * no records are never queued, so their (empty) record array is never
+     * dereferenced.
+     */
+    ISAMTree(ISAMTree** runs, size_t len)
+    : m_data(nullptr), m_bf(nullptr), m_isam_nodes(nullptr), m_root(nullptr),
+      m_reccnt(0), m_tombstone_cnt(0), m_internal_node_cnt(0), m_deleted_cnt(0),
+      m_alloc_size(0) {
+        std::vector<Cursor<Wrapped<R>>> cursors;
+        cursors.reserve(len);
+
+        PriorityQueue<Wrapped<R>> pq(len);
+
+        size_t attemp_reccnt = 0;
+        size_t tombstone_count = 0;
+
+        for (size_t i = 0; i < len; ++i) {
+            if (runs[i]) {
+                auto base = runs[i]->get_data();
+                cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()});
+                attemp_reccnt += runs[i]->get_record_count();
+                tombstone_count += runs[i]->get_tombstone_count();
+                /* cursors[i] is still created so indices line up with runs[i] */
+                if (runs[i]->get_record_count() > 0) {
+                    pq.push(cursors[i].ptr, i);
+                }
+            } else {
+                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
+            }
+        }
+
+        m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
+
+        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
+        assert(m_alloc_size % CACHELINE_SIZE == 0);
+        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
+
+        while (pq.size()) {
+            auto now = pq.peek();
+            auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
+            if (!now.data->is_tombstone() && next.data != nullptr &&
+                now.data->rec == next.data->rec && next.data->is_tombstone()) {
+
+                /* record and its tombstone cancel each other out */
+                pq.pop(); pq.pop();
+                auto& cursor1 = cursors[now.version];
+                auto& cursor2 = cursors[next.version];
+                if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version);
+                if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version);
+            } else {
+                auto& cursor = cursors[now.version];
+                if (!cursor.ptr->is_deleted()) {
+                    m_data[m_reccnt++] = *cursor.ptr;
+                    if (cursor.ptr->is_tombstone()) {
+                        ++m_tombstone_cnt;
+                        m_bf->insert(cursor.ptr->rec);
+                    }
+                }
+                pq.pop();
+
+                if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version);
+            }
+        }
+
+        if (m_reccnt > 0) {
+            build_internal_levels();
+        }
+    }
+
+    /*
+     * Releases the record array, the internal node array and the bloom
+     * filter. free() and delete are both no-ops on a null pointer, so no
+     * explicit null checks are needed.
+     */
+    ~ISAMTree() {
+        free(m_data);
+        free(m_isam_nodes);
+        delete m_bf;
+    }
+
+    /*
+     * Looks up a record equal to `rec`.
+     *
+     * When `filter` is true the bloom filter is consulted first and a
+     * negative answer ends the search. Otherwise the search starts at the
+     * lower bound of rec.key and scans forward past records that compare
+     * less than `rec`.
+     *
+     * Returns a pointer to the matching stored record, or nullptr when
+     * there is none. The index is bounds-checked again after the scan, so
+     * running off the end of the array never reads past the last record.
+     */
+    Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
+        if (filter && !m_bf->lookup(rec)) {
+            return nullptr;
+        }
+
+        /* an empty shard has no index to descend */
+        if (m_reccnt == 0) {
+            return nullptr;
+        }
+
+        size_t idx = get_lower_bound(rec.key);
+        while (idx < m_reccnt && m_data[idx].rec < rec) ++idx;
+
+        if (idx < m_reccnt && m_data[idx].rec == rec) {
+            return m_data + idx;
+        }
+
+        return nullptr;
+    }
+
+    /* Returns the start of the shard's record array. */
+    Wrapped<R>* get_data() const {
+        Wrapped<R>* const records = m_data;
+        return records;
+    }
+
+    /* Returns the number of records (tombstones included) held by the shard. */
+    size_t get_record_count() const {
+        const size_t record_total = m_reccnt;
+        return record_total;
+    }
+
+    /* Returns how many of the stored records are tombstones. */
+    size_t get_tombstone_count() const {
+        const size_t tombstone_total = m_tombstone_cnt;
+        return tombstone_total;
+    }
+
+    /*
+     * Returns a pointer to the record at position `idx`, or nullptr when
+     * `idx` is not a valid position.
+     */
+    const Wrapped<R>* get_record_at(size_t idx) const {
+        if (idx >= m_reccnt) {
+            return nullptr;
+        }
+
+        return m_data + idx;
+    }
+
+    /*
+     * Returns the shard's memory footprint in bytes: the internal nodes
+     * (m_internal_node_cnt nodes of inmem_isam_node_size bytes each) plus
+     * m_alloc_size. const-qualified like the other read-only accessors.
+     */
+    size_t get_memory_usage() const {
+        return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size;
+    }
+
+    /*
+     * Returns the auxiliary memory usage in bytes; always 0 here.
+     * const-qualified like the other read-only accessors.
+     */
+    size_t get_aux_memory_usage() const {
+        return 0;
+    }
+
+    /*
+     * Returns the index of the first record whose key is not less than
+     * `key`, or the record count when no such record exists.
+     *
+     * The internal levels are descended from the root: at each node the
+     * first child whose separator key is >= `key` is taken (or the last
+     * populated child), until a pointer into the record array is reached.
+     * From there the records are scanned forward linearly.
+     *
+     * An empty shard has no internal levels, so 0 is returned directly
+     * instead of descending from the root.
+     */
+    size_t get_lower_bound(const K& key) const {
+        if (m_reccnt == 0) {
+            return 0;
+        }
+
+        const InternalNode* now = m_root;
+        while (!is_leaf(reinterpret_cast<const char*>(now))) {
+            const InternalNode* next = nullptr;
+            for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) {
+                /* child[i + 1] == nullptr means child[i] is the last populated slot */
+                if (now->child[i + 1] == nullptr || key <= now->keys[i]) {
+                    next = reinterpret_cast<const InternalNode*>(now->child[i]);
+                    break;
+                }
+            }
+
+            now = next ? next : reinterpret_cast<const InternalNode*>(now->child[inmem_isam_fanout - 1]);
+        }
+
+        const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
+        while (pos < m_data + m_reccnt && pos->rec.key < key) pos++;
+
+        return pos - m_data;
+    }
+
+    /*
+     * Returns the index of the first record whose key is greater than
+     * `key`, or the record count when no such record exists.
+     *
+     * The internal levels are descended from the root: at each node the
+     * first child whose separator key is > `key` is taken (or the last
+     * populated child), until a pointer into the record array is reached.
+     * From there the records are scanned forward linearly.
+     *
+     * An empty shard has no internal levels, so 0 is returned directly
+     * instead of descending from the root.
+     */
+    size_t get_upper_bound(const K& key) const {
+        if (m_reccnt == 0) {
+            return 0;
+        }
+
+        const InternalNode* now = m_root;
+        while (!is_leaf(reinterpret_cast<const char*>(now))) {
+            const InternalNode* next = nullptr;
+            for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) {
+                /* child[i + 1] == nullptr means child[i] is the last populated slot */
+                if (now->child[i + 1] == nullptr || key < now->keys[i]) {
+                    next = reinterpret_cast<const InternalNode*>(now->child[i]);
+                    break;
+                }
+            }
+
+            now = next ? next : reinterpret_cast<const InternalNode*>(now->child[inmem_isam_fanout - 1]);
+        }
+
+        const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
+        while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++;
+
+        return pos - m_data;
+    }
+
+
+private:
+    /*
+     * Builds the internal index levels over the sorted record array and
+     * sets m_root to the single top-level node. Only called when the
+     * shard holds at least one record.
+     *
+     * All levels live in one zero-initialised allocation, bottom level
+     * first. In every node, keys[i] is the largest key reachable through
+     * child[i]; unused slots keep a null child pointer.
+     *
+     * The size of the node allocation is kept in a local variable so that
+     * m_alloc_size (set by the constructor for the record array) is not
+     * overwritten; the nodes are accounted for via m_internal_node_cnt.
+     */
+    void build_internal_levels() {
+        size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0);
+        size_t level_node_cnt = n_leaf_nodes;
+        size_t node_cnt = 0;
+        do {
+            level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0);
+            node_cnt += level_node_cnt;
+        } while (level_node_cnt > 1);
+
+        size_t node_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE);
+        assert(node_alloc_size % CACHELINE_SIZE == 0);
+
+        m_isam_nodes = (InternalNode*)std::aligned_alloc(CACHELINE_SIZE, node_alloc_size);
+        m_internal_node_cnt = node_cnt;
+        memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size);
+
+        InternalNode* current_node = m_isam_nodes;
+
+        /* Bottom level: every child pointer refers to a run of records. */
+        size_t leaf_start = 0;
+        while (leaf_start < m_reccnt) {
+            size_t fanout = 0;
+            for (size_t i = 0; i < inmem_isam_fanout; ++i) {
+                size_t first_rec = leaf_start + inmem_isam_leaf_fanout * i;
+                if (first_rec >= m_reccnt) break;
+                /* the separator is the last key of the run (clamped to the array) */
+                size_t last_rec = std::min(first_rec + inmem_isam_leaf_fanout - 1, m_reccnt - 1);
+                current_node->keys[i] = m_data[last_rec].rec.key;
+                current_node->child[i] = (char*)(m_data + first_rec);
+                ++fanout;
+            }
+            current_node++;
+            leaf_start += fanout * inmem_isam_leaf_fanout;
+        }
+
+        /* Upper levels: keep grouping the previous level until one node remains. */
+        auto level_start = m_isam_nodes;
+        auto level_stop = current_node;
+        auto current_level_node_cnt = level_stop - level_start;
+        while (current_level_node_cnt > 1) {
+            auto now = level_start;
+            while (now < level_stop) {
+                size_t child_cnt = 0;
+                for (size_t i = 0; i < inmem_isam_fanout; ++i) {
+                    auto node_ptr = now + i;
+                    if (node_ptr >= level_stop) break;
+
+                    /* A partially filled child has trailing null slots; take
+                     * the key of its last populated slot as the separator. */
+                    size_t last_slot = inmem_isam_fanout - 1;
+                    while (last_slot > 0 && node_ptr->child[last_slot] == nullptr) --last_slot;
+
+                    current_node->keys[i] = node_ptr->keys[last_slot];
+                    current_node->child[i] = (char*)node_ptr;
+                    ++child_cnt;
+                }
+                now += child_cnt;
+                current_node++;
+            }
+            level_start = level_stop;
+            level_stop = current_node;
+            current_level_node_cnt = level_stop - level_start;
+        }
+
+        assert(current_level_node_cnt == 1);
+        m_root = level_start;
+    }
+
+    /*
+     * Reports whether `ptr` points into the record array, i.e. whether a
+     * child pointer refers to records and not to another internal node.
+     */
+    bool is_leaf(const char* ptr) const {
+        const char* const records_begin = (const char*)m_data;
+        const char* const records_end = (const char*)(m_data + m_reccnt);
+
+        if (ptr < records_begin) {
+            return false;
+        }
+
+        return ptr < records_end;
+    }
+
+ // Members: sorted data, internal ISAM levels, reccnt;
+ // Sorted, cacheline-aligned record array (released with free()).
+ Wrapped<R>* m_data;
+ // Bloom filter that receives the tombstone records kept in this shard.
+ psudb::BloomFilter<R> *m_bf;
+ // Storage for all internal nodes (released with free()).
+ InternalNode* m_isam_nodes;
+ // Top-level node; assigned by build_internal_levels().
+ InternalNode* m_root;
+ // Number of records in m_data, tombstones included.
+ size_t m_reccnt;
+ // Number of tombstone records in m_data.
+ size_t m_tombstone_cnt;
+ // Number of internal nodes in m_isam_nodes.
+ size_t m_internal_node_cnt;
+ // Initialised to 0 by the constructors; not otherwise used in this class.
+ size_t m_deleted_cnt;
+ // Cacheline-rounded allocation size in bytes; reported by get_memory_usage().
+ size_t m_alloc_size;
+};
+}
diff --git a/include/shard/MemISAM.h b/include/shard/MemISAM.h
deleted file mode 100644
index 00fb467..0000000
--- a/include/shard/MemISAM.h
+++ /dev/null
@@ -1,702 +0,0 @@
-/*
- * include/shard/MemISAM.h
- *
- * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
- * Dong Xie <dongx@psu.edu>
- *
- * All rights reserved. Published under the Modified BSD License.
- *
- */
-#pragma once
-
-#include <vector>
-#include <cassert>
-#include <queue>
-#include <memory>
-
-#include "framework/ShardRequirements.h"
-
-#include "util/bf_config.h"
-#include "psu-ds/PriorityQueue.h"
-#include "util/Cursor.h"
-#include "psu-util/timer.h"
-
-using psudb::CACHELINE_SIZE;
-using psudb::BloomFilter;
-using psudb::PriorityQueue;
-using psudb::queue_record;
-using psudb::Alias;
-
-namespace de {
-
-// Per-thread count of record/tombstone pairs cancelled while a MemISAM is
-// built from a buffer. Declared inline so that including this header from
-// more than one translation unit does not yield multiple definitions.
-inline thread_local size_t mrun_cancelations = 0;
-
-template <RecordInterface R>
-struct irs_query_parms { // caller-supplied parameters for an IRSQuery
- decltype(R::key) lower_bound; // smallest key accepted (records are tested with >=)
- decltype(R::key) upper_bound; // largest key accepted (records are tested with <=)
- size_t sample_size; // total number of draws split between the buffer and the shards
- gsl_rng *rng; // GSL generator used for every random draw of the query
-};
-
-template <RecordInterface R, bool Rejection>
-class IRSQuery;
-
-template <RecordInterface R>
-struct IRSState { // per-shard state of an IRSQuery
- size_t lower_bound; // record index returned by get_lower_bound() for the query's lower key
- size_t upper_bound; // record index returned by get_upper_bound() for the query's upper key
- size_t sample_size; // draws assigned to this shard by process_query_states
- size_t total_weight; // upper_bound - lower_bound, or 0 when lower_bound is past the last record
-};
-
-template <RecordInterface R>
-struct IRSBufferState { // mutable-buffer state of an IRSQuery
- size_t cutoff; // buffer record count captured when the state was created
- std::vector<Wrapped<R>> records; // copies of in-range buffer records; only filled when Rejection is false
- size_t sample_size; // draws assigned to the buffer by process_query_states
-};
-
-template <RecordInterface R>
-struct ISAMRangeQueryParms { // caller-supplied parameters for an ISAMRangeQuery
- decltype(R::key) lower_bound; // smallest key returned (inclusive)
- decltype(R::key) upper_bound; // largest key returned (inclusive)
-};
-
-template <RecordInterface R>
-class ISAMRangeQuery;
-
-template <RecordInterface R>
-struct ISAMRangeQueryState { // per-shard state of an ISAMRangeQuery
- size_t start_idx; // index returned by get_lower_bound() for the query's lower key
- size_t stop_idx; // the shard's record count, used as the exclusive scan limit
-};
-
-template <RecordInterface R>
-struct RangeQueryBufferState { // mutable-buffer state of an ISAMRangeQuery
- size_t cutoff; // buffer record count captured when the state was created; later records are ignored
-};
-
-template <RecordInterface R>
-class MemISAM {
-private:
- friend class IRSQuery<R, true>;
- friend class IRSQuery<R, false>;
- friend class ISAMRangeQuery<R>;
-
-typedef decltype(R::key) K;
-typedef decltype(R::value) V;
-
-constexpr static size_t inmem_isam_node_size = 256;
-constexpr static size_t inmem_isam_fanout = inmem_isam_node_size / (sizeof(K) + sizeof(char*));
-
-struct InMemISAMNode {
- K keys[inmem_isam_fanout];
- char* child[inmem_isam_fanout];
-};
-
-constexpr static size_t inmem_isam_leaf_fanout = inmem_isam_node_size / sizeof(R);
-constexpr static size_t inmem_isam_node_keyskip = sizeof(K) * inmem_isam_fanout;
-
-static_assert(sizeof(InMemISAMNode) == inmem_isam_node_size, "node size does not match");
-
-public:
-    /*
-     * Build a shard from a mutable buffer. The buffer's records are sorted
-     * in place; a record immediately followed by its own tombstone is
-     * cancelled together with that tombstone, records flagged as deleted
-     * are skipped, and the survivors are copied into a cache-line aligned
-     * array. Surviving tombstones are counted and added to the bloom
-     * filter. The internal ISAM levels are built only when at least one
-     * record survived, so every member they would set is given a defined
-     * value up front.
-     */
-    MemISAM(MutableBuffer<R>* buffer)
-    : m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), m_tombstone_cnt(0)
-    , m_internal_node_cnt(0), m_deleted_cnt(0) {
-
-        m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
-
-        // pad the record array up to a multiple of the cache line size
-        m_alloc_size = (buffer->get_record_count() * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (buffer->get_record_count() * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(m_alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
-
-        TIMER_INIT();
-
-        auto base = buffer->get_data();
-        auto stop = base + buffer->get_record_count();
-
-        TIMER_START();
-        std::sort(base, stop, std::less<Wrapped<R>>());
-        TIMER_STOP();
-        auto sort_time = TIMER_RESULT();
-
-        TIMER_START();
-        while (base < stop) {
-            if (!base->is_tombstone() && (base + 1 < stop)
-                && base->rec == (base + 1)->rec && (base + 1)->is_tombstone()) {
-                // record and its tombstone are adjacent after sorting: drop both
-                base += 2;
-                mrun_cancelations++;
-                continue;
-            } else if (base->is_deleted()) {
-                base += 1;
-                continue;
-            }
-
-            // FIXME: this shouldn't be necessary, but the tagged record
-            // bypass doesn't seem to be working on this code-path, so this
-            // ensures that tagged records from the buffer are able to be
-            // dropped, eventually. It should only need to be &= 1
-            base->header &= 3;
-            m_data[m_reccnt++] = *base;
-            if (base->is_tombstone()) {
-                ++m_tombstone_cnt;
-                m_bf->insert(base->rec);
-            }
-
-            base++;
-        }
-        TIMER_STOP();
-        auto copy_time = TIMER_RESULT();
-
-        TIMER_START();
-        if (m_reccnt > 0) {
-            build_internal_levels();
-        }
-        TIMER_STOP();
-        auto level_time = TIMER_RESULT();
-    }
-
-    /*
-     * Build a shard by merging `len` existing shards. Entries of `runs` may
-     * be null. Records are pulled through a priority queue; a record that
-     * is immediately followed in queue order by its own tombstone is
-     * cancelled together with that tombstone, and records flagged as
-     * deleted are skipped. Surviving tombstones are counted and added to
-     * the bloom filter.
-     */
-    MemISAM(MemISAM** runs, size_t len)
-    : m_isam_nodes(nullptr), m_root(nullptr), m_reccnt(0), m_tombstone_cnt(0)
-    , m_internal_node_cnt(0), m_deleted_cnt(0) {
-        std::vector<Cursor<Wrapped<R>>> cursors;
-        cursors.reserve(len);
-
-        PriorityQueue<Wrapped<R>> pq(len);
-
-        size_t attemp_reccnt = 0;
-        size_t tombstone_count = 0;
-
-        for (size_t i = 0; i < len; ++i) {
-            // A null or empty run gets an empty cursor so that cursors[i]
-            // stays aligned with runs[i]; pushing an empty run would hand
-            // the queue a pointer to one past its (empty) record array.
-            if (runs[i] && runs[i]->get_record_count() > 0) {
-                auto base = runs[i]->get_data();
-                cursors.emplace_back(Cursor{base, base + runs[i]->get_record_count(), 0, runs[i]->get_record_count()});
-                attemp_reccnt += runs[i]->get_record_count();
-                tombstone_count += runs[i]->get_tombstone_count();
-                pq.push(cursors[i].ptr, i);
-            } else {
-                cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0});
-            }
-        }
-
-        m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
-
-        // pad the record array up to a multiple of the cache line size
-        m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
-        assert(m_alloc_size % CACHELINE_SIZE == 0);
-        m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
-
-        while (pq.size()) {
-            auto now = pq.peek();
-            auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
-            if (!now.data->is_tombstone() && next.data != nullptr &&
-                now.data->rec == next.data->rec && next.data->is_tombstone()) {
-
-                // record followed by its tombstone: drop both
-                pq.pop(); pq.pop();
-                auto& cursor1 = cursors[now.version];
-                auto& cursor2 = cursors[next.version];
-                if (advance_cursor(cursor1)) pq.push(cursor1.ptr, now.version);
-                if (advance_cursor(cursor2)) pq.push(cursor2.ptr, next.version);
-            } else {
-                auto& cursor = cursors[now.version];
-                if (!cursor.ptr->is_deleted()) {
-                    m_data[m_reccnt++] = *cursor.ptr;
-                    if (cursor.ptr->is_tombstone()) {
-                        ++m_tombstone_cnt;
-                        m_bf->insert(cursor.ptr->rec);
-                    }
-                }
-                pq.pop();
-
-                if (advance_cursor(cursor)) pq.push(cursor.ptr, now.version);
-            }
-        }
-
-        if (m_reccnt > 0) {
-            build_internal_levels();
-        }
-    }
-
-    // Releases the bloom filter, the internal node array and the record
-    // array. delete and free() both accept null pointers, so no checks
-    // are needed.
-    ~MemISAM() {
-        delete m_bf;
-        free(m_isam_nodes);
-        free(m_data);
-    }
-
-    /*
-     * Find the stored record equal to `rec`.
-     *
-     * When `filter` is true the bloom filter is consulted first and a
-     * negative answer ends the search immediately. Returns a pointer to
-     * the matching record, or nullptr when there is none.
-     */
-    Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
-        if (filter && !m_bf->lookup(rec)) {
-            return nullptr;
-        }
-
-        // nothing to search; the internal levels are not built for an
-        // empty shard
-        if (m_reccnt == 0) {
-            return nullptr;
-        }
-
-        size_t idx = get_lower_bound(rec.key);
-
-        // several records may share the key; advance to the first one that
-        // is not ordered before rec
-        while (idx < m_reccnt && m_data[idx].rec < rec) ++idx;
-
-        // the scan may have run off the end of the array, so idx has to be
-        // re-checked before m_data[idx] is read
-        if (idx < m_reccnt && m_data[idx].rec == rec) {
-            return m_data + idx;
-        }
-
-        return nullptr;
-    }
-
- Wrapped<R>* get_data() const { // pointer to the first element of the shard's record array
- return m_data;
- }
-
- size_t get_record_count() const { // number of records stored in m_data (tombstones included)
- return m_reccnt;
- }
-
- size_t get_tombstone_count() const { // number of stored records that are tombstones
- return m_tombstone_cnt;
- }
-
-    // Bounds-checked access to the record array: yields nullptr for any
-    // index at or beyond the record count.
-    const Wrapped<R>* get_record_at(size_t idx) const {
-        if (idx >= m_reccnt) {
-            return nullptr;
-        }
-        return m_data + idx;
-    }
-
- size_t get_memory_usage() { // bytes of internal nodes (count * node size) plus m_alloc_size
- return m_internal_node_cnt * inmem_isam_node_size + m_alloc_size;
- }
-
- size_t get_aux_memory_usage() { // always reports 0 auxiliary bytes
- return 0;
- }
-
-private:
-    /*
-     * Index of the first record whose key is >= `key`, or the record
-     * count when every key is smaller. An empty shard has no internal
-     * levels (m_root is never set for it), so it is answered directly.
-     */
-    size_t get_lower_bound(const K& key) const {
-        if (m_reccnt == 0) {
-            return 0;
-        }
-
-        // descend until the pointer lands inside the record array
-        const InMemISAMNode* now = m_root;
-        while (!is_leaf(reinterpret_cast<const char*>(now))) {
-            const InMemISAMNode* next = nullptr;
-            for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) {
-                // take child i when it is the last populated slot or its
-                // separator key already covers the search key
-                if (now->child[i + 1] == nullptr || key <= now->keys[i]) {
-                    next = reinterpret_cast<InMemISAMNode*>(now->child[i]);
-                    break;
-                }
-            }
-
-            now = next ? next : reinterpret_cast<const InMemISAMNode*>(now->child[inmem_isam_fanout - 1]);
-        }
-
-        // finish with a linear scan inside the leaf run
-        const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
-        while (pos < m_data + m_reccnt && pos->rec.key < key) pos++;
-
-        return pos - m_data;
-    }
-
-    /*
-     * Index of the first record whose key is > `key`, or the record count
-     * when no key is larger. An empty shard has no internal levels (m_root
-     * is never set for it), so it is answered directly.
-     */
-    size_t get_upper_bound(const K& key) const {
-        if (m_reccnt == 0) {
-            return 0;
-        }
-
-        // descend until the pointer lands inside the record array
-        const InMemISAMNode* now = m_root;
-        while (!is_leaf(reinterpret_cast<const char*>(now))) {
-            const InMemISAMNode* next = nullptr;
-            for (size_t i = 0; i < inmem_isam_fanout - 1; ++i) {
-                // take child i when it is the last populated slot or its
-                // separator key is strictly above the search key
-                if (now->child[i + 1] == nullptr || key < now->keys[i]) {
-                    next = reinterpret_cast<InMemISAMNode*>(now->child[i]);
-                    break;
-                }
-            }
-
-            now = next ? next : reinterpret_cast<const InMemISAMNode*>(now->child[inmem_isam_fanout - 1]);
-        }
-
-        // finish with a linear scan inside the leaf run
-        const Wrapped<R>* pos = reinterpret_cast<const Wrapped<R>*>(now);
-        while (pos < m_data + m_reccnt && pos->rec.key <= key) pos++;
-
-        return pos - m_data;
-    }
-
-    /*
-     * Build the internal ISAM levels over the record array and set m_root.
-     * Expects m_reccnt > 0 (both constructors only call it in that case).
-     *
-     * The node allocation size is kept in a local variable: m_alloc_size
-     * holds the size of the record array allocation, and
-     * get_memory_usage() adds it to the node bytes, so overwriting it here
-     * would count the nodes twice and the records not at all.
-     */
-    void build_internal_levels() {
-        // number of leaf runs the record array is logically divided into
-        size_t n_leaf_nodes = m_reccnt / inmem_isam_leaf_fanout + (m_reccnt % inmem_isam_leaf_fanout != 0);
-
-        // total node count: divide by the fanout (rounding up) per level
-        // until a single root remains
-        size_t level_node_cnt = n_leaf_nodes;
-        size_t node_cnt = 0;
-        do {
-            level_node_cnt = level_node_cnt / inmem_isam_fanout + (level_node_cnt % inmem_isam_fanout != 0);
-            node_cnt += level_node_cnt;
-        } while (level_node_cnt > 1);
-
-        size_t node_alloc_size = (node_cnt * inmem_isam_node_size) + (CACHELINE_SIZE - (node_cnt * inmem_isam_node_size) % CACHELINE_SIZE);
-        assert(node_alloc_size % CACHELINE_SIZE == 0);
-
-        m_isam_nodes = (InMemISAMNode*)std::aligned_alloc(CACHELINE_SIZE, node_alloc_size);
-        m_internal_node_cnt = node_cnt;
-        // zeroed child pointers mark unused slots; the lookups rely on it
-        memset(m_isam_nodes, 0, node_cnt * inmem_isam_node_size);
-
-        InMemISAMNode* current_node = m_isam_nodes;
-
-        // lowest internal level: each slot points at a run of records and
-        // stores the key of the last record in that run
-        const Wrapped<R>* leaf_base = m_data;
-        const Wrapped<R>* leaf_stop = m_data + m_reccnt;
-        while (leaf_base < leaf_stop) {
-            size_t fanout = 0;
-            for (size_t i = 0; i < inmem_isam_fanout; ++i) {
-                auto rec_ptr = leaf_base + inmem_isam_leaf_fanout * i;
-                if (rec_ptr >= leaf_stop) break;
-                const Wrapped<R>* sep_key = std::min(rec_ptr + inmem_isam_leaf_fanout - 1, leaf_stop - 1);
-                current_node->keys[i] = sep_key->rec.key;
-                current_node->child[i] = (char*)rec_ptr;
-                ++fanout;
-            }
-            current_node++;
-            leaf_base += fanout * inmem_isam_leaf_fanout;
-        }
-
-        // upper levels: group the nodes of the level below until one remains
-        auto level_start = m_isam_nodes;
-        auto level_stop = current_node;
-        auto current_level_node_cnt = level_stop - level_start;
-        while (current_level_node_cnt > 1) {
-            auto now = level_start;
-            while (now < level_stop) {
-                size_t child_cnt = 0;
-                for (size_t i = 0; i < inmem_isam_fanout; ++i) {
-                    auto node_ptr = now + i;
-                    if (node_ptr >= level_stop) break;
-                    current_node->keys[i] = node_ptr->keys[inmem_isam_fanout - 1];
-                    current_node->child[i] = (char*)node_ptr;
-                    // count only children actually linked, so `now` never
-                    // advances past level_stop
-                    ++child_cnt;
-                }
-                now += child_cnt;
-                current_node++;
-            }
-            level_start = level_stop;
-            level_stop = current_node;
-            current_level_node_cnt = level_stop - level_start;
-        }
-
-        assert(current_level_node_cnt == 1);
-        m_root = level_start;
-    }
-
-    // Reports whether ptr addresses a byte inside the record array,
-    // i.e. within [m_data, m_data + m_reccnt). The lookups use this to
-    // detect when their descent has left the internal nodes.
-    bool is_leaf(const char* ptr) const {
-        const char* first_rec = (const char*)m_data;
-        const char* past_last_rec = (const char*)(m_data + m_reccnt);
-        return first_rec <= ptr && ptr < past_last_rec;
-    }
-
- // Members: sorted data, internal ISAM levels, reccnt;
- Wrapped<R>* m_data;
- psudb::BloomFilter<R> *m_bf;
- InMemISAMNode* m_isam_nodes;
- InMemISAMNode* m_root;
- size_t m_reccnt;
- size_t m_tombstone_cnt;
- size_t m_internal_node_cnt;
- size_t m_deleted_cnt;
- size_t m_alloc_size;
-};
-
-template <RecordInterface R, bool Rejection=true>
-class IRSQuery {
-public:
-
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=false;
-
-    // Allocates the per-shard state: the index range covered by the query
-    // keys and its length as the shard's sampling weight. The sample size
-    // starts at zero and is assigned later.
-    static void *get_query_state(MemISAM<R> *isam, void *parms) {
-        auto *qp = (irs_query_parms<R> *) parms;
-        auto *st = new IRSState<R>();
-
-        st->lower_bound = isam->get_lower_bound(qp->lower_bound);
-        st->upper_bound = isam->get_upper_bound(qp->upper_bound);
-
-        // a lower bound past the last record means nothing is in range
-        const bool nothing_in_range = (st->lower_bound == isam->get_record_count());
-        st->total_weight = nothing_in_range ? 0 : st->upper_bound - st->lower_bound;
-
-        st->sample_size = 0;
-        return st;
-    }
-
-    // Allocates the buffer state. With rejection sampling only the record
-    // count is captured; otherwise the in-range buffer records are copied
-    // into the state so they can be sampled from directly.
-    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
-        auto *bstate = new IRSBufferState<R>();
-        bstate->sample_size = 0;
-        bstate->cutoff = buffer->get_record_count();
-
-        if constexpr (!Rejection) {
-            auto *qp = (irs_query_parms<R> *) parms;
-            for (size_t pos = 0; pos < bstate->cutoff; ++pos) {
-                auto candidate = buffer->get_data() + pos;
-                const bool in_range = candidate->rec.key >= qp->lower_bound
-                                   && candidate->rec.key <= qp->upper_bound;
-                if (in_range) {
-                    bstate->records.emplace_back(*candidate);
-                }
-            }
-        }
-
-        return bstate;
-    }
-
-    /*
-     * Split the requested sample size between the buffer and the shards in
-     * proportion to their weights, and store each share in the matching
-     * state object.
-     *
-     * Slot 0 of every vector below is the buffer and slot i + 1 is shard
-     * i, which is the numbering the alias structure hands back. The
-     * buffer's weight is part of the total, so the normalized weights sum
-     * to one and a query whose only matches are in the buffer is still
-     * sampled.
-     */
-    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
-        auto p = (irs_query_parms<R> *) query_parms;
-        auto bs = (IRSBufferState<R> *) buff_state;
-
-        std::vector<size_t> weights;
-        weights.reserve(shard_states.size() + 1);
-        if constexpr (Rejection) {
-            weights.push_back((bs) ? bs->cutoff : 0);
-        } else {
-            weights.push_back((bs) ? bs->records.size() : 0);
-        }
-
-        size_t total_weight = weights[0];
-        for (auto &s : shard_states) {
-            auto state = (IRSState<R> *) s;
-            total_weight += state->total_weight;
-            weights.push_back(state->total_weight);
-        }
-
-        // if no records are available to sample from, set all of the
-        // sample sizes to 0 and bail out.
-        if (total_weight == 0) {
-            if (bs) {
-                bs->sample_size = 0;
-            }
-            for (auto &s : shard_states) {
-                ((IRSState<R> *) s)->sample_size = 0;
-            }
-
-            return;
-        }
-
-        std::vector<double> normalized_weights;
-        normalized_weights.reserve(weights.size());
-        for (auto w : weights) {
-            normalized_weights.push_back((double) w / (double) total_weight);
-        }
-
-        // one counter per alias slot, indexed exactly as the alias reports
-        std::vector<size_t> sample_sizes(weights.size(), 0);
-        auto shard_alias = Alias(normalized_weights);
-        for (size_t i=0; i<p->sample_size; i++) {
-            sample_sizes[shard_alias.get(p->rng)]++;
-        }
-
-        if (bs) {
-            bs->sample_size = sample_sizes[0];
-        }
-        for (size_t i=0; i<shard_states.size(); i++) {
-            auto state = (IRSState<R> *) shard_states[i];
-            state->sample_size = sample_sizes[i + 1];
-        }
-    }
-
-    /*
-     * Draw this shard's share of the sample: `sample_size` records chosen
-     * uniformly, with replacement, from the index range
-     * [lower_bound, upper_bound) recorded in the state.
-     *
-     * Returns an empty vector when no draws were assigned or when the
-     * range holds no records; in the latter case the record at
-     * lower_bound lies outside the query range and must not be returned.
-     */
-    static std::vector<Wrapped<R>> query(MemISAM<R> *isam, void *q_state, void *parms) {
-        auto rng = ((irs_query_parms<R> *) parms)->rng;
-        auto state = (IRSState<R> *) q_state;
-
-        std::vector<Wrapped<R>> result_set;
-
-        if (state->sample_size == 0
-            || state->lower_bound >= isam->get_record_count()
-            || state->upper_bound <= state->lower_bound) {
-            return result_set;
-        }
-
-        size_t range_length = state->upper_bound - state->lower_bound;
-        result_set.reserve(state->sample_size);
-        for (size_t i=0; i<state->sample_size; i++) {
-            size_t idx = gsl_rng_uniform_int(rng, range_length);
-            result_set.emplace_back(*isam->get_record_at(state->lower_bound + idx));
-        }
-
-        return result_set;
-    }
-
-    // Draws the buffer's share of the sample. With rejection sampling a
-    // random buffer record is picked per draw and kept only when its key
-    // is inside the query range; otherwise each draw picks one of the
-    // in-range records collected in the state.
-    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
-        auto *bstate = (IRSBufferState<R> *) state;
-        auto *qp = (irs_query_parms<R> *) parms;
-
-        std::vector<Wrapped<R>> sampled;
-        sampled.reserve(bstate->sample_size);
-
-        for (size_t draw = 0; draw < bstate->sample_size; ++draw) {
-            if constexpr (Rejection) {
-                auto pick = gsl_rng_uniform_int(qp->rng, bstate->cutoff);
-                auto candidate = buffer->get_data() + pick;
-
-                if (candidate->rec.key >= qp->lower_bound && candidate->rec.key <= qp->upper_bound) {
-                    sampled.emplace_back(*candidate);
-                }
-            } else {
-                auto pick = gsl_rng_uniform_int(qp->rng, bstate->records.size());
-                sampled.emplace_back(bstate->records[pick]);
-            }
-        }
-
-        return sampled;
-    }
-
-    // Concatenates the per-source samples, in order, stripping the record
-    // wrappers.
-    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
-        std::vector<R> flattened;
-
-        for (auto &source_sample : results) {
-            for (auto &wrapped : source_sample) {
-                flattened.emplace_back(wrapped.rec);
-            }
-        }
-
-        return flattened;
-    }
-
-    // Frees a shard state allocated by get_query_state.
-    static void delete_query_state(void *state) {
-        delete (IRSState<R> *) state;
-    }
-
-    // Frees a buffer state allocated by get_buffer_query_state.
-    static void delete_buffer_query_state(void *state) {
-        delete (IRSBufferState<R> *) state;
-    }
-};
-
-
-template <RecordInterface R>
-class ISAMRangeQuery {
-public:
-
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=true;
-
-    // Allocates the per-shard state: the scan starts at the lower bound of
-    // the query's lower key and may run up to the shard's record count.
-    static void *get_query_state(MemISAM<R> *ts, void *parms) {
-        auto *bounds = (ISAMRangeQueryParms<R> *) parms;
-        auto *st = new ISAMRangeQueryState<R>();
-
-        st->start_idx = ts->get_lower_bound(bounds->lower_bound);
-        st->stop_idx = ts->get_record_count();
-
-        return st;
-    }
-
-    // Allocates the buffer state, capturing how many buffer records exist
-    // right now; buffer_query looks at no more than that many.
-    static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
-        auto *bstate = new RangeQueryBufferState<R>();
-        bstate->cutoff = buffer->get_record_count();
-        return bstate;
-    }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, std::vector<void*> &buffer_states) { // intentionally a no-op: none of the arguments are read or modified
- return;
- }
-
-    /*
-     * Collect every record of the shard whose key lies in
-     * [lower_bound, upper_bound], in stored order. Tombstones and deleted
-     * records are returned as well; no filtering happens here.
-     */
-    static std::vector<Wrapped<R>> query(MemISAM<R> *ts, void *q_state, void *parms) {
-        std::vector<Wrapped<R>> records;
-        auto p = (ISAMRangeQueryParms<R> *) parms;
-        auto s = (ISAMRangeQueryState<R> *) q_state;
-
-        // if the returned index is one past the end of the
-        // records for the shard, then there are no records
-        // in the index falling into the specified range.
-        if (s->start_idx >= ts->get_record_count()) {
-            return records;
-        }
-
-        auto ptr = ts->get_record_at(s->start_idx);
-        auto stop = ts->m_data + s->stop_idx;
-
-        // roll the pointer forward to the first record that is
-        // greater than or equal to the lower bound.
-        while (ptr < stop && ptr->rec.key < p->lower_bound) {
-            ptr++;
-        }
-
-        // the bounds check has to come first: once ptr reaches stop it no
-        // longer points at a record and must not be dereferenced
-        while (ptr < stop && ptr->rec.key <= p->upper_bound) {
-            records.emplace_back(*ptr);
-            ptr++;
-        }
-
-        return records;
-    }
-
-    // Scans the first `cutoff` buffer records and returns copies of those
-    // whose key lies in [lower_bound, upper_bound], in buffer order.
-    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
-        auto *bounds = (ISAMRangeQueryParms<R> *) parms;
-        auto *bstate = (RangeQueryBufferState<R> *) state;
-
-        std::vector<Wrapped<R>> matches;
-        for (size_t pos = 0; pos < bstate->cutoff; ++pos) {
-            auto candidate = buffer->get_data() + pos;
-            const bool in_range = candidate->rec.key >= bounds->lower_bound
-                               && candidate->rec.key <= bounds->upper_bound;
-            if (in_range) {
-                matches.emplace_back(*candidate);
-            }
-        }
-
-        return matches;
-    }
-
- static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) { // merges the per-source result lists through a priority queue, dropping tombstones and the records they cancel
- std::vector<Cursor<Wrapped<R>>> cursors;
- cursors.reserve(results.size());
-
- PriorityQueue<Wrapped<R>> pq(results.size());
- size_t total = 0;
- size_t tmp_n = results.size();
-
-
- for (size_t i = 0; i < tmp_n; ++i)
- if (results[i].size() > 0){
- auto base = results[i].data();
- cursors.emplace_back(Cursor{base, base + results[i].size(), 0, results[i].size()});
- assert(i == cursors.size() - 1);
- total += results[i].size();
- pq.push(cursors[i].ptr, tmp_n - i - 1); // version is the reversed index, tmp_n - i - 1
- } else {
- cursors.emplace_back(Cursor<Wrapped<R>>{nullptr, nullptr, 0, 0}); // placeholder keeps cursors[i] aligned with results[i]
- }
-
- if (total == 0) {
- return std::vector<R>();
- }
-
- std::vector<R> output;
- output.reserve(total);
-
- while (pq.size()) {
- auto now = pq.peek();
- auto next = pq.size() > 1 ? pq.peek(1) : queue_record<Wrapped<R>>{nullptr, 0};
- if (!now.data->is_tombstone() && next.data != nullptr &&
- now.data->rec == next.data->rec && next.data->is_tombstone()) { // a record directly followed by its own tombstone: drop both
-
- pq.pop(); pq.pop();
- auto& cursor1 = cursors[tmp_n - now.version - 1]; // undo the index reversal applied when pushing
- auto& cursor2 = cursors[tmp_n - next.version - 1];
- if (advance_cursor<Wrapped<R>>(cursor1)) pq.push(cursor1.ptr, now.version);
- if (advance_cursor<Wrapped<R>>(cursor2)) pq.push(cursor2.ptr, next.version);
- } else {
- auto& cursor = cursors[tmp_n - now.version - 1];
- if (!now.data->is_tombstone()) output.push_back(cursor.ptr->rec); // tombstones themselves are never emitted
- pq.pop();
-
- if (advance_cursor<Wrapped<R>>(cursor)) pq.push(cursor.ptr, now.version);
- }
- }
-
- return output;
- }
-
-    // Frees a shard state allocated by get_query_state.
-    static void delete_query_state(void *state) {
-        delete (ISAMRangeQueryState<R> *) state;
-    }
-
-    // Frees a buffer state allocated by get_buffer_query_state.
-    static void delete_buffer_query_state(void *state) {
-        delete (RangeQueryBufferState<R> *) state;
-    }
-};
-
-
-
-}
diff --git a/include/shard/PGM.h b/include/shard/PGM.h
index 6d76376..6b66b7d 100644
--- a/include/shard/PGM.h
+++ b/include/shard/PGM.h
@@ -31,34 +31,6 @@ using psudb::Alias;
namespace de {
-template <RecordInterface R>
-struct pgm_range_query_parms {
- decltype(R::key) lower_bound;
- decltype(R::key) upper_bound;
-};
-
-template <RecordInterface R>
-struct PGMPointLookupParms {
- decltype(R::key) target_key;
-};
-
-template <RecordInterface R>
-class PGMRangeQuery;
-
-template <RecordInterface R>
-class PGMPointLookup;
-
-template <RecordInterface R>
-struct PGMState {
- size_t start_idx;
- size_t stop_idx;
-};
-
-template <RecordInterface R>
-struct PGMBufferState {
- size_t cutoff;
-};
-
template <RecordInterface R, size_t epsilon=128>
class PGM {
private:
@@ -67,11 +39,6 @@ private:
public:
-
- // FIXME: there has to be a better way to do this
- friend class PGMRangeQuery<R>;
- friend class PGMPointLookup<R>;
-
PGM(MutableBuffer<R>* buffer)
: m_reccnt(0), m_tombstone_cnt(0) {
@@ -80,8 +47,6 @@ public:
m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
std::vector<K> keys;
- //m_bf = new BloomFilter<R>(BF_FPR, buffer->get_tombstone_count(), BF_HASH_FUNCS);
-
size_t offset = 0;
m_reccnt = 0;
auto base = buffer->get_data();
@@ -110,13 +75,6 @@ public:
base->header &= 3;
m_data[m_reccnt++] = *base;
keys.emplace_back(base->rec.key);
-
- /*
- if (m_bf && base->is_tombstone()) {
- m_tombstone_cnt++;
- m_bf->insert(base->rec);
- }*/
-
base++;
}
@@ -148,8 +106,6 @@ public:
}
}
- //m_bf = new BloomFilter<R>(BF_FPR, tombstone_count, BF_HASH_FUNCS);
-
m_alloc_size = (attemp_reccnt * sizeof(Wrapped<R>)) + (CACHELINE_SIZE - (attemp_reccnt * sizeof(Wrapped<R>)) % CACHELINE_SIZE);
assert(m_alloc_size % CACHELINE_SIZE == 0);
m_data = (Wrapped<R>*)std::aligned_alloc(CACHELINE_SIZE, m_alloc_size);
@@ -172,10 +128,6 @@ public:
if (!cursor.ptr->is_deleted()) {
m_data[m_reccnt++] = *cursor.ptr;
keys.emplace_back(cursor.ptr->rec.key);
- /*if (m_bf && cursor.ptr->is_tombstone()) {
- ++m_tombstone_cnt;
- if (m_bf) m_bf->insert(cursor.ptr->rec);
- }*/
}
pq.pop();
@@ -190,15 +142,9 @@ public:
~PGM() {
if (m_data) free(m_data);
- //if (m_bf) delete m_bf;
-
}
Wrapped<R> *point_lookup(const R &rec, bool filter=false) {
- //if (filter && !m_bf->lookup(rec)) {
- // return nullptr;
- //}
-
size_t idx = get_lower_bound(rec.key);
if (idx >= m_reccnt) {
return nullptr;
@@ -284,219 +230,6 @@ private:
K m_max_key;
K m_min_key;
pgm::PGMIndex<K, epsilon> m_pgm;
- //BloomFilter<R> *m_bf;
-};
-template <RecordInterface R>
-class PGMPointLookup {
-public:
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=false;
-
- static void *get_query_state(PGM<R> *ts, void *parms) {
- return nullptr;
- }
-
- static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
- return nullptr;
- }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
- return;
- }
-
-    /*
-     * Look the target key up in one shard. Returns a vector holding the
-     * record found at the key's lower bound when its key equals the
-     * target, and an empty vector otherwise.
-     */
-    static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) {
-        std::vector<Wrapped<R>> records;
-        auto p = (PGMPointLookupParms<R> *) parms;
-
-        size_t idx = ts->get_lower_bound(p->target_key);
-
-        // a lower bound at or past the record count means every key in the
-        // shard is smaller; there is no record to inspect at that index
-        if (idx >= ts->get_record_count()) {
-            return records;
-        }
-
-        auto rec = ts->get_record_at(idx);
-        if (rec->rec.key == p->target_key) {
-            records.emplace_back(*rec);
-        }
-
-        return records;
-    }
-
-    // Linear scan of the buffer; returns the first record whose key equals
-    // the target, or an empty vector when none matches.
-    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
-        auto *lookup = (PGMPointLookupParms<R> *) parms;
-
-        std::vector<Wrapped<R>> found;
-        for (size_t pos = 0; pos < buffer->get_record_count(); ++pos) {
-            auto candidate = buffer->get_data() + pos;
-            if (candidate->rec.key == lookup->target_key) {
-                found.emplace_back(*candidate);
-                break;
-            }
-        }
-
-        return found;
-    }
-
-    // Returns the first record of the first non-empty result list, or an
-    // empty vector when every list is empty.
-    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
-        std::vector<R> output;
-        for (auto &source_result : results) {
-            if (source_result.size() == 0) continue;
-            output.emplace_back(source_result[0].rec);
-            break;
-        }
-
-        return output;
-    }
-
- static void delete_query_state(void *state) {
- }
-
- static void delete_buffer_query_state(void *state) {
- }
};
-
-
-template <RecordInterface R>
-class PGMRangeQuery {
-public:
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=false;
-
- static void *get_query_state(PGM<R> *ts, void *parms) {
- auto res = new PGMState<R>();
- auto p = (pgm_range_query_parms<R> *) parms;
-
- res->start_idx = ts->get_lower_bound(p->lower_bound);
- res->stop_idx = ts->get_record_count();
-
- return res;
- }
-
- static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
- auto res = new PGMBufferState<R>();
- res->cutoff = buffer->get_record_count();
-
- return res;
- }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
- return;
- }
-
-    /*
-     * Count the records of one shard whose key lies in
-     * [lower_bound, upper_bound]: a tombstone subtracts one, a record not
-     * flagged as deleted adds one. The count is returned in the key field
-     * of a single wrapped record. `tot` is unsigned, so a shard holding
-     * more tombstones than live records yields a wrapped-around value.
-     */
-    static std::vector<Wrapped<R>> query(PGM<R> *ts, void *q_state, void *parms) {
-        size_t tot = 0;
-        auto p = (pgm_range_query_parms<R> *) parms;
-        auto s = (PGMState<R> *) q_state;
-
-        // if the returned index is one past the end of the
-        // records for the PGM, then there are not records
-        // in the index falling into the specified range.
-        if (s->start_idx >= ts->get_record_count()) {
-            return {};
-        }
-
-        auto ptr = ts->get_record_at(s->start_idx);
-        auto stop = ts->m_data + s->stop_idx;
-
-        // roll the pointer forward to the first record that is
-        // greater than or equal to the lower bound.
-        while (ptr < stop && ptr->rec.key < p->lower_bound) {
-            ptr++;
-        }
-
-        // the bounds check has to come first: once ptr reaches stop it no
-        // longer points at a record and must not be dereferenced
-        while (ptr < stop && ptr->rec.key <= p->upper_bound) {
-            if (ptr->is_tombstone()) --tot;
-            else if (!ptr->is_deleted()) ++tot;
-            ptr++;
-        }
-
-        return {Wrapped<R>{0, {tot, 0}}};
-    }
-
-    // Counts the in-range records among the first `cutoff` buffer records
-    // (tombstone: minus one, record not flagged deleted: plus one) and
-    // returns the count in the key field of a single wrapped record.
-    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
-        auto *bounds = (pgm_range_query_parms<R> *) parms;
-        auto *bstate = (PGMBufferState<R> *) state;
-
-        size_t count = 0;
-        for (size_t pos = 0; pos < bstate->cutoff; ++pos) {
-            auto candidate = buffer->get_data() + pos;
-            const bool in_range = candidate->rec.key >= bounds->lower_bound
-                               && candidate->rec.key <= bounds->upper_bound;
-            if (!in_range) continue;
-
-            if (candidate->is_tombstone()) {
-                --count;
-            } else if (!candidate->is_deleted()) {
-                ++count;
-            }
-        }
-
-        return {Wrapped<R>{0, {count, 0}}};
-    }
-
-    // Adds up the partial counts (the key of the first record of every
-    // non-empty result list) and returns the sum in the key field of a
-    // single record.
-    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
-        size_t combined = 0;
-        for (auto &partial : results) {
-            if (partial.size() > 0) {
-                combined += partial[0].rec.key;
-            }
-        }
-
-        return {{combined, 0}};
-    }
-
- static void delete_query_state(void *state) {
- auto s = (PGMState<R> *) state;
- delete s;
- }
-
- static void delete_buffer_query_state(void *state) {
- auto s = (PGMBufferState<R> *) state;
- delete s;
- }
-};
-
-;
-
}
diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h
index a784a38..fdf8edb 100644
--- a/include/shard/TrieSpline.h
+++ b/include/shard/TrieSpline.h
@@ -30,32 +30,6 @@ using psudb::Alias;
namespace de {
-template <RecordInterface R>
-struct ts_range_query_parms {
- decltype(R::key) lower_bound;
- decltype(R::key) upper_bound;
-};
-
-template <RecordInterface R>
-class TrieSplineRangeQuery;
-
-template <RecordInterface R>
-struct TrieSplineState {
- size_t start_idx;
- size_t stop_idx;
-};
-
-template <RecordInterface R>
-struct TrieSplineBufferState {
- size_t cutoff;
- Alias* alias;
-
- ~TrieSplineBufferState() {
- delete alias;
- }
-
-};
-
template <RecordInterface R, size_t E=1024>
class TrieSpline {
private:
@@ -63,10 +37,6 @@ private:
typedef decltype(R::value) V;
public:
-
- // FIXME: there has to be a better way to do this
- friend class TrieSplineRangeQuery<R>;
-
TrieSpline(MutableBuffer<R>* buffer)
: m_reccnt(0), m_tombstone_cnt(0) {
@@ -254,8 +224,6 @@ public:
return 0;
}
-private:
-
size_t get_lower_bound(const K& key) const {
auto bound = m_ts.GetSearchBound(key);
size_t idx = bound.begin;
@@ -293,6 +261,8 @@ private:
return (m_data[idx].rec.key <= key) ? idx : m_reccnt;
}
+private:
+
Wrapped<R>* m_data;
size_t m_reccnt;
size_t m_tombstone_cnt;
@@ -302,154 +272,4 @@ private:
ts::TrieSpline<K> m_ts;
BloomFilter<R> *m_bf;
};
-
-
-template <RecordInterface R>
-class TrieSplineRangeQuery {
-public:
- constexpr static bool EARLY_ABORT=false;
- constexpr static bool SKIP_DELETE_FILTER=true;
-
- static void *get_query_state(TrieSpline<R> *ts, void *parms) {
- auto res = new TrieSplineState<R>();
- auto p = (ts_range_query_parms<R> *) parms;
-
- res->start_idx = ts->get_lower_bound(p->lower_bound);
- res->stop_idx = ts->get_record_count();
-
- return res;
- }
-
- static void* get_buffer_query_state(MutableBuffer<R> *buffer, void *parms) {
- auto res = new TrieSplineBufferState<R>();
- res->cutoff = buffer->get_record_count();
-
- return res;
- }
-
- static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void *buff_state) {
- return;
- }
-
-    /*
-     * Count the records of one shard whose key lies in
-     * [lower_bound, upper_bound]: a tombstone subtracts one, a record not
-     * flagged as deleted adds one. The count is returned in the key field
-     * of a single wrapped record. `tot` is unsigned, so a shard holding
-     * more tombstones than live records yields a wrapped-around value.
-     */
-    static std::vector<Wrapped<R>> query(TrieSpline<R> *ts, void *q_state, void *parms) {
-        size_t tot = 0;
-        auto p = (ts_range_query_parms<R> *) parms;
-        auto s = (TrieSplineState<R> *) q_state;
-
-        // if the returned index is one past the end of the
-        // records for the TrieSpline, then there are not records
-        // in the index falling into the specified range.
-        if (s->start_idx >= ts->get_record_count()) {
-            return {};
-        }
-
-        auto ptr = ts->get_record_at(s->start_idx);
-        auto stop = ts->m_data + s->stop_idx;
-
-        // roll the pointer forward to the first record that is
-        // greater than or equal to the lower bound.
-        while (ptr < stop && ptr->rec.key < p->lower_bound) {
-            ptr++;
-        }
-
-        // the bounds check has to come first: once ptr reaches stop it no
-        // longer points at a record and must not be dereferenced
-        while (ptr < stop && ptr->rec.key <= p->upper_bound) {
-            if (ptr->is_tombstone()) --tot;
-            else if (!ptr->is_deleted()) ++tot;
-            ptr++;
-        }
-
-        return {Wrapped<R>{0, {tot, 0}}};
-    }
-
-    // Counts the in-range records among the first `cutoff` buffer records
-    // (tombstone: minus one, record not flagged deleted: plus one) and
-    // returns the count in the key field of a single wrapped record.
-    static std::vector<Wrapped<R>> buffer_query(MutableBuffer<R> *buffer, void *state, void *parms) {
-        auto *bounds = (ts_range_query_parms<R> *) parms;
-        auto *bstate = (TrieSplineBufferState<R> *) state;
-
-        size_t count = 0;
-        for (size_t pos = 0; pos < bstate->cutoff; ++pos) {
-            auto candidate = buffer->get_data() + pos;
-            const bool in_range = candidate->rec.key >= bounds->lower_bound
-                               && candidate->rec.key <= bounds->upper_bound;
-            if (!in_range) continue;
-
-            if (candidate->is_tombstone()) {
-                --count;
-            } else if (!candidate->is_deleted()) {
-                ++count;
-            }
-        }
-
-        return {Wrapped<R>{0, {count, 0}}};
-    }
-
-    // Adds up the partial counts (the key of the first record of every
-    // non-empty result list) and returns the sum in the key field of a
-    // single record.
-    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
-        size_t combined = 0;
-        for (auto &partial : results) {
-            if (partial.size() > 0) {
-                combined += partial[0].rec.key;
-            }
-        }
-
-        return {{combined, 0}};
-    }
-
- static void delete_query_state(void *state) {
- auto s = (TrieSplineState<R> *) state;
- delete s;
- }
-
- static void delete_buffer_query_state(void *state) {
- auto s = (TrieSplineBufferState<R> *) state;
- delete s;
- }
-};
-
}
diff --git a/tests/wss_tests.cpp b/tests/alias_tests.cpp
index cdc8001..c4a302d 100644
--- a/tests/wss_tests.cpp
+++ b/tests/alias_tests.cpp
@@ -1,7 +1,7 @@
/*
- * tests/wss_tests.cpp
+ * tests/alias_tests.cpp
*
- * Unit tests for WSS (Augmented B+Tree) shard
+ * Unit tests for Alias shard
*
* Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
* Dong Xie <dongx@psu.edu>
@@ -10,14 +10,15 @@
*
*/
-#include "shard/WSS.h"
+#include "shard/Alias.h"
+#include "query/wss.h"
#include "testing.h"
#include <check.h>
using namespace de;
-typedef WSS<WRec> Shard;
+typedef Alias<WRec> Shard;
START_TEST(t_mbuffer_init)
{
@@ -45,7 +46,7 @@ START_TEST(t_mbuffer_init)
}
-START_TEST(t_wss_init)
+START_TEST(t_alias_init)
{
size_t n = 512;
auto mbuffer1 = create_test_mbuffer<WRec>(n);
@@ -101,7 +102,7 @@ START_TEST(t_point_lookup)
size_t n = 10000;
auto buffer = create_double_seq_mbuffer<WRec>(n, false);
- auto wss = Shard(buffer);
+ auto alias = Shard(buffer);
for (size_t i=0; i<n; i++) {
WRec r;
@@ -109,7 +110,7 @@ START_TEST(t_point_lookup)
r.key = rec->rec.key;
r.value = rec->rec.value;
- auto result = wss.point_lookup(r);
+ auto result = alias.point_lookup(r);
ck_assert_ptr_nonnull(result);
ck_assert_int_eq(result->rec.key, r.key);
ck_assert_int_eq(result->rec.value, r.value);
@@ -125,14 +126,14 @@ START_TEST(t_point_lookup_miss)
size_t n = 10000;
auto buffer = create_double_seq_mbuffer<WRec>(n, false);
- auto wss = Shard(buffer);
+ auto alias = Shard(buffer);
for (size_t i=n + 100; i<2*n; i++) {
WRec r;
r.key = i;
r.value = i;
- auto result = wss.point_lookup(r);
+ auto result = alias.point_lookup(r);
ck_assert_ptr_null(result);
}
@@ -169,7 +170,7 @@ START_TEST(t_full_cancelation)
END_TEST
-START_TEST(t_wss_query)
+START_TEST(t_alias_query)
{
size_t n=1000;
auto buffer = create_weighted_mbuffer<WRec>(n);
@@ -179,15 +180,15 @@ START_TEST(t_wss_query)
size_t k = 1000;
size_t cnt[3] = {0};
- wss_query_parms<WRec> parms = {k};
+ wss::Parms<WRec> parms = {k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WSSQuery<WRec>::get_query_state(shard, &parms);
- ((WSSState<WRec> *) state)->sample_size = k;
- auto result = WSSQuery<WRec>::query(shard, state, &parms);
+ auto state = wss::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wss::State<WRec> *) state)->sample_size = k;
+ auto result = wss::Query<Shard, WRec>::query(shard, state, &parms);
total_samples += result.size();
@@ -195,7 +196,7 @@ START_TEST(t_wss_query)
cnt[result[j].rec.key - 1]++;
}
- WSSQuery<WRec>::delete_query_state(state);
+ wss::Query<Shard, WRec>::delete_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05));
@@ -209,7 +210,7 @@ START_TEST(t_wss_query)
END_TEST
-START_TEST(t_wss_query_merge)
+START_TEST(t_alias_query_merge)
{
size_t n=1000;
auto buffer = create_weighted_mbuffer<WRec>(n);
@@ -222,25 +223,25 @@ START_TEST(t_wss_query_merge)
size_t k = 1000;
size_t cnt[3] = {0};
- wss_query_parms<WRec> parms = {k};
+ wss::Parms<WRec> parms = {k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
std::vector<std::vector<Wrapped<WRec>>> results(2);
for (size_t i=0; i<1000; i++) {
- auto state1 = WSSQuery<WRec>::get_query_state(shard, &parms);
- ((WSSState<WRec> *) state1)->sample_size = k;
- results[0] = WSSQuery<WRec>::query(shard, state1, &parms);
+ auto state1 = wss::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wss::State<WRec> *) state1)->sample_size = k;
+ results[0] = wss::Query<Shard, WRec>::query(shard, state1, &parms);
- auto state2 = WSSQuery<WRec>::get_query_state(shard, &parms);
- ((WSSState<WRec> *) state2)->sample_size = k;
- results[1] = WSSQuery<WRec>::query(shard, state2, &parms);
+ auto state2 = wss::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wss::State<WRec> *) state2)->sample_size = k;
+ results[1] = wss::Query<Shard, WRec>::query(shard, state2, &parms);
- WSSQuery<WRec>::delete_query_state(state1);
- WSSQuery<WRec>::delete_query_state(state2);
+ wss::Query<Shard, WRec>::delete_query_state(state1);
+ wss::Query<Shard, WRec>::delete_query_state(state2);
}
- auto merged = WSSQuery<WRec>::merge(results, nullptr);
+ auto merged = wss::Query<Shard, WRec>::merge(results, nullptr);
ck_assert_int_eq(merged.size(), 2*k);
for (size_t i=0; i<merged.size(); i++) {
@@ -255,7 +256,7 @@ START_TEST(t_wss_query_merge)
END_TEST
-START_TEST(t_wss_buffer_query_scan)
+START_TEST(t_alias_buffer_query_scan)
{
size_t n=1000;
auto buffer = create_weighted_mbuffer<WRec>(n);
@@ -266,22 +267,22 @@ START_TEST(t_wss_buffer_query_scan)
size_t k = 1000;
size_t cnt[3] = {0};
- wss_query_parms<WRec> parms = {k};
+ wss::Parms<WRec> parms = {k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WSSQuery<WRec, false>::get_buffer_query_state(buffer, &parms);
- ((WSSBufferState<WRec> *) state)->sample_size = k;
- auto result = WSSQuery<WRec, false>::buffer_query(buffer, state, &parms);
+ auto state = wss::Query<Shard, WRec, false>::get_buffer_query_state(buffer, &parms);
+ ((wss::BufferState<WRec> *) state)->sample_size = k;
+ auto result = wss::Query<Shard, WRec, false>::buffer_query(buffer, state, &parms);
total_samples += result.size();
for (size_t j=0; j<result.size(); j++) {
cnt[result[j].rec.key - 1]++;
}
- WSSQuery<WRec, false>::delete_buffer_query_state(state);
+ wss::Query<Shard, WRec, false>::delete_buffer_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05));
@@ -294,7 +295,7 @@ START_TEST(t_wss_buffer_query_scan)
END_TEST
-START_TEST(t_wss_buffer_query_rejection)
+START_TEST(t_alias_buffer_query_rejection)
{
size_t n=1000;
auto buffer = create_weighted_mbuffer<WRec>(n);
@@ -305,15 +306,15 @@ START_TEST(t_wss_buffer_query_rejection)
size_t k = 1000;
size_t cnt[3] = {0};
- wss_query_parms<WRec> parms = {k};
+ wss::Parms<WRec> parms = {k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WSSQuery<WRec>::get_buffer_query_state(buffer, &parms);
- ((WSSBufferState<WRec> *) state)->sample_size = k;
- auto result = WSSQuery<WRec>::buffer_query(buffer, state, &parms);
+ auto state = wss::Query<Shard, WRec>::get_buffer_query_state(buffer, &parms);
+ ((wss::BufferState<WRec> *) state)->sample_size = k;
+ auto result = wss::Query<Shard, WRec>::buffer_query(buffer, state, &parms);
total_samples += result.size();
@@ -321,7 +322,7 @@ START_TEST(t_wss_buffer_query_rejection)
cnt[result[j].rec.key - 1]++;
}
- WSSQuery<WRec>::delete_buffer_query_state(state);
+ wss::Query<Shard, WRec>::delete_buffer_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .1));
@@ -336,32 +337,32 @@ END_TEST
Suite *unit_testing()
{
- Suite *unit = suite_create("WSS Shard Unit Testing");
+ Suite *unit = suite_create("Alias Shard Unit Testing");
- TCase *create = tcase_create("de::WSS constructor Testing");
+ TCase *create = tcase_create("de::Alias constructor Testing");
tcase_add_test(create, t_mbuffer_init);
- tcase_add_test(create, t_wss_init);
+ tcase_add_test(create, t_alias_init);
tcase_set_timeout(create, 100);
suite_add_tcase(unit, create);
- TCase *tombstone = tcase_create("de:WSS::tombstone cancellation Testing");
+ TCase *tombstone = tcase_create("de:Alias::tombstone cancellation Testing");
tcase_add_test(tombstone, t_full_cancelation);
suite_add_tcase(unit, tombstone);
- TCase *lookup = tcase_create("de:WSS:point_lookup Testing");
+ TCase *lookup = tcase_create("de:Alias:point_lookup Testing");
tcase_add_test(lookup, t_point_lookup);
tcase_add_test(lookup, t_point_lookup_miss);
suite_add_tcase(unit, lookup);
- TCase *sampling = tcase_create("de:WSS::WSSQuery Testing");
- tcase_add_test(sampling, t_wss_query);
- tcase_add_test(sampling, t_wss_query_merge);
- tcase_add_test(sampling, t_wss_buffer_query_rejection);
- tcase_add_test(sampling, t_wss_buffer_query_scan);
+ TCase *sampling = tcase_create("de:Alias::AliasQuery Testing");
+ tcase_add_test(sampling, t_alias_query);
+ tcase_add_test(sampling, t_alias_query_merge);
+ tcase_add_test(sampling, t_alias_buffer_query_rejection);
+ tcase_add_test(sampling, t_alias_buffer_query_scan);
suite_add_tcase(unit, sampling);
return unit;
diff --git a/tests/wirs_tests.cpp b/tests/augbtree_tests.cpp
index a72f950..878af82 100644
--- a/tests/wirs_tests.cpp
+++ b/tests/augbtree_tests.cpp
@@ -1,7 +1,7 @@
/*
- * tests/wirs_tests.cpp
+ * tests/augbtree_tests.cpp
*
- * Unit tests for WIRS (Augmented B+Tree) shard
+ * Unit tests for AugBTree (Augmented B+Tree) shard
*
* Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
* Dong Xie <dongx@psu.edu>
@@ -10,14 +10,15 @@
*
*/
-#include "shard/WIRS.h"
+#include "shard/AugBTree.h"
+#include "query/wirs.h"
#include "testing.h"
#include <check.h>
using namespace de;
-typedef WIRS<WRec> Shard;
+typedef AugBTree<WRec> Shard;
START_TEST(t_mbuffer_init)
{
@@ -183,15 +184,15 @@ START_TEST(t_wirs_query)
size_t k = 1000;
size_t cnt[3] = {0};
- wirs_query_parms<WRec> parms = {lower_key, upper_key, k};
+ wirs::Parms<WRec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WIRSQuery<WRec>::get_query_state(shard, &parms);
- ((WIRSState<WRec> *) state)->sample_size = k;
- auto result = WIRSQuery<WRec>::query(shard, state, &parms);
+ auto state = wirs::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wirs::State<WRec> *) state)->sample_size = k;
+ auto result = wirs::Query<Shard, WRec>::query(shard, state, &parms);
total_samples += result.size();
@@ -199,7 +200,7 @@ START_TEST(t_wirs_query)
cnt[result[j].rec.key - 1]++;
}
- WIRSQuery<WRec>::delete_query_state(state);
+ wirs::Query<Shard, WRec>::delete_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05));
@@ -226,25 +227,25 @@ START_TEST(t_wirs_query_merge)
size_t k = 1000;
size_t cnt[3] = {0};
- wirs_query_parms<WRec> parms = {lower_key, upper_key, k};
+ wirs::Parms<WRec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
std::vector<std::vector<Wrapped<WRec>>> results(2);
for (size_t i=0; i<1000; i++) {
- auto state1 = WIRSQuery<WRec>::get_query_state(shard, &parms);
- ((WIRSState<WRec> *) state1)->sample_size = k;
- results[0] = WIRSQuery<WRec>::query(shard, state1, &parms);
+ auto state1 = wirs::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wirs::State<WRec> *) state1)->sample_size = k;
+ results[0] = wirs::Query<Shard, WRec>::query(shard, state1, &parms);
- auto state2 = WIRSQuery<WRec>::get_query_state(shard, &parms);
- ((WIRSState<WRec> *) state2)->sample_size = k;
- results[1] = WIRSQuery<WRec>::query(shard, state2, &parms);
+ auto state2 = wirs::Query<Shard, WRec>::get_query_state(shard, &parms);
+ ((wirs::State<WRec> *) state2)->sample_size = k;
+ results[1] = wirs::Query<Shard, WRec>::query(shard, state2, &parms);
- WIRSQuery<WRec>::delete_query_state(state1);
- WIRSQuery<WRec>::delete_query_state(state2);
+ wirs::Query<Shard, WRec>::delete_query_state(state1);
+ wirs::Query<Shard, WRec>::delete_query_state(state2);
}
- auto merged = WIRSQuery<WRec>::merge(results, nullptr);
+ auto merged = wirs::Query<Shard, WRec>::merge(results, nullptr);
ck_assert_int_eq(merged.size(), 2*k);
for (size_t i=0; i<merged.size(); i++) {
@@ -270,15 +271,15 @@ START_TEST(t_wirs_buffer_query_scan)
size_t k = 1000;
size_t cnt[3] = {0};
- wirs_query_parms<WRec> parms = {lower_key, upper_key, k};
+ wirs::Parms<WRec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WIRSQuery<WRec, false>::get_buffer_query_state(buffer, &parms);
- ((WIRSBufferState<WRec> *) state)->sample_size = k;
- auto result = WIRSQuery<WRec, false>::buffer_query(buffer, state, &parms);
+ auto state = wirs::Query<Shard, WRec, false>::get_buffer_query_state(buffer, &parms);
+ ((wirs::BufferState<WRec> *) state)->sample_size = k;
+ auto result = wirs::Query<Shard, WRec, false>::buffer_query(buffer, state, &parms);
total_samples += result.size();
@@ -286,7 +287,7 @@ START_TEST(t_wirs_buffer_query_scan)
cnt[result[j].rec.key - 1]++;
}
- WIRSQuery<WRec, false>::delete_buffer_query_state(state);
+ wirs::Query<Shard, WRec, false>::delete_buffer_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05));
@@ -310,15 +311,15 @@ START_TEST(t_wirs_buffer_query_rejection)
size_t k = 1000;
size_t cnt[3] = {0};
- wirs_query_parms<WRec> parms = {lower_key, upper_key, k};
+ wirs::Parms<WRec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = WIRSQuery<WRec>::get_buffer_query_state(buffer, &parms);
- ((WIRSBufferState<WRec> *) state)->sample_size = k;
- auto result = WIRSQuery<WRec>::buffer_query(buffer, state, &parms);
+ auto state = wirs::Query<Shard, WRec>::get_buffer_query_state(buffer, &parms);
+ ((wirs::BufferState<WRec> *) state)->sample_size = k;
+ auto result = wirs::Query<Shard, WRec>::buffer_query(buffer, state, &parms);
total_samples += result.size();
@@ -326,7 +327,7 @@ START_TEST(t_wirs_buffer_query_rejection)
cnt[result[j].rec.key - 1]++;
}
- WIRSQuery<WRec>::delete_buffer_query_state(state);
+ wirs::Query<Shard, WRec>::delete_buffer_query_state(state);
}
ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .05));
@@ -341,27 +342,27 @@ END_TEST
Suite *unit_testing()
{
- Suite *unit = suite_create("WIRS Shard Unit Testing");
+ Suite *unit = suite_create("AugBTree Shard Unit Testing");
- TCase *create = tcase_create("de::WIRS constructor Testing");
+ TCase *create = tcase_create("de::AugBTree constructor Testing");
tcase_add_test(create, t_mbuffer_init);
tcase_add_test(create, t_wirs_init);
tcase_set_timeout(create, 100);
suite_add_tcase(unit, create);
- TCase *tombstone = tcase_create("de:WIRS::tombstone cancellation Testing");
+ TCase *tombstone = tcase_create("de:AugBTree::tombstone cancellation Testing");
tcase_add_test(tombstone, t_full_cancelation);
suite_add_tcase(unit, tombstone);
- TCase *lookup = tcase_create("de:WIRS:point_lookup Testing");
+ TCase *lookup = tcase_create("de:AugBTree:point_lookup Testing");
tcase_add_test(lookup, t_point_lookup);
tcase_add_test(lookup, t_point_lookup_miss);
suite_add_tcase(unit, lookup);
- TCase *sampling = tcase_create("de:WIRS::WIRSQuery Testing");
+ TCase *sampling = tcase_create("de:AugBTree::AugBTreeQuery Testing");
tcase_add_test(sampling, t_wirs_query);
tcase_add_test(sampling, t_wirs_query_merge);
tcase_add_test(sampling, t_wirs_buffer_query_rejection);
diff --git a/tests/de_level_tag.cpp b/tests/de_level_tag.cpp
index 822ccc2..f3f099d 100644
--- a/tests/de_level_tag.cpp
+++ b/tests/de_level_tag.cpp
@@ -15,11 +15,12 @@
#include "testing.h"
#include "framework/DynamicExtension.h"
-#include "shard/WIRS.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
#include <check.h>
using namespace de;
-typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE;
+typedef DynamicExtension<Rec, ISAMTree<Rec>, rq::Query<ISAMTree<Rec>, Rec>, LayoutPolicy::LEVELING, DeletePolicy::TAGGING, SerialScheduler> DE;
#include "dynamic_extension_tests.inc"
diff --git a/tests/de_level_tomb.cpp b/tests/de_level_tomb.cpp
index ba3f784..912743c 100644
--- a/tests/de_level_tomb.cpp
+++ b/tests/de_level_tomb.cpp
@@ -15,11 +15,12 @@
#include "testing.h"
#include "framework/DynamicExtension.h"
-#include "shard/WIRS.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
#include <check.h>
using namespace de;
-typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE;
+typedef DynamicExtension<Rec, ISAMTree<Rec>, rq::Query<ISAMTree<Rec>, Rec>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, SerialScheduler> DE;
#include "dynamic_extension_tests.inc"
diff --git a/tests/de_tier_tag.cpp b/tests/de_tier_tag.cpp
index 2c263eb..712c203 100644
--- a/tests/de_tier_tag.cpp
+++ b/tests/de_tier_tag.cpp
@@ -16,11 +16,12 @@
#include "testing.h"
#include "framework/DynamicExtension.h"
#include "framework/scheduling/SerialScheduler.h"
-#include "shard/WIRS.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
#include <check.h>
using namespace de;
-typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE;
+typedef DynamicExtension<Rec, ISAMTree<Rec>, rq::Query<ISAMTree<Rec>, Rec>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE;
#include "dynamic_extension_tests.inc"
diff --git a/tests/de_tier_tomb.cpp b/tests/de_tier_tomb.cpp
index 509494a..f25ec34 100644
--- a/tests/de_tier_tomb.cpp
+++ b/tests/de_tier_tomb.cpp
@@ -15,11 +15,12 @@
#include "testing.h"
#include "framework/DynamicExtension.h"
-#include "shard/WIRS.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
#include <check.h>
using namespace de;
-typedef DynamicExtension<WRec, WIRS<WRec>, WIRSQuery<WRec>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE;
+typedef DynamicExtension<Rec, ISAMTree<Rec>, rq::Query<ISAMTree<Rec>, Rec>, LayoutPolicy::TEIRING, DeletePolicy::TOMBSTONE, SerialScheduler> DE;
#include "dynamic_extension_tests.inc"
diff --git a/tests/dynamic_extension_tests.inc b/tests/dynamic_extension_tests.inc
index df88d98..bcb5ae6 100644
--- a/tests/dynamic_extension_tests.inc
+++ b/tests/dynamic_extension_tests.inc
@@ -12,77 +12,76 @@
START_TEST(t_create)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
+ ck_assert_ptr_nonnull(test_de);
+ ck_assert_int_eq(test_de->get_record_count(), 0);
+ ck_assert_int_eq(test_de->get_height(), 0);
- ck_assert_ptr_nonnull(ext_wirs);
- ck_assert_int_eq(ext_wirs->get_record_count(), 0);
- ck_assert_int_eq(ext_wirs->get_height(), 0);
-
- delete ext_wirs;
+ delete test_de;
}
END_TEST
START_TEST(t_insert)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
uint64_t key = 0;
uint32_t val = 0;
for (size_t i=0; i<100; i++) {
- WRec r = {key, val, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
+ Rec r = {key, val};
+ ck_assert_int_eq(test_de->insert(r), 1);
key++;
val++;
}
- ck_assert_int_eq(ext_wirs->get_height(), 0);
- ck_assert_int_eq(ext_wirs->get_record_count(), 100);
+ ck_assert_int_eq(test_de->get_height(), 0);
+ ck_assert_int_eq(test_de->get_record_count(), 100);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
START_TEST(t_debug_insert)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
uint64_t key = 0;
uint32_t val = 0;
for (size_t i=0; i<1000; i++) {
- WRec r = {key, val, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
- ck_assert_int_eq(ext_wirs->get_record_count(), i+1);
+ Rec r = {key, val};
+ ck_assert_int_eq(test_de->insert(r), 1);
+ ck_assert_int_eq(test_de->get_record_count(), i+1);
key++;
val++;
}
- delete ext_wirs;
+ delete test_de;
}
END_TEST
START_TEST(t_insert_with_mem_merges)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
uint64_t key = 0;
uint32_t val = 0;
for (size_t i=0; i<300; i++) {
- WRec r = {key, val, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
+ Rec r = {key, val};
+ ck_assert_int_eq(test_de->insert(r), 1);
key++;
val++;
}
- ext_wirs->await_next_epoch();
+ test_de->await_next_epoch();
- ck_assert_int_eq(ext_wirs->get_record_count(), 300);
- ck_assert_int_eq(ext_wirs->get_height(), 1);
+ ck_assert_int_eq(test_de->get_record_count(), 300);
+ ck_assert_int_eq(test_de->get_height(), 1);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
@@ -90,13 +89,13 @@ END_TEST
/*
START_TEST(t_range_sample_memtable)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
uint64_t key = 0;
uint32_t val = 0;
for (size_t i=0; i<100; i++) {
- WRec r = {key, val, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
+ Rec r = {key, val};
+ ck_assert_int_eq(test_de->insert(r), 1);
key++;
val++;
}
@@ -106,9 +105,9 @@ START_TEST(t_range_sample_memtable)
char *buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE);
char *util_buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE);
- WRec sample_set[100];
+ Rec sample_set[100];
- ext_wirs->range_sample(sample_set, lower_bound, upper_bound, 100);
+ test_de->range_sample(sample_set, lower_bound, upper_bound, 100);
for(size_t i=0; i<100; i++) {
ck_assert_int_le(sample_set[i].key, upper_bound);
@@ -118,20 +117,20 @@ START_TEST(t_range_sample_memtable)
free(buf);
free(util_buf);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
START_TEST(t_range_sample_memlevels)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
uint64_t key = 0;
uint32_t val = 0;
for (size_t i=0; i<300; i++) {
- WRec r = {key, val, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
+ Rec r = {key, val};
+ ck_assert_int_eq(test_de->insert(r), 1);
key++;
val++;
}
@@ -142,8 +141,8 @@ START_TEST(t_range_sample_memlevels)
char *buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE);
char *util_buf = (char *) std::aligned_alloc(SECTOR_SIZE, PAGE_SIZE);
- WRec sample_set[100];
- ext_wirs->range_sample(sample_set, lower_bound, upper_bound, 100);
+ Rec sample_set[100];
+ test_de->range_sample(sample_set, lower_bound, upper_bound, 100);
for(size_t i=0; i<100; i++) {
ck_assert_int_le(sample_set[i].key, upper_bound);
@@ -153,33 +152,19 @@ START_TEST(t_range_sample_memlevels)
free(buf);
free(util_buf);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
*/
-START_TEST(t_range_sample_weighted)
+START_TEST(t_range_query)
{
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
size_t n = 10000;
std::vector<uint64_t> keys;
-
- uint64_t key = 1;
- for (size_t i=0; i< n / 2; i++) {
- keys.push_back(key);
- }
-
- // put in a quarter of the count with weight two.
- key = 2;
- for (size_t i=0; i< n / 4; i++) {
- keys.push_back(key);
- }
-
- // the remaining quarter with weight four.
- key = 3;
- for (size_t i=0; i< n / 4; i++) {
- keys.push_back(key);
+ for (size_t i=0; i<n; i++) {
+ keys.push_back(rand() % 25000);
}
std::random_device rd;
@@ -187,51 +172,33 @@ START_TEST(t_range_sample_weighted)
std::shuffle(keys.begin(), keys.end(), gen);
for (size_t i=0; i<keys.size(); i++) {
- uint64_t weight;
- if (keys[i] == 1) {
- weight = 2;
- } else if (keys[i] == 2) {
- weight = 4;
- } else {
- weight = 8;
- }
-
- WRec r = {keys[i], (uint32_t) i, weight};
- ext_wirs->insert(r);
+ Rec r = {keys[i], (uint32_t) i};
+ ck_assert_int_eq(test_de->insert(r), 1);
}
- ext_wirs->await_next_epoch();
+ test_de->await_next_epoch();
- size_t k = 1000;
- uint64_t lower_key = 0;
- uint64_t upper_key = 5;
+ std::sort(keys.begin(), keys.end());
- size_t cnt[3] = {0};
- size_t total_samples = 0;
+ auto idx = rand() % (keys.size() - 250);
- wirs_query_parms<WRec> p;
+ uint64_t lower_key = keys[idx];
+ uint64_t upper_key = keys[idx + 250];
+
+ rq::Parms<Rec> p;
p.lower_bound = lower_key;
p.upper_bound = upper_key;
- p.sample_size = k;
- p.rng = gsl_rng_alloc(gsl_rng_mt19937);
-
- for (size_t i=0; i<1000; i++) {
- auto result = ext_wirs->query(&p);
- auto r = result.get();
- total_samples += r.size();
+ auto result = test_de->query(&p);
+ auto r = result.get();
+ std::sort(r.begin(), r.end());
+ ck_assert_int_eq(r.size(), 251);
- for (size_t j=0; j<r.size(); j++) {
- cnt[r[j].key - 1]++;
- }
+ for (size_t i=0; i<r.size(); i++) {
+ ck_assert_int_eq(r[i].key, keys[idx + i]);
}
- ck_assert(roughly_equal(cnt[0], (double) total_samples/4.0, total_samples, .03));
- ck_assert(roughly_equal(cnt[1], (double) total_samples/4.0, total_samples, .03));
- ck_assert(roughly_equal(cnt[2], (double) total_samples/2.0, total_samples, .03));
-
- gsl_rng_free(p.rng);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
@@ -239,7 +206,7 @@ END_TEST
START_TEST(t_tombstone_merging_01)
{
size_t reccnt = 100000;
- auto ext_wirs = new DE(100, 2, .01);
+ auto test_de = new DE(100, 2, .01);
auto rng = gsl_rng_alloc(gsl_rng_mt19937);
@@ -259,16 +226,16 @@ START_TEST(t_tombstone_merging_01)
size_t deletes = 0;
size_t cnt=0;
for (auto rec : records) {
- WRec r = {rec.first, rec.second, 1};
- ck_assert_int_eq(ext_wirs->insert(r), 1);
+ Rec r = {rec.first, rec.second};
+ ck_assert_int_eq(test_de->insert(r), 1);
if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) {
std::vector<std::pair<uint64_t, uint32_t>> del_vec;
std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()});
for (size_t i=0; i<del_vec.size(); i++) {
- WRec dr = {del_vec[i].first, del_vec[i].second, 1};
- ext_wirs->erase(dr);
+ Rec dr = {del_vec[i].first, del_vec[i].second};
+ test_de->erase(dr);
deletes++;
to_delete.erase(del_vec[i]);
deleted.insert(del_vec[i]);
@@ -279,26 +246,26 @@ START_TEST(t_tombstone_merging_01)
to_delete.insert(rec);
}
- ck_assert(ext_wirs->validate_tombstone_proportion());
+ ck_assert(test_de->validate_tombstone_proportion());
}
- ext_wirs->await_next_epoch();
+ test_de->await_next_epoch();
- ck_assert(ext_wirs->validate_tombstone_proportion());
+ ck_assert(test_de->validate_tombstone_proportion());
gsl_rng_free(rng);
- delete ext_wirs;
+ delete test_de;
}
END_TEST
DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) {
auto rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto ext_wirs = new DE(1000, 2, 1);
+ auto test_de = new DE(1000, 2, 1);
- std::set<WRec> records;
- std::set<WRec> to_delete;
- std::set<WRec> deleted;
+ std::set<Rec> records;
+ std::set<Rec> to_delete;
+ std::set<Rec> deleted;
while (records.size() < reccnt) {
uint64_t key = rand();
@@ -311,14 +278,14 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) {
size_t deletes = 0;
for (auto rec : records) {
- ck_assert_int_eq(ext_wirs->insert(rec), 1);
+ ck_assert_int_eq(test_de->insert(rec), 1);
if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) {
- std::vector<WRec> del_vec;
+ std::vector<Rec> del_vec;
std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()});
for (size_t i=0; i<del_vec.size(); i++) {
- ext_wirs->erase(del_vec[i]);
+ test_de->erase(del_vec[i]);
deletes++;
to_delete.erase(del_vec[i]);
deleted.insert(del_vec[i]);
@@ -332,7 +299,7 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) {
gsl_rng_free(rng);
- return ext_wirs;
+ return test_de;
}
START_TEST(t_static_structure)
@@ -340,11 +307,11 @@ START_TEST(t_static_structure)
auto rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t reccnt = 100000;
- auto ext_wirs = new DE(100, 2, 1);
+ auto test_de = new DE(100, 2, 1);
- std::set<WRec> records;
- std::set<WRec> to_delete;
- std::set<WRec> deleted;
+ std::set<Rec> records;
+ std::set<Rec> to_delete;
+ std::set<Rec> deleted;
while (records.size() < reccnt) {
uint64_t key = rand();
@@ -352,7 +319,7 @@ START_TEST(t_static_structure)
if (records.find({key, val}) != records.end()) continue;
- records.insert({key, val, 1});
+ records.insert({key, val});
}
size_t deletes = 0;
@@ -360,15 +327,15 @@ START_TEST(t_static_structure)
size_t k=0;
for (auto rec : records) {
k++;
- ck_assert_int_eq(ext_wirs->insert(rec), 1);
+ ck_assert_int_eq(test_de->insert(rec), 1);
t_reccnt++;
if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) {
- std::vector<WRec> del_vec;
+ std::vector<Rec> del_vec;
std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()});
for (size_t i=0; i<del_vec.size(); i++) {
- ck_assert_int_eq(ext_wirs->erase(del_vec[i]), 1);
+ ck_assert_int_eq(test_de->erase(del_vec[i]), 1);
deletes++;
to_delete.erase(del_vec[i]);
@@ -381,7 +348,7 @@ START_TEST(t_static_structure)
}
}
- auto flat = ext_wirs->create_static_structure();
+ auto flat = test_de->create_static_structure();
ck_assert_int_eq(flat->get_record_count(), reccnt - deletes);
uint64_t prev_key = 0;
@@ -393,7 +360,7 @@ START_TEST(t_static_structure)
gsl_rng_free(rng);
delete flat;
- delete ext_wirs;
+ delete test_de;
}
END_TEST
@@ -406,21 +373,15 @@ Suite *unit_testing()
tcase_add_test(create, t_create);
suite_add_tcase(unit, create);
- TCase *insert = tcase_create("de::DynamicExtension<WIRS>::insert Testing");
+ TCase *insert = tcase_create("de::DynamicExtension<ISAMTree>::insert Testing");
tcase_add_test(insert, t_insert);
tcase_add_test(insert, t_insert_with_mem_merges);
tcase_add_test(insert, t_debug_insert);
suite_add_tcase(unit, insert);
- TCase *sampling = tcase_create("de::DynamicExtension<WIRS>::range_sample Testing");
-
- tcase_add_test(sampling, t_range_sample_weighted);
- suite_add_tcase(unit, sampling);
-
- /*
- tcase_add_test(sampling, t_range_sample_memtable);
- tcase_add_test(sampling, t_range_sample_memlevels);
- */
+ TCase *query = tcase_create("de::DynamicExtension<ISAMTree>::range_query Testing");
+ tcase_add_test(query, t_range_query);
+ suite_add_tcase(unit, query);
TCase *ts = tcase_create("de::DynamicExtension::tombstone_compaction Testing");
tcase_add_test(ts, t_tombstone_merging_01);
diff --git a/tests/internal_level_tests.cpp b/tests/internal_level_tests.cpp
index 056e458..1009ea8 100644
--- a/tests/internal_level_tests.cpp
+++ b/tests/internal_level_tests.cpp
@@ -9,7 +9,8 @@
* All rights reserved. Published under the Modified BSD License.
*
*/
-#include "shard/WIRS.h"
+#include "shard/ISAMTree.h"
+#include "query/rangequery.h"
#include "framework/structure/InternalLevel.h"
#include "framework/interface/Record.h"
#include "framework/interface/Query.h"
@@ -21,12 +22,12 @@
using namespace de;
-typedef InternalLevel<WRec, WIRS<WRec>, WIRSQuery<WRec>> ILevel;
+typedef InternalLevel<Rec, ISAMTree<Rec>, rq::Query<ISAMTree<Rec>, Rec>> ILevel;
START_TEST(t_memlevel_merge)
{
- auto tbl1 = create_test_mbuffer<WRec>(100);
- auto tbl2 = create_test_mbuffer<WRec>(100);
+ auto tbl1 = create_test_mbuffer<Rec>(100);
+ auto tbl2 = create_test_mbuffer<Rec>(100);
auto base_level = new ILevel(1, 1);
base_level->append_buffer(tbl1);
@@ -48,8 +49,8 @@ START_TEST(t_memlevel_merge)
ILevel *create_test_memlevel(size_t reccnt) {
- auto tbl1 = create_test_mbuffer<WRec>(reccnt/2);
- auto tbl2 = create_test_mbuffer<WRec>(reccnt/2);
+ auto tbl1 = create_test_mbuffer<Rec>(reccnt/2);
+ auto tbl2 = create_test_mbuffer<Rec>(reccnt/2);
auto base_level = new ILevel(1, 2);
base_level->append_buffer(tbl1);
diff --git a/tests/memisam_tests.cpp b/tests/memisam_tests.cpp
index 0ae97dc..d3b8087 100644
--- a/tests/memisam_tests.cpp
+++ b/tests/memisam_tests.cpp
@@ -1,7 +1,7 @@
/*
- * tests/irs_tests.cpp
+ * tests/memisam_tests.cpp
*
- * Unit tests for MemISAM (Augmented B+Tree) shard
+ * Unit tests for ISAM Tree shard
*
* Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
* Dong Xie <dongx@psu.edu>
@@ -10,14 +10,15 @@
*
*/
-#include "shard/MemISAM.h"
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
#include "testing.h"
#include <check.h>
using namespace de;
-typedef MemISAM<Rec> Shard;
+typedef ISAMTree<Rec> Shard;
START_TEST(t_mbuffer_init)
{
@@ -181,15 +182,15 @@ START_TEST(t_irs_query)
size_t k = 100;
size_t cnt[3] = {0};
- irs_query_parms<Rec> parms = {lower_key, upper_key, k};
+ irs::Parms<Rec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = IRSQuery<Rec, false>::get_query_state(&isam, &parms);
- ((IRSState<WRec> *) state)->sample_size = k;
- auto result = IRSQuery<Rec, false>::query(&isam, state, &parms);
+ auto state = irs::Query<Shard, Rec, false>::get_query_state(&isam, &parms);
+ ((irs::State<WRec> *) state)->sample_size = k;
+ auto result = irs::Query<Shard, Rec, false>::query(&isam, state, &parms);
ck_assert_int_eq(result.size(), k);
@@ -198,7 +199,7 @@ START_TEST(t_irs_query)
ck_assert_int_ge(rec.rec.key, lower_key);
}
- IRSQuery<Rec, false>::delete_query_state(state);
+ irs::Query<Shard, Rec, false>::delete_query_state(state);
}
gsl_rng_free(parms.rng);
@@ -220,25 +221,25 @@ START_TEST(t_irs_query_merge)
size_t k = 1000;
size_t cnt[3] = {0};
- irs_query_parms<Rec> parms = {lower_key, upper_key, k};
+ irs::Parms<Rec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
std::vector<std::vector<de::Wrapped<Rec>>> results(2);
for (size_t i=0; i<1000; i++) {
- auto state1 = IRSQuery<Rec>::get_query_state(&shard, &parms);
- ((IRSState<WRec> *) state1)->sample_size = k;
- results[0] = IRSQuery<Rec>::query(&shard, state1, &parms);
+ auto state1 = irs::Query<Shard, Rec>::get_query_state(&shard, &parms);
+ ((irs::State<WRec> *) state1)->sample_size = k;
+ results[0] = irs::Query<Shard, Rec>::query(&shard, state1, &parms);
- auto state2 = IRSQuery<Rec>::get_query_state(&shard, &parms);
- ((IRSState<WRec> *) state2)->sample_size = k;
- results[1] = IRSQuery<Rec>::query(&shard, state2, &parms);
+ auto state2 = irs::Query<Shard, Rec>::get_query_state(&shard, &parms);
+ ((irs::State<WRec> *) state2)->sample_size = k;
+ results[1] = irs::Query<Shard, Rec>::query(&shard, state2, &parms);
- IRSQuery<Rec>::delete_query_state(state1);
- IRSQuery<Rec>::delete_query_state(state2);
+ irs::Query<Shard, Rec>::delete_query_state(state1);
+ irs::Query<Shard, Rec>::delete_query_state(state2);
}
- auto merged = IRSQuery<Rec>::merge(results, nullptr);
+ auto merged = irs::Query<Shard, Rec>::merge(results, nullptr);
ck_assert_int_eq(merged.size(), 2*k);
for (size_t i=0; i<merged.size(); i++) {
@@ -263,15 +264,15 @@ START_TEST(t_irs_buffer_query_scan)
size_t k = 100;
size_t cnt[3] = {0};
- irs_query_parms<Rec> parms = {lower_key, upper_key, k};
+ irs::Parms<Rec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = IRSQuery<Rec, false>::get_buffer_query_state(buffer, &parms);
- ((IRSBufferState<WRec> *) state)->sample_size = k;
- auto result = IRSQuery<Rec, false>::buffer_query(buffer, state, &parms);
+ auto state = irs::Query<Shard, Rec, false>::get_buffer_query_state(buffer, &parms);
+ ((irs::BufferState<WRec> *) state)->sample_size = k;
+ auto result = irs::Query<Shard, Rec, false>::buffer_query(buffer, state, &parms);
ck_assert_int_eq(result.size(), k);
@@ -280,7 +281,7 @@ START_TEST(t_irs_buffer_query_scan)
ck_assert_int_ge(rec.rec.key, lower_key);
}
- IRSQuery<Rec, false>::delete_buffer_query_state(state);
+ irs::Query<Shard, Rec, false>::delete_buffer_query_state(state);
}
gsl_rng_free(parms.rng);
@@ -300,15 +301,15 @@ START_TEST(t_irs_buffer_query_rejection)
size_t k = 10000;
size_t cnt[3] = {0};
- irs_query_parms<Rec> parms = {lower_key, upper_key, k};
+ irs::Parms<Rec> parms = {lower_key, upper_key, k};
parms.rng = gsl_rng_alloc(gsl_rng_mt19937);
size_t total_samples = 0;
for (size_t i=0; i<1000; i++) {
- auto state = IRSQuery<Rec>::get_buffer_query_state(buffer, &parms);
- ((IRSBufferState<WRec> *) state)->sample_size = k;
- auto result = IRSQuery<Rec>::buffer_query(buffer, state, &parms);
+ auto state = irs::Query<Shard, Rec>::get_buffer_query_state(buffer, &parms);
+ ((irs::BufferState<WRec> *) state)->sample_size = k;
+ auto result = irs::Query<Shard, Rec>::buffer_query(buffer, state, &parms);
ck_assert_int_gt(result.size(), 0);
ck_assert_int_le(result.size(), k);
@@ -318,7 +319,7 @@ START_TEST(t_irs_buffer_query_rejection)
ck_assert_int_ge(rec.rec.key, lower_key);
}
- IRSQuery<Rec>::delete_buffer_query_state(state);
+ irs::Query<Shard, Rec>::delete_buffer_query_state(state);
}
gsl_rng_free(parms.rng);
@@ -329,27 +330,27 @@ END_TEST
Suite *unit_testing()
{
- Suite *unit = suite_create("MemISAM Shard Unit Testing");
+ Suite *unit = suite_create("ISAMTree Shard Unit Testing");
- TCase *create = tcase_create("de::MemISAM constructor Testing");
+ TCase *create = tcase_create("de::ISAMTree constructor Testing");
tcase_add_test(create, t_mbuffer_init);
tcase_add_test(create, t_irs_init);
tcase_set_timeout(create, 100);
suite_add_tcase(unit, create);
- TCase *tombstone = tcase_create("de:MemISAM::tombstone cancellation Testing");
+ TCase *tombstone = tcase_create("de:ISAMTree::tombstone cancellation Testing");
tcase_add_test(tombstone, t_full_cancelation);
suite_add_tcase(unit, tombstone);
- TCase *lookup = tcase_create("de:MemISAM:point_lookup Testing");
+ TCase *lookup = tcase_create("de:ISAMTree:point_lookup Testing");
tcase_add_test(lookup, t_point_lookup);
tcase_add_test(lookup, t_point_lookup_miss);
suite_add_tcase(unit, lookup);
- TCase *sampling = tcase_create("de:MemISAM::MemISAMQuery Testing");
+ TCase *sampling = tcase_create("de:ISAMTree::ISAMTreeQuery Testing");
tcase_add_test(sampling, t_irs_query);
tcase_add_test(sampling, t_irs_query_merge);
tcase_add_test(sampling, t_irs_buffer_query_rejection);
diff --git a/tests/pgm_tests.cpp b/tests/pgm_tests.cpp
index 0552417..1565384 100644
--- a/tests/pgm_tests.cpp
+++ b/tests/pgm_tests.cpp
@@ -11,6 +11,7 @@
*/
#include "shard/PGM.h"
+#include "query/rangequery.h"
#include "testing.h"
#include <check.h>
@@ -144,13 +145,13 @@ START_TEST(t_range_query)
auto buffer = create_sequential_mbuffer<Rec>(100, 1000);
auto shard = Shard(buffer);
- pgm_range_query_parms<Rec> parms;
+ rq::Parms<Rec> parms;
parms.lower_bound = 300;
parms.upper_bound = 500;
- auto state = PGMRangeQuery<Rec>::get_query_state(&shard, &parms);
- auto result = PGMRangeQuery<Rec>::query(&shard, state, &parms);
- PGMRangeQuery<Rec>::delete_query_state(state);
+ auto state = rq::Query<Shard, Rec>::get_query_state(&shard, &parms);
+ auto result = rq::Query<Shard, Rec>::query(&shard, state, &parms);
+ rq::Query<Shard, Rec>::delete_query_state(state);
ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1);
for (size_t i=0; i<result.size(); i++) {
@@ -167,13 +168,13 @@ START_TEST(t_buffer_range_query)
{
auto buffer = create_sequential_mbuffer<Rec>(100, 1000);
- pgm_range_query_parms<Rec> parms;
+ rq::Parms<Rec> parms;
parms.lower_bound = 300;
parms.upper_bound = 500;
- auto state = PGMRangeQuery<Rec>::get_buffer_query_state(buffer, &parms);
- auto result = PGMRangeQuery<Rec>::buffer_query(buffer, state, &parms);
- PGMRangeQuery<Rec>::delete_buffer_query_state(state);
+ auto state = rq::Query<Shard, Rec>::get_buffer_query_state(buffer, &parms);
+ auto result = rq::Query<Shard, Rec>::buffer_query(buffer, state, &parms);
+ rq::Query<Shard, Rec>::delete_buffer_query_state(state);
ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1);
for (size_t i=0; i<result.size(); i++) {
@@ -194,21 +195,21 @@ START_TEST(t_range_query_merge)
auto shard1 = Shard(buffer1);
auto shard2 = Shard(buffer2);
- pgm_range_query_parms<Rec> parms;
+ rq::Parms<Rec> parms;
parms.lower_bound = 150;
parms.upper_bound = 500;
size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200;
- auto state1 = PGMRangeQuery<Rec>::get_query_state(&shard1, &parms);
- auto state2 = PGMRangeQuery<Rec>::get_query_state(&shard2, &parms);
+ auto state1 = rq::Query<Shard, Rec>::get_query_state(&shard1, &parms);
+ auto state2 = rq::Query<Shard, Rec>::get_query_state(&shard2, &parms);
std::vector<std::vector<de::Wrapped<Rec>>> results(2);
- results[0] = PGMRangeQuery<Rec>::query(&shard1, state1, &parms);
- results[1] = PGMRangeQuery<Rec>::query(&shard2, state2, &parms);
+ results[0] = rq::Query<Shard, Rec>::query(&shard1, state1, &parms);
+ results[1] = rq::Query<Shard, Rec>::query(&shard2, state2, &parms);
- PGMRangeQuery<Rec>::delete_query_state(state1);
- PGMRangeQuery<Rec>::delete_query_state(state2);
+ rq::Query<Shard, Rec>::delete_query_state(state1);
+ rq::Query<Shard, Rec>::delete_query_state(state2);
ck_assert_int_eq(results[0].size() + results[1].size(), result_size);
@@ -221,7 +222,7 @@ START_TEST(t_range_query_merge)
}
}
- auto result = PGMRangeQuery<Rec>::merge(proc_results, nullptr);
+ auto result = rq::Query<Shard, Rec>::merge(proc_results, nullptr);
std::sort(result.begin(), result.end());
ck_assert_int_eq(result.size(), result_size);
diff --git a/tests/triespline_tests.cpp b/tests/triespline_tests.cpp
index 6f63961..101f143 100644
--- a/tests/triespline_tests.cpp
+++ b/tests/triespline_tests.cpp
@@ -13,6 +13,7 @@
#include <functional>
#include "shard/TrieSpline.h"
+#include "query/rangequery.h"
#include "testing.h"
#include <check.h>
@@ -176,13 +177,13 @@ START_TEST(t_range_query)
auto buffer = create_sequential_mbuffer<Rec>(100, 1000);
auto shard = Shard(buffer);
- ts_range_query_parms<Rec> parms;
+ rq::Parms<Rec> parms;
parms.lower_bound = 300;
parms.upper_bound = 500;
- auto state = TrieSplineRangeQuery<Rec>::get_query_state(&shard, &parms);
- auto result = TrieSplineRangeQuery<Rec>::query(&shard, state, &parms);
- TrieSplineRangeQuery<Rec>::delete_query_state(state);
+ auto state = rq::Query<Shard, Rec>::get_query_state(&shard, &parms);
+ auto result = rq::Query<Shard, Rec>::query(&shard, state, &parms);
+ rq::Query<Shard, Rec>::delete_query_state(state);
ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1);
for (size_t i=0; i<result.size(); i++) {
@@ -199,13 +200,13 @@ START_TEST(t_buffer_range_query)
{
auto buffer = create_sequential_mbuffer<Rec>(100, 1000);
- ts_range_query_parms<Rec> parms;
+ rq::Parms<Rec> parms;
parms.lower_bound = 300;
parms.upper_bound = 500;
- auto state = TrieSplineRangeQuery<Rec>::get_buffer_query_state(buffer, &parms);
- auto result = TrieSplineRangeQuery<Rec>::buffer_query(buffer, state, &parms);
- TrieSplineRangeQuery<Rec>::delete_buffer_query_state(state);
+ auto state = rq::Query<Shard, Rec>::get_buffer_query_state(buffer, &parms);
+ auto result = rq::Query<Shard, Rec>::buffer_query(buffer, state, &parms);
+ rq::Query<Shard, Rec>::delete_buffer_query_state(state);
ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1);
for (size_t i=0; i<result.size(); i++) {