diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-03-22 15:35:14 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-03-22 15:35:14 -0400 |
| commit | fb4312a883dd0e382ecbcfe1119479e6f44d32a6 (patch) | |
| tree | 0dfee95bbb83fbdd35a9502ce77cd4a6fa221b24 | |
| parent | 7619c90e10adc441e525c98ad068a3639096d0c9 (diff) | |
| download | dynamic-extension-fb4312a883dd0e382ecbcfe1119479e6f44d32a6.tar.gz | |
PointLookup: added a point lookup query for unique indexes, and some tests
| -rw-r--r-- | include/query/pointlookup.h | 117 | ||||
| -rw-r--r-- | include/shard/FSTrie.h | 6 | ||||
| -rw-r--r-- | tests/fst_tests.cpp | 4 | ||||
| -rw-r--r-- | tests/include/pointlookup.h | 111 |
4 files changed, 236 insertions, 2 deletions
diff --git a/include/query/pointlookup.h b/include/query/pointlookup.h
new file mode 100644
index 0000000..caaa320
--- /dev/null
+++ b/include/query/pointlookup.h
@@ -0,0 +1,158 @@
+/*
+ * include/query/pointlookup.h
+ *
+ * Copyright (C) 2024 Douglas B. Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * A query class for point lookup operations.
+ *
+ * TODO: Currently, this only supports point lookups for unique keys (which
+ * is the case for the trie that we're building this to use). It would be
+ * pretty straightforward to extend it to return *all* records that match
+ * the search_key (including tombstone cancellation--it's invertible) to
+ * support non-unique indexes, or at least those implementing
+ * lower_bound().
+ */
+#pragma once
+
+#include <vector>
+
+#include "framework/QueryRequirements.h"
+
+namespace de { namespace pl {
+
+/* Query parameters: the key whose record is to be looked up. */
+template <RecordInterface R>
+struct Parms {
+    decltype(R::key) search_key;
+};
+
+/* Per-shard query state. Empty: get_query_state() never allocates one. */
+template <RecordInterface R>
+struct State {
+};
+
+/*
+ * State for a buffer query: a pointer to the view to scan. The view is
+ * not freed when the state is deleted.
+ */
+template <RecordInterface R>
+struct BufferState {
+    BufferView<R> *buffer;
+
+    BufferState(BufferView<R> *buffer)
+        : buffer(buffer) {}
+};
+
+/*
+ * Point lookup for unique indexes: each stage produces at most one
+ * record, one whose key equals Parms::search_key.
+ */
+template <KVPInterface R, ShardInterface<R> S>
+class Query {
+public:
+    /* NOTE(review): presumably read by the framework's query driver -- confirm */
+    constexpr static bool EARLY_ABORT=true;
+    constexpr static bool SKIP_DELETE_FILTER=true;
+
+    /* Shard lookups need no state, so nothing is allocated. */
+    static void *get_query_state(S *shard, void *parms) {
+        return nullptr;
+    }
+
+    /*
+     * Allocate the state for a buffer query. Release the returned pointer
+     * with delete_buffer_query_state().
+     */
+    static void* get_buffer_query_state(BufferView<R> *buffer, void *parms) {
+        auto res = new BufferState<R>(buffer);
+
+        return res;
+    }
+
+    /* Does nothing for point lookups. */
+    static void process_query_states(void *query_parms, std::vector<void*> &shard_states, void* buffer_state) {
+        return;
+    }
+
+    /*
+     * Look up parms->search_key in the shard via point_lookup(). Returns a
+     * vector holding a copy of the matching record, or an empty vector on
+     * a miss. q_state is unused.
+     */
+    static std::vector<Wrapped<R>> query(S *shard, void *q_state, void *parms) {
+        auto p = static_cast<Parms<R> *>(parms);
+
+        std::vector<Wrapped<R>> result;
+
+        auto r = shard->point_lookup({p->search_key, 0});
+
+        if (r) {
+            result.push_back(*r);
+        }
+
+        return result;
+    }
+
+    /*
+     * Scan the buffer view from index 0 and return a copy of the first
+     * record whose key equals parms->search_key, or an empty vector when
+     * no record matches.
+     */
+    static std::vector<Wrapped<R>> buffer_query(void *state, void *parms) {
+        auto p = static_cast<Parms<R> *>(parms);
+        auto s = static_cast<BufferState<R> *>(state);
+
+        std::vector<Wrapped<R>> records;
+        for (size_t i=0; i<s->buffer->get_record_count(); i++) {
+            auto rec = s->buffer->get(i);
+
+            if (rec->rec.key == p->search_key) {
+                records.push_back(*rec);
+                return records;
+            }
+        }
+
+        return records;
+    }
+
+    /*
+     * Combine the per-source results. The first non-empty result decides
+     * the answer: empty if its record is deleted or a tombstone, otherwise
+     * that record. If every result is empty, the answer is empty as well.
+     *
+     * NOTE(review): relies on results being ordered so that the entry that
+     * should win comes first (presumably newest first) -- confirm.
+     */
+    static std::vector<R> merge(std::vector<std::vector<Wrapped<R>>> &results, void *parms) {
+        std::vector<R> output;
+        for (auto &r : results) {
+            if (r.size() > 0) {
+                if (r[0].is_deleted() || r[0].is_tombstone()) {
+                    return output;
+                }
+
+                output.push_back(r[0].rec);
+                return output;
+            }
+        }
+
+        /* every result was empty: report a miss instead of falling off the end */
+        return output;
+    }
+
+    /* Release a shard query state; deleting a null pointer is a no-op. */
+    static void delete_query_state(void *state) {
+        auto s = static_cast<State<R> *>(state);
+        delete s;
+    }
+
+    /* Release a state allocated by get_buffer_query_state(). */
+    static void delete_buffer_query_state(void *state) {
+        auto s = static_cast<BufferState<R> *>(state);
+        delete s;
+    }
+};
+
+}}
diff --git a/include/shard/FSTrie.h b/include/shard/FSTrie.h
index aa3c9f4..50bf982 100644
--- a/include/shard/FSTrie.h
+++ b/include/shard/FSTrie.h
@@ -160,6 +160,12 @@ public:
             return nullptr;
         }
 
+        // FIXME: for convenience, I'm treating this Trie as a unique index
+        // for now, so no need to scan forward and/or check values. This
+        // also makes the point lookup query class a lot easier to make.
+        // Ultimately, though, we can support non-unique indexes with some
+        // extra work.
+
         return m_data + idx;
     }
 
diff --git a/tests/fst_tests.cpp b/tests/fst_tests.cpp
index 4c8da4a..778f64a 100644
--- a/tests/fst_tests.cpp
+++ b/tests/fst_tests.cpp
@@ -20,14 +20,14 @@ typedef StringRec R;
 typedef FSTrie<R> Shard;
 
 #include "include/shard_string.h"
-//#include "include/rangequery.h"
+#include "include/pointlookup.h"
 
 Suite *unit_testing()
 {
     Suite *unit = suite_create("Fast-succinct Trie Shard Unit Testing");
 
-    // inject_rangequery_tests(unit);
     inject_shard_tests(unit);
+    inject_pointlookup_tests(unit);
 
     return unit;
 }
diff --git a/tests/include/pointlookup.h b/tests/include/pointlookup.h
new file mode 100644
index 0000000..bf4810b
--- /dev/null
+++ b/tests/include/pointlookup.h
@@ -0,0 +1,128 @@
+/*
+ * tests/include/pointlookup.h
+ *
+ * Standardized unit tests for point lookups against supporting
+ * shard types (must be unique for the moment)
+ *
+ * Copyright (C) 2024 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * Distributed under the Modified BSD License.
+ *
+ * WARNING: This file must be included in the main unit test set
+ *          after the definition of an appropriate Shard and R
+ *          type. In particular, R needs to implement the key-value
+ *          pair interface and Shard needs to support point_lookup.
+ *          For other types of record and shard, you'll need to
+ *          use a different set of unit tests.
+ */
+#pragma once
+
+/*
+ * The includes and type definitions below let this file be worked on
+ * in isolation. They are meant to be supplied by the source file that
+ * includes this one, above the include statement.
+ *
+ * NOTE(review): they are currently enabled, which fixes Shard to
+ * FSTrie<R> -- confirm whether they should be disabled before building.
+ */
+
+#include "shard/FSTrie.h"
+#include "query/pointlookup.h"
+#include "testing.h"
+
+#include <check.h>
+
+using namespace de;
+typedef StringRec R;
+typedef FSTrie<R> Shard;
+
+/*
+ * Builds a shard from a test buffer, looks up the key of every buffered
+ * record through the shard query, and checks that exactly one record with
+ * the same key and value comes back. A final lookup that is expected to
+ * miss must return nothing.
+ */
+START_TEST(t_point_lookup_query)
+{
+    using PLQuery = pl::Query<R, Shard>;
+
+    auto buffer = create_test_mbuffer<R>(1000);
+    Shard shard(buffer->get_buffer_view());
+
+    pl::Parms<R> parms;
+    {
+        auto bv = buffer->get_buffer_view();
+        for (size_t i = 0; i < bv.get_record_count(); ++i) {
+            auto key = bv.get(i)->rec.key;
+            parms.search_key = key;
+
+            auto q_state = PLQuery::get_query_state(&shard, &parms);
+            auto result = PLQuery::query(&shard, q_state, &parms);
+            PLQuery::delete_query_state(q_state);
+
+            ck_assert_int_eq(result.size(), 1);
+            ck_assert_str_eq(result[0].rec.key.c_str(), key.c_str());
+            ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value);
+        }
+
+        /* lookup expected to miss: the result must be empty */
+        parms.search_key = "computer";
+        auto q_state = PLQuery::get_query_state(&shard, &parms);
+        auto result = PLQuery::query(&shard, q_state, &parms);
+        PLQuery::delete_query_state(q_state);
+
+        ck_assert_int_eq(result.size(), 0);
+    }
+
+    delete buffer;
+}
+END_TEST
+
+
+/*
+ * Runs the buffer query directly against a buffer view, visiting the
+ * records from last to first, and checks that each lookup returns exactly
+ * one record with the same key and value. A final lookup that is expected
+ * to miss must return nothing.
+ */
+START_TEST(t_buffer_point_lookup)
+{
+    using PLQuery = pl::Query<R, Shard>;
+
+    auto buffer = create_test_mbuffer<R>(1000);
+    pl::Parms<R> parms;
+    {
+        auto view = buffer->get_buffer_view();
+        for (int i = view.get_record_count() - 1; i >= 0; --i) {
+            parms.search_key = view.get(i)->rec.key;
+
+            auto b_state = PLQuery::get_buffer_query_state(&view, &parms);
+            auto result = PLQuery::buffer_query(b_state, &parms);
+            PLQuery::delete_buffer_query_state(b_state);
+
+            ck_assert_int_eq(result.size(), 1);
+            ck_assert_str_eq(result[0].rec.key.c_str(), view.get(i)->rec.key.c_str());
+            ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value);
+        }
+
+        /* lookup expected to miss: the result must be empty */
+        parms.search_key = "computer";
+        auto b_state = PLQuery::get_buffer_query_state(&view, &parms);
+        auto result = PLQuery::buffer_query(b_state, &parms);
+        PLQuery::delete_buffer_query_state(b_state);
+
+        ck_assert_int_eq(result.size(), 0);
+    }
+
+    delete buffer;
+}
+END_TEST
+
+
+/* Registers both point lookup tests with the suite under one test case. */
+static void inject_pointlookup_tests(Suite *suite) {
+    TCase *tc = tcase_create("Point Lookup Testing");
+    tcase_add_test(tc, t_point_lookup_query);
+    tcase_add_test(tc, t_buffer_point_lookup);
+    suite_add_tcase(suite, tc);
+}