diff options
| author | Douglas B. Rumbaugh <dbr4@psu.edu> | 2024-12-06 13:13:51 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-12-06 18:13:51 +0000 |
| commit | 9fe305c7d28e993e55c55427f377ae7e3251ea4f (patch) | |
| tree | 384b687f64b84eb81bde2becac8a5f24916b07b4 /tests/include | |
| parent | 47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc (diff) | |
| download | dynamic-extension-9fe305c7d28e993e55c55427f377ae7e3251ea4f.tar.gz | |
Interface update (#5)
* Query Interface Adjustments/Refactoring
Began the process of adjusting the query interface (and also the shard
interface, to a lesser degree) to better accommodate the user. In
particular, the following changes have been made:
1. The number of necessary template arguments for the query type
has been drastically reduced, while also removing the void pointers
and manual delete functions from the interface.
This was accomplished by requiring many of the sub-types associated
with a query (parameters, etc.) to be nested inside the main query
class, and by forcing the SHARD type to expose its associated
record type.
2. User-defined query return types are now supported.
Queries are no longer required to return strictly sets of records.
Instead, the query now has LocalResultType and ResultType
template parameters (which can be defaulted using a typedef in
the Query type itself), allowing much more flexibility.
Note that, at least for the short term, the LocalResultType must
still expose the same is_deleted/is_tombstone interface as a
Wrapped<R> used to, as this is currently needed for delete
filtering. A better approach to this is, hopefully, forthcoming.
3. Updated the ISAMTree.h shard and rangequery.h query to use the
new interfaces, and adjusted the associated unit tests as well.
4. Dropped the unnecessary "get_data()" function from the ShardInterface
concept.
5. Dropped the need to specify a record type in the ShardInterface
concept. This is now handled using a required Shard::RECORD
member of the Shard class itself, which should expose the name
of the record type.
* Updates to framework to support new Query/Shard interfaces
Pretty extensive adjustments to the framework, particularly to the
templates themselves, along with some type-renaming work, to support
the new query and shard interfaces.
Adjusted the external query interface to take an rvalue reference, rather
than a pointer, to the query parameters.
* Removed framework-level delete filtering
This was causing some issues with the new query interface, and should
probably be reworked anyway, so I'm temporarily (TM) removing the
feature.
* Updated benchmarks + remaining code for new interface
Diffstat (limited to 'tests/include')
| -rw-r--r-- | tests/include/concurrent_extension.h | 40 | ||||
| -rw-r--r-- | tests/include/dynamic_extension.h | 69 | ||||
| -rw-r--r-- | tests/include/irs.h | 165 | ||||
| -rw-r--r-- | tests/include/pointlookup.h | 54 | ||||
| -rw-r--r-- | tests/include/rangecount.h | 108 | ||||
| -rw-r--r-- | tests/include/rangequery.h | 59 | ||||
| -rw-r--r-- | tests/include/shard_standard.h | 1 | ||||
| -rw-r--r-- | tests/include/shard_string.h | 1 | ||||
| -rw-r--r-- | tests/include/testing.h | 10 | ||||
| -rw-r--r-- | tests/include/wirs.h | 182 | ||||
| -rw-r--r-- | tests/include/wss.h | 119 |
11 files changed, 353 insertions, 455 deletions
diff --git a/tests/include/concurrent_extension.h b/tests/include/concurrent_extension.h index 927a094..02bd694 100644 --- a/tests/include/concurrent_extension.h +++ b/tests/include/concurrent_extension.h @@ -22,17 +22,20 @@ * should be included in the source file that includes this one, above the * include statement. */ -/*#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/FIFOScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> - -//using namespace de; -//typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE, FIFOScheduler> DE; -*/ - +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// //#include "framework/scheduling/FIFOScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <set> +// #include <random> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::LEVELING, DeletePolicy::TOMBSTONE> DE; //, FIFOScheduler> DE; START_TEST(t_create) { @@ -164,11 +167,11 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); @@ -203,8 +206,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; while (!test_de->insert(r)) { @@ -220,7 +221,6 @@ START_TEST(t_tombstone_merging_01) while (!test_de->erase(dr)) { _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -258,7 +258,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { 
records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); @@ -268,7 +267,6 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -304,15 +302,10 @@ START_TEST(t_static_structure) records.insert({key, val}); } - size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; while (!test_de->insert(rec)) { _mm_pause(); } - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { std::vector<R> del_vec; @@ -323,7 +316,6 @@ START_TEST(t_static_structure) _mm_pause(); } - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } diff --git a/tests/include/dynamic_extension.h b/tests/include/dynamic_extension.h index 6e9b16c..90c6906 100644 --- a/tests/include/dynamic_extension.h +++ b/tests/include/dynamic_extension.h @@ -22,18 +22,24 @@ * should be included in the source file that includes this one, above the * include statement. 
*/ -/* -#include "testing.h" -#include "framework/DynamicExtension.h" -#include "framework/scheduling/SerialScheduler.h" -#include "shard/ISAMTree.h" -#include "query/rangequery.h" -#include <check.h> -using namespace de; -typedef DynamicExtension<R, ISAMTree<R>, rq::Query<ISAMTree<R>, R>, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; -*/ + +// #include "testing.h" +// #include "framework/DynamicExtension.h" +// #include "framework/scheduling/SerialScheduler.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include <check.h> +// #include <random> +// #include <set> + +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> S; +// typedef rq::Query<S> Q; +// typedef DynamicExtension<S, Q, LayoutPolicy::TEIRING, DeletePolicy::TAGGING, SerialScheduler> DE; +#include "framework/util/Configuration.h" START_TEST(t_create) { auto test_de = new DE(100, 1000, 2); @@ -103,7 +109,16 @@ START_TEST(t_insert_with_mem_merges) test_de->await_next_epoch(); ck_assert_int_eq(test_de->get_record_count(), 300); - ck_assert_int_eq(test_de->get_height(), 1); + + /* + * BSM grows on every flush, so the height will be different than + * normal layout policies + */ + if (test_de->Layout == de::LayoutPolicy::BSM) { + ck_assert_int_eq(test_de->get_height(), 2); + } else { + ck_assert_int_eq(test_de->get_height(), 1); + } delete test_de; } @@ -138,11 +153,12 @@ START_TEST(t_range_query) uint64_t lower_key = keys[idx]; uint64_t upper_key = keys[idx + 250]; - rq::Parms<R> p; + Q::Parameters p; + p.lower_bound = lower_key; p.upper_bound = upper_key; - auto result = test_de->query(&p); + auto result = test_de->query(std::move(p)); auto r = result.get(); std::sort(r.begin(), r.end()); ck_assert_int_eq(r.size(), 251); @@ -176,8 +192,6 @@ START_TEST(t_tombstone_merging_01) records.insert({key, val}); } - size_t deletes = 0; - size_t cnt=0; for (auto rec : records) { R r = {rec.first, rec.second}; ck_assert_int_eq(test_de->insert(r), 1); @@ -189,7 
+203,6 @@ START_TEST(t_tombstone_merging_01) for (size_t i=0; i<del_vec.size(); i++) { R dr = {del_vec[i].first, del_vec[i].second}; test_de->erase(dr); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -209,14 +222,14 @@ START_TEST(t_tombstone_merging_01) } END_TEST -DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { +[[maybe_unused]] static DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { auto rng = gsl_rng_alloc(gsl_rng_mt19937); auto test_de = new DE(1000, 10000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -227,17 +240,15 @@ DE *create_test_tree(size_t reccnt, size_t memlevel_cnt) { records.insert({key, val}); } - size_t deletes = 0; for (auto rec : records) { ck_assert_int_eq(test_de->insert(rec), 1); if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { test_de->erase(del_vec[i]); - deletes++; to_delete.erase(del_vec[i]); deleted.insert(del_vec[i]); } @@ -260,9 +271,9 @@ START_TEST(t_static_structure) size_t reccnt = 100000; auto test_de = new DE(100, 1000, 2); - std::set<R> records; - std::set<R> to_delete; - std::set<R> deleted; + std::set<Rec> records; + std::set<Rec> to_delete; + std::set<Rec> deleted; while (records.size() < reccnt) { uint64_t key = rand(); @@ -274,15 +285,11 @@ START_TEST(t_static_structure) } size_t deletes = 0; - size_t t_reccnt = 0; - size_t k=0; for (auto rec : records) { - k++; ck_assert_int_eq(test_de->insert(rec), 1); - t_reccnt++; if (gsl_rng_uniform(rng) < 0.05 && !to_delete.empty()) { - std::vector<R> del_vec; + std::vector<Rec> del_vec; std::sample(to_delete.begin(), to_delete.end(), 
std::back_inserter(del_vec), 3, std::mt19937{std::random_device{}()}); for (size_t i=0; i<del_vec.size(); i++) { diff --git a/tests/include/irs.h b/tests/include/irs.h new file mode 100644 index 0000000..1c5be2c --- /dev/null +++ b/tests/include/irs.h @@ -0,0 +1,165 @@ +/* + * tests/include/irs.h + * + * Standardized unit tests for range queries against supporting + * shard types + * + * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * + * Distributed under the Modified BSD License. + * + * WARNING: This file must be included in the main unit test set + * after the definition of an appropriate Shard and R + * type. In particular, R needs to implement the key-value + * pair interface and Shard needs to support lower_bound. + * For other types of record and shard, you'll need to + * use a different set of unit tests. + */ +#pragma once + +#include "query/irs.h" +#include <algorithm> + +/* + * Uncomment these lines temporarily to remove errors in this file + * temporarily for development purposes. They should be removed prior + * to building, to ensure no duplicate definitions. These includes/defines + * should be included in the source file that includes this one, above the + * include statement. 
+ */ +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "testing.h" +#include <check.h> +#include <gsl/gsl_rng.h> +using namespace de; + +typedef Rec R; +typedef ISAMTree<R> Shard; +typedef irs::Query<ISAMTree<R>> Query; + +static gsl_rng *g_rng; + +START_TEST(t_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + auto shard = Shard(buffer->get_buffer_view()); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + auto local_query = irs::Query<Shard>::local_preproc(&shard, &parms); + irs::Query<Shard>::distribute_query(&parms, {local_query}, nullptr); + + auto result = irs::Query<Shard>::local_query(&shard, local_query); + delete local_query; + + ck_assert_int_eq(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + + delete buffer; +} +END_TEST + + +START_TEST(t_buffer_irs) +{ + auto buffer = create_sequential_mbuffer<R>(100, 1000); + + size_t k = 5; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 300; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + { + auto view = buffer->get_buffer_view(); + auto query = irs::Query<Shard>::local_preproc_buffer(&view, &parms); + irs::Query<Shard>::distribute_query(&parms, {}, query); + auto result = irs::Query<Shard>::local_query_buffer(query); + delete query; + + ck_assert_int_le(result.size(), k); + for (size_t i=0; i<result.size(); i++) { + ck_assert_int_le(result[i].rec.key, parms.upper_bound); + ck_assert_int_ge(result[i].rec.key, parms.lower_bound); + } + } + + delete buffer; +} +END_TEST + + +START_TEST(t_irs_merge) +{ + auto buffer1 = create_sequential_mbuffer<R>(100, 200); + auto buffer2 = create_sequential_mbuffer<R>(400, 1000); + + auto shard1 = Shard(buffer1->get_buffer_view()); + auto shard2 = 
Shard(buffer2->get_buffer_view()); + + size_t k = 10; + irs::Query<Shard>::Parameters parms; + parms.lower_bound = 150; + parms.upper_bound = 500; + parms.sample_size = k; + parms.rng = g_rng; + + /* necessary to store the alias structure */ + auto dummy_buffer_query = irs::Query<Shard>::LocalQueryBuffer(); + dummy_buffer_query.buffer = nullptr; + dummy_buffer_query.sample_size = 0; + dummy_buffer_query.cutoff = 0; + dummy_buffer_query.global_parms = parms; + dummy_buffer_query.records = {}; + dummy_buffer_query.alias = nullptr; + + auto query1 = irs::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = irs::Query<Shard>::local_preproc(&shard2, &parms); + + irs::Query<Shard>::distribute_query(&parms, {query1, query2}, &dummy_buffer_query); + + std::vector<std::vector<irs::Query<Shard>::LocalResultType>> results(2); + results[0] = irs::Query<Shard>::local_query(&shard1, query1); + results[1] = irs::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; + + ck_assert_int_eq(results[0].size() + results[1].size(), k); + + std::vector<std::vector<Wrapped<R>>> proc_results; + + for (size_t j=0; j<results.size(); j++) { + proc_results.emplace_back(std::vector<Wrapped<R>>()); + for (size_t i=0; i<results[j].size(); i++) { + proc_results[j].emplace_back(results[j][i]); + } + } + + std::vector<irs::Query<Shard>::ResultType> result; + irs::Query<Shard>::combine(proc_results, nullptr, result); + ck_assert_int_eq(result.size(), k); + + delete buffer1; + delete buffer2; +} +END_TEST + +static void inject_irs_tests(Suite *suite) { + g_rng = gsl_rng_alloc(gsl_rng_mt19937); + + TCase *irs = tcase_create("Independent Range Sampling Query Testing"); + tcase_add_test(irs, t_irs); + tcase_add_test(irs, t_buffer_irs); + tcase_add_test(irs, t_irs_merge); + suite_add_tcase(suite, irs); +} diff --git a/tests/include/pointlookup.h b/tests/include/pointlookup.h index 71a1099..af58440 100644 --- a/tests/include/pointlookup.h +++ b/tests/include/pointlookup.h @@ 
-17,6 +17,8 @@ */ #pragma once +#include "query/pointlookup.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -25,15 +27,12 @@ * include statement. */ -//#include "shard/FSTrie.h" -#include "query/pointlookup.h" +#include "shard/FSTrie.h" #include "testing.h" - #include <check.h> - using namespace de; -//typedef StringRec R; -//typedef FSTrie<R> Shard; +typedef StringRec R; +typedef FSTrie<R> Shard; START_TEST(t_point_lookup_query) { @@ -45,23 +44,21 @@ START_TEST(t_point_lookup_query) for (size_t i=0; i<bv.get_record_count(); i++) { auto key = bv.get(i)->rec.key; - pl::Parms<R> parms = {key}; - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); - + pl::Query<Shard>::Parameters parms = {key}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, key); - //ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, key); + ck_assert_int_eq(result[0].rec.value, bv.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = pl::Query<R, Shard>::query(&shard, state, &parms); - pl::Query<R, Shard>::delete_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc(&shard, &parms); + auto result = pl::Query<Shard>::local_query(&shard,local_query); + delete local_query; ck_assert_int_eq(result.size(), 0); } @@ -78,24 +75,21 @@ START_TEST(t_buffer_point_lookup) { auto view = buffer->get_buffer_view(); for (int 
i=view.get_record_count()-1; i>=0; i--) { - pl::Parms<R> parms = {view.get(i)->rec.key}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {view.get(i)->rec.key}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); + delete local_query; ck_assert_int_eq(result.size(), 1); - //ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); - //ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); + ck_assert_str_eq(result[0].rec.key, view.get(i)->rec.key); + ck_assert_int_eq(result[0].rec.value, view.get(i)->rec.value); } /* point lookup miss; result size should be 0 */ const char *c = "computer"; - pl::Parms<R> parms = {c}; - - auto state = pl::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = pl::Query<R, Shard>::buffer_query(state, &parms); - pl::Query<R, Shard>::delete_buffer_query_state(state); + pl::Query<Shard>::Parameters parms = {c}; + auto local_query = pl::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = pl::Query<Shard>::local_query_buffer(local_query); ck_assert_int_eq(result.size(), 0); } diff --git a/tests/include/rangecount.h b/tests/include/rangecount.h index 1951221..22189b9 100644 --- a/tests/include/rangecount.h +++ b/tests/include/rangecount.h @@ -1,5 +1,5 @@ /* - * tests/include/rangecount.h + * tests/include/rangequery.h * * Standardized unit tests for range queries against supporting * shard types @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangecount.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. 
They should be removed prior @@ -24,30 +27,29 @@ * should be included in the source file that includes this one, above the * include statement. */ -//#include "shard/ISAMTree.h" -//#include "query/rangecount.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; -#include "query/rangecount.h" +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rc::Query<ISAMTree<R>> Query; START_TEST(t_range_count) { - auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; - auto state = rc::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rc::Query<R, Shard>::query(&shard, state, &parms); - rc::Query<R, Shard>::delete_query_state(state); + auto local_query = rc::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rc::Query<Shard>::local_query(&shard, local_query); + delete local_query; - ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); delete buffer; } @@ -58,16 +60,15 @@ START_TEST(t_buffer_range_count) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rc::Parms<R> parms = {300, 500}; + rc::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rc::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rc::Query<R, Shard>::buffer_query(state, &parms); - rc::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rc::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rc::Query<Shard>::local_query_buffer(query); + delete query; - 
ck_assert_int_eq(result.size(), 1); - ck_assert_int_eq(result[0].rec.key, parms.upper_bound - parms.lower_bound + 1); + ck_assert_int_eq(result[0].record_count - result[0].tombstone_count, parms.upper_bound - parms.lower_bound + 1); } delete buffer; @@ -83,66 +84,31 @@ START_TEST(t_range_count_merge) auto shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rc::Parms<R> parms = {150, 500}; + rc::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rc::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rc::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rc::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rc::Query<R, Shard>::query(&shard2, state2, &parms); - - rc::Query<R, Shard>::delete_query_state(state1); - rc::Query<R, Shard>::delete_query_state(state2); + auto query1 = rc::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rc::Query<Shard>::local_preproc(&shard2, &parms); - ck_assert_int_eq(results[0].size(), 1); - ck_assert_int_eq(results[1].size(), 1); + std::vector<std::vector<rc::Query<Shard>::LocalResultType>> results(2); + results[0] = rc::Query<Shard>::local_query(&shard1, query1); + results[1] = rc::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; - std::vector<R> result; - rc::Query<R, Shard>::merge(results, nullptr, result); + size_t reccnt = results[0][0].record_count + results[1][0].record_count; + size_t tscnt = results[0][0].tombstone_count + results[1][0].tombstone_count; - ck_assert_int_eq(result[0].key, result_size); - - delete buffer1; - delete buffer2; -} -END_TEST + ck_assert_int_eq(reccnt - tscnt, result_size); + std::vector<rc::Query<Shard>::ResultType> result; + rc::Query<Shard>::combine(results, nullptr, result); -START_TEST(t_lower_bound) -{ - auto buffer1 = 
create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } + ck_assert_int_eq(result[0], result_size); delete buffer1; delete buffer2; - delete shard1; - delete shard2; } END_TEST diff --git a/tests/include/rangequery.h b/tests/include/rangequery.h index f90e107..5c3c1d6 100644 --- a/tests/include/rangequery.h +++ b/tests/include/rangequery.h @@ -17,6 +17,9 @@ */ #pragma once +#include "query/rangequery.h" +#include <algorithm> + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,26 +27,27 @@ * should be included in the source file that includes this one, above the * include statement. 
*/ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - -#include "query/rangequery.h" +// #include "shard/ISAMTree.h" +// #include "query/rangequery.h" +// #include "testing.h" +// #include <check.h> +// using namespace de; +// typedef Rec R; +// typedef ISAMTree<R> Shard; +// typedef rq::Query<ISAMTree<R>> Query; START_TEST(t_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); auto shard = Shard(buffer->get_buffer_view()); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); + auto local_query = rq::Query<Shard>::local_preproc(&shard, &parms); + + auto result = rq::Query<Shard>::local_query(&shard, local_query); + delete local_query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -60,13 +64,13 @@ START_TEST(t_buffer_range_query) { auto buffer = create_sequential_mbuffer<R>(100, 1000); - rq::Parms<R> parms = {300, 500}; + rq::Query<Shard>::Parameters parms = {300, 500}; { auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); + auto query = rq::Query<Shard>::local_preproc_buffer(&view, &parms); + auto result = rq::Query<Shard>::local_query_buffer(query); + delete query; ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); for (size_t i=0; i<result.size(); i++) { @@ -88,19 +92,18 @@ START_TEST(t_range_query_merge) auto shard1 = Shard(buffer1->get_buffer_view()); auto shard2 = Shard(buffer2->get_buffer_view()); - rq::Parms<R> parms = {150, 500}; + 
rq::Query<Shard>::Parameters parms = {150, 500}; size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - auto state1 = rq::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); + auto query1 = rq::Query<Shard>::local_preproc(&shard1, &parms); + auto query2 = rq::Query<Shard>::local_preproc(&shard2, &parms); - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); + std::vector<std::vector<rq::Query<Shard>::LocalResultType>> results(2); + results[0] = rq::Query<Shard>::local_query(&shard1, query1); + results[1] = rq::Query<Shard>::local_query(&shard2, query2); + delete query1; + delete query2; ck_assert_int_eq(results[0].size() + results[1].size(), result_size); @@ -113,8 +116,8 @@ START_TEST(t_range_query_merge) } } - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); + std::vector<rq::Query<Shard>::ResultType> result; + rq::Query<Shard>::combine(proc_results, nullptr, result); std::sort(result.begin(), result.end()); ck_assert_int_eq(result.size(), result_size); @@ -145,8 +148,6 @@ START_TEST(t_lower_bound) auto merged = Shard(shards); for (uint32_t i=100; i<1000; i++) { - R r = R{i, i}; - auto idx = merged.get_lower_bound(i); assert(idx < merged.get_record_count()); diff --git a/tests/include/shard_standard.h b/tests/include/shard_standard.h index 2809d74..ece2a57 100644 --- a/tests/include/shard_standard.h +++ b/tests/include/shard_standard.h @@ -75,7 +75,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git 
a/tests/include/shard_string.h b/tests/include/shard_string.h index 881f41a..2ef4cec 100644 --- a/tests/include/shard_string.h +++ b/tests/include/shard_string.h @@ -73,7 +73,6 @@ START_TEST(t_shard_init) ck_assert_int_eq(shard4->get_record_count(), n * 3); ck_assert_int_eq(shard4->get_tombstone_count(), 0); - size_t total_cnt = 0; size_t shard1_idx = 0; size_t shard2_idx = 0; size_t shard3_idx = 0; diff --git a/tests/include/testing.h b/tests/include/testing.h index d0bff2d..33cbb3f 100644 --- a/tests/include/testing.h +++ b/tests/include/testing.h @@ -34,7 +34,7 @@ static std::string summa_wordlist = "tests/data/summa-wordlist.txt"; static std::vector<std::unique_ptr<char[]>> string_data; -static std::vector<StringRec> read_string_data(std::string fname, size_t n) { +[[maybe_unused]] static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::vector<StringRec> vec; vec.reserve(n); string_data.reserve(n); @@ -50,14 +50,14 @@ static std::vector<StringRec> read_string_data(std::string fname, size_t n) { std::string field; std::getline(ls, field, '\t'); - auto val = atol(field.c_str()); + uint64_t val = atol(field.c_str()); std::getline(ls, field, '\n'); char *c = strdup(field.c_str()); string_data.push_back(std::unique_ptr<char[]>(c)); - StringRec r(string_data[string_data.size() -1].get(), val, field.size()); + StringRec r{string_data[string_data.size() -1].get(), val, field.size()}; vec.push_back(r); } @@ -76,7 +76,7 @@ std::vector<R> strip_wrapping(std::vector<de::Wrapped<R>> vec) { return out; } -static bool initialize_test_file(std::string fname, size_t page_cnt) +[[maybe_unused]] static bool initialize_test_file(std::string fname, size_t page_cnt) { auto flags = O_RDWR | O_CREAT | O_TRUNC; mode_t mode = 0640; @@ -113,7 +113,7 @@ error: return 0; } -static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { +[[maybe_unused]] static bool roughly_equal(int n1, int n2, size_t mag, double epsilon) { return ((double) std::abs(n1 - 
n2) / (double) mag) < epsilon; } diff --git a/tests/include/wirs.h b/tests/include/wirs.h deleted file mode 100644 index 4c0630f..0000000 --- a/tests/include/wirs.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * tests/include/rangequery.h - * - * Standardized unit tests for range queries against supporting - * shard types - * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> - * - * Distributed under the Modified BSD License. - * - * WARNING: This file must be included in the main unit test set - * after the definition of an appropriate Shard and R - * type. In particular, R needs to implement the key-value - * pair interface and Shard needs to support lower_bound. - * For other types of record and shard, you'll need to - * use a different set of unit tests. - */ -#pragma once - -/* - * Uncomment these lines temporarily to remove errors in this file - * temporarily for development purposes. They should be removed prior - * to building, to ensure no duplicate definitions. These includes/defines - * should be included in the source file that includes this one, above the - * include statement. 
- */ -//#include "shard/ISAMTree.h" -//#include "query/rangequery.h" -//#include "testing.h" -//#include <check.h> -//using namespace de; -//typedef ISAMTree<R> Shard; - - -START_TEST(t_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - auto shard = Shard(buffer->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - auto state = rq::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = rq::Query<R, Shard>::query(&shard, state, &parms); - rq::Query<R, Shard>::delete_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - - delete buffer; -} -END_TEST - - -START_TEST(t_buffer_range_query) -{ - auto buffer = create_sequential_mbuffer<R>(100, 1000); - - rq::Parms<R> parms; - parms.lower_bound = 300; - parms.upper_bound = 500; - - { - auto view = buffer->get_buffer_view(); - auto state = rq::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = rq::Query<R, Shard>::buffer_query(state, &parms); - rq::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.upper_bound - parms.lower_bound + 1); - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_le(result[i].rec.key, parms.upper_bound); - ck_assert_int_ge(result[i].rec.key, parms.lower_bound); - } - } - - delete buffer; -} -END_TEST - - -START_TEST(t_range_query_merge) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - rq::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = rq::Query<R, 
Shard>::get_query_state(&shard1, &parms); - auto state2 = rq::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = rq::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = rq::Query<R, Shard>::query(&shard2, state2, &parms); - - rq::Query<R, Shard>::delete_query_state(state1); - rq::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - std::vector<R> result; - rq::Query<R, Shard>::merge(proc_results, nullptr, result); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } - } - - delete buffer1; - delete buffer2; -} -END_TEST - - -START_TEST(t_lower_bound) -{ - auto buffer1 = create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = new Shard(buffer1->get_buffer_view()); - auto shard2 = new Shard(buffer2->get_buffer_view()); - - std::vector<Shard*> shards = {shard1, shard2}; - - auto merged = Shard(shards); - - for (size_t i=100; i<1000; i++) { - R r; - r.key = i; - r.value = i; - - auto idx = merged.get_lower_bound(i); - - assert(idx < merged.get_record_count()); - - auto res = merged.get_record_at(idx); - - if (i >=200 && i <400) { - ck_assert_int_lt(res->rec.key, i); - } else { - ck_assert_int_eq(res->rec.key, i); - } - } - - delete buffer1; - delete buffer2; - delete shard1; - delete shard2; -} -END_TEST - -static void inject_rangequery_tests(Suite *suite) { - TCase *range_query = tcase_create("Range Query 
Testing"); - tcase_add_test(range_query, t_range_query); - tcase_add_test(range_query, t_buffer_range_query); - tcase_add_test(range_query, t_range_query_merge); - suite_add_tcase(suite, range_query); -} diff --git a/tests/include/wss.h b/tests/include/wss.h index f0ac74c..01327d2 100644 --- a/tests/include/wss.h +++ b/tests/include/wss.h @@ -1,10 +1,10 @@ /* - * tests/include/rangequery.h + * tests/include/wss.h * - * Standardized unit tests for range queries against supporting + * Standardized unit tests for weighted set sampling against supporting * shard types * - * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> + * Copyright (C) 2023-2024 Douglas Rumbaugh <drumbaugh@psu.edu> * * Distributed under the Modified BSD License. * @@ -17,6 +17,8 @@ */ #pragma once +#include "query/wss.h" + /* * Uncomment these lines temporarily to remove errors in this file * temporarily for development purposes. They should be removed prior @@ -24,28 +26,38 @@ * should be included in the source file that includes this one, above the * include statement. 
*/ -#include "shard/Alias.h" -#include "testing.h" -#include <check.h> -using namespace de; -typedef Alias<R> Shard; +// #include "framework/interface/Record.h" +// #include "shard/Alias.h" +// #include "testing.h" +// #include <check.h> -#include "query/wss.h" +// using namespace de; + +// typedef WeightedRecord<int64_t, int32_t, int32_t> R; +// typedef Alias<R> Shard; + +typedef wss::Query<Shard> Q; START_TEST(t_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); auto shard = Shard(buffer->get_buffer_view()); - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; - parms.sample_size = 20; + parms.sample_size = k; + + auto query = Q::local_preproc(&shard, &parms); + Q::distribute_query(&parms, {query}, nullptr); + + auto result = Q::local_query(&shard, query); + delete query; + + ck_assert_int_eq(result.size(), k); - auto state = wss::Query<R, Shard>::get_query_state(&shard, &parms); - auto result = wss::Query<R, Shard>::query(&shard, state, &parms); - wss::Query<R, Shard>::delete_query_state(state); delete buffer; gsl_rng_free(rng); @@ -56,83 +68,28 @@ END_TEST START_TEST(t_buffer_wss_query) { auto buffer = create_weighted_mbuffer<R>(1000); - - auto rng = gsl_rng_alloc(gsl_rng_mt19937); - wss::Parms<R> parms; + size_t k = 20; + + Q::Parameters parms; parms.rng = rng; + parms.sample_size = k; { auto view = buffer->get_buffer_view(); - auto state = wss::Query<R, Shard>::get_buffer_query_state(&view, &parms); - auto result = wss::Query<R, Shard>::buffer_query(state, &parms); - wss::Query<R, Shard>::delete_buffer_query_state(state); - - ck_assert_int_eq(result.size(), parms.sample_size); - for (size_t i=0; i<result.size(); i++) { - - } - } - - delete buffer; -} -END_TEST + auto query = Q::local_preproc_buffer(&view, &parms); + Q::distribute_query(&parms, {}, query); + auto result = Q::local_query_buffer(query); - -/* -START_TEST(t_range_query_merge) -{ - auto buffer1 = 
create_sequential_mbuffer<R>(100, 200); - auto buffer2 = create_sequential_mbuffer<R>(400, 1000); - - auto shard1 = Shard(buffer1->get_buffer_view()); - auto shard2 = Shard(buffer2->get_buffer_view()); - - wss::Parms<R> parms; - parms.lower_bound = 150; - parms.upper_bound = 500; - - size_t result_size = parms.upper_bound - parms.lower_bound + 1 - 200; - - auto state1 = wss::Query<R, Shard>::get_query_state(&shard1, &parms); - auto state2 = wss::Query<R, Shard>::get_query_state(&shard2, &parms); - - std::vector<std::vector<de::Wrapped<R>>> results(2); - results[0] = wss::Query<R, Shard>::query(&shard1, state1, &parms); - results[1] = wss::Query<R, Shard>::query(&shard2, state2, &parms); - - wss::Query<R, Shard>::delete_query_state(state1); - wss::Query<R, Shard>::delete_query_state(state2); - - ck_assert_int_eq(results[0].size() + results[1].size(), result_size); - - std::vector<std::vector<Wrapped<R>>> proc_results; - - for (size_t j=0; j<results.size(); j++) { - proc_results.emplace_back(std::vector<Wrapped<R>>()); - for (size_t i=0; i<results[j].size(); i++) { - proc_results[j].emplace_back(results[j][i]); - } - } - - auto result = wss::Query<R, Shard>::merge(proc_results, nullptr); - std::sort(result.begin(), result.end()); - - ck_assert_int_eq(result.size(), result_size); - auto key = parms.lower_bound; - for (size_t i=0; i<result.size(); i++) { - ck_assert_int_eq(key++, result[i].key); - if (key == 200) { - key = 400; - } + delete query; + ck_assert_int_le(result.size(), k); } - delete buffer1; - delete buffer2; + delete buffer; + gsl_rng_free(rng); } END_TEST -*/ static void inject_wss_tests(Suite *suite) { |