From 402fc269c0aaa671d84a6d15918735ad4b90e6b2 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 9 Feb 2024 12:30:21 -0500 Subject: Comment updates/fixes --- include/util/Cursor.h | 17 +++++++++++------ include/util/SortedMerge.h | 30 +++++++++++++++++++++++++----- include/util/bf_config.h | 3 +-- include/util/types.h | 10 +++++----- 4 files changed, 42 insertions(+), 18 deletions(-) (limited to 'include/util') diff --git a/include/util/Cursor.h b/include/util/Cursor.h index be7ab32..e8ba53d 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -7,15 +7,18 @@ * Distributed under the Modified BSD License. * * A simple record cursor type with associated methods for help in - * merging record sets when constructing shards. + * merging record sets when constructing shards. Iterates an array + * of records in order, and provides facilities to make sorted merges + * easier. + * + * TODO: Prior versions of this module included automatic support for + * working with data stored in PagedFiles as well. That should be + * reintroduced at some point. */ #pragma once -#include "framework/ShardRequirements.h" - -#include "psu-ds/BloomFilter.h" -#include "psu-ds/PriorityQueue.h" -#include "psu-util/alignment.h" +#include +#include namespace de { template @@ -64,6 +67,8 @@ template inline static Cursor *get_next(std::vector> &cursors, Cursor *current=nullptr) { const R *min_rec = nullptr; Cursor *result = nullptr; + // FIXME: for large cursor vectors, it may be worth it to use a + // PriorityQueue here instead of scanning. for (size_t i=0; i< cursors.size(); i++) { if (cursors[i] == (Cursor) {0} ) continue; diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h index ed47acb..8a1e782 100644 --- a/include/util/SortedMerge.h +++ b/include/util/SortedMerge.h @@ -2,7 +2,6 @@ * include/util/SortedMerge.h * * Copyright (C) 2023 Douglas B. Rumbaugh - * Dong Xie * * Distributed under the Modified BSD License. * @@ -28,12 +27,26 @@ using psudb::queue_record; using psudb::byte; using psudb::CACHELINE_SIZE; +/* + * A simple struct to return record_count and tombstone_count information + * back to the caller. Could've been an std::pair, but I like the more + * explicit names. + */ struct merge_info { size_t record_count; size_t tombstone_count; }; - +/* + * Build a vector of cursors corresponding to the records contained within + * a vector of shards. The cursor at index i in the output will correspond + * to the shard at index i in the input. + * + * The values of reccnt and tscnt will be updated with the sum of the + * records contained within the shards. Note that these counts include deleted + * records that may be removed during shard construction, and so constitute + * upper bounds only. + */ template S> static std::vector>> build_cursor_vec(std::vector &shards, size_t *reccnt, size_t *tscnt) { std::vector>> cursors; @@ -57,7 +70,14 @@ static std::vector>> build_cursor_vec(std::vector &shards, } /* - * + * Build a sorted array of records based on the contents of a BufferView. + * This routine does not alter the buffer view, but rather copies the + * records out and then sorts them. The provided buffer must be large + * enough to store the records from the BufferView, or the behavior of the + * function is undefined. + * + * It allocates a temporary buffer for the sorting, and execution of the + * program will be aborted if the allocation fails. */ template static merge_info sorted_array_from_bufferview(BufferView bv, @@ -94,10 +114,10 @@ static merge_info sorted_array_from_bufferview(BufferView bv, continue; } - // fixme: this shouldn't be necessary, but the tagged record + // FIXME: this shouldn't be necessary, but the tagged record // bypass doesn't seem to be working on this code-path, so this // ensures that tagged records from the buffer are able to be - // dropped, eventually. it should only need to be &= 1 + // dropped, eventually. It should only need to be &= 1 base->header &= 3; buffer[info.record_count++] = *base; diff --git a/include/util/bf_config.h b/include/util/bf_config.h index fdf2195..9f29ed7 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -15,7 +15,7 @@ */ #pragma once -#include "psu-util/alignment.h" +#include namespace de { @@ -30,7 +30,6 @@ static size_t BF_HASH_FUNCS = 7; * (0, 1), or the behavior of bloom filters is undefined. */ static void BF_SET_FPR(double fpr) { - BF_FPR = fpr; } diff --git a/include/util/types.h b/include/util/types.h index 3908174..a13bd95 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -10,18 +10,18 @@ * that are defined within the header files that make direct use of them, * but all generally usable, simple types are defined here. * + * Many of these types were used in the Practical Dynamic Extension for + * Sampling Indexes work, particularly for external storage and buffer + * pool systems. They aren't used now, but we're leaving them here to use + * them in the future, when we add this functionality into this system too. */ #pragma once -#include #include -#include -#include +#include namespace de { -using std::byte; - /* Represents a page offset within a specific file (physical or virtual) */ typedef uint32_t PageNum; -- cgit v1.2.3