summaryrefslogtreecommitdiffstats
path: root/include/util
diff options
context:
space:
mode:
Diffstat (limited to 'include/util')
-rw-r--r--include/util/Cursor.h17
-rw-r--r--include/util/SortedMerge.h30
-rw-r--r--include/util/bf_config.h3
-rw-r--r--include/util/types.h10
4 files changed, 42 insertions, 18 deletions
diff --git a/include/util/Cursor.h b/include/util/Cursor.h
index be7ab32..e8ba53d 100644
--- a/include/util/Cursor.h
+++ b/include/util/Cursor.h
@@ -7,15 +7,18 @@
* Distributed under the Modified BSD License.
*
* A simple record cursor type with associated methods for help in
- * merging record sets when constructing shards.
+ * merging record sets when constructing shards. Iterates an array
+ * of records in order, and provides facilities to make sorted merges
+ * easier.
+ *
+ * TODO: Prior versions of this module included automatic support for
+ * working with data stored in PagedFiles as well. That should be
+ * reintroduced at some point.
*/
#pragma once
-#include "framework/ShardRequirements.h"
-
-#include "psu-ds/BloomFilter.h"
-#include "psu-ds/PriorityQueue.h"
-#include "psu-util/alignment.h"
+#include <cstdlib>
+#include <vector>
namespace de {
template<typename R>
@@ -64,6 +67,8 @@ template <typename R>
inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) {
const R *min_rec = nullptr;
Cursor<R> *result = nullptr;
+ // FIXME: for large cursor vectors, it may be worth it to use a
+ // PriorityQueue here instead of scanning.
for (size_t i=0; i< cursors.size(); i++) {
if (cursors[i] == (Cursor<R>) {0} ) continue;
diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h
index ed47acb..8a1e782 100644
--- a/include/util/SortedMerge.h
+++ b/include/util/SortedMerge.h
@@ -2,7 +2,6 @@
* include/util/SortedMerge.h
*
* Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu>
- * Dong Xie <dongx@psu.edu>
*
* Distributed under the Modified BSD License.
*
@@ -28,12 +27,26 @@ using psudb::queue_record;
using psudb::byte;
using psudb::CACHELINE_SIZE;
+/*
+ * A simple struct to return record_count and tombstone_count information
+ * back to the caller. Could've been an std::pair, but I like the more
+ * explicit names.
+ */
struct merge_info {
size_t record_count;
size_t tombstone_count;
};
-
+/*
+ * Build a vector of cursors corresponding to the records contained within
+ * a vector of shards. The cursor at index i in the output will correspond
+ * to the shard at index i in the input.
+ *
+ * The values of reccnt and tscnt will be updated with the sum of the
+ * records contained within the shards. Note that these counts include deleted
+ * records that may be removed during shard construction, and so constitute
+ * upper bounds only.
+ */
template <RecordInterface R, ShardInterface<R> S>
static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) {
std::vector<Cursor<Wrapped<R>>> cursors;
@@ -57,7 +70,14 @@ static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards,
}
/*
- *
+ * Build a sorted array of records based on the contents of a BufferView.
+ * This routine does not alter the buffer view, but rather copies the
+ * records out and then sorts them. The provided buffer must be large
+ * enough to store the records from the BufferView, or the behavior of the
+ * function is undefined.
+ *
+ * It allocates a temporary buffer for the sorting, and execution of the
+ * program will be aborted if the allocation fails.
*/
template <RecordInterface R>
static merge_info sorted_array_from_bufferview(BufferView<R> bv,
@@ -94,10 +114,10 @@ static merge_info sorted_array_from_bufferview(BufferView<R> bv,
continue;
}
- // fixme: this shouldn't be necessary, but the tagged record
+ // FIXME: this shouldn't be necessary, but the tagged record
// bypass doesn't seem to be working on this code-path, so this
// ensures that tagged records from the buffer are able to be
- // dropped, eventually. it should only need to be &= 1
+ // dropped, eventually. It should only need to be &= 1
base->header &= 3;
buffer[info.record_count++] = *base;
diff --git a/include/util/bf_config.h b/include/util/bf_config.h
index fdf2195..9f29ed7 100644
--- a/include/util/bf_config.h
+++ b/include/util/bf_config.h
@@ -15,7 +15,7 @@
*/
#pragma once
-#include "psu-util/alignment.h"
+#include <cstdlib>
namespace de {
@@ -30,7 +30,6 @@ static size_t BF_HASH_FUNCS = 7;
* (0, 1), or the behavior of bloom filters is undefined.
*/
static void BF_SET_FPR(double fpr) {
-
BF_FPR = fpr;
}
diff --git a/include/util/types.h b/include/util/types.h
index 3908174..a13bd95 100644
--- a/include/util/types.h
+++ b/include/util/types.h
@@ -10,18 +10,18 @@
* that are defined within the header files that make direct use of them,
* but all generally usable, simple types are defined here.
*
+ * Many of these types were used in the Practical Dynamic Extension for
+ * Sampling Indexes work, particularly for external storage and buffer
+ * pool systems. They aren't used now, but we're leaving them here to use
+ * them in the future, when we add this functionality into this system too.
*/
#pragma once
-#include <cstdlib>
#include <cstdint>
-#include <cstddef>
-#include <string>
+#include <cstdlib>
namespace de {
-using std::byte;
-
/* Represents a page offset within a specific file (physical or virtual) */
typedef uint32_t PageNum;