From 402fc269c0aaa671d84a6d15918735ad4b90e6b2 Mon Sep 17 00:00:00 2001
From: Douglas Rumbaugh <dbr4@psu.edu>
Date: Fri, 9 Feb 2024 12:30:21 -0500
Subject: Comment updates/fixes

---
 include/util/Cursor.h      | 17 +++++++++++------
 include/util/SortedMerge.h | 30 +++++++++++++++++++++++++-----
 include/util/bf_config.h   |  3 +--
 include/util/types.h       | 10 +++++-----
 4 files changed, 42 insertions(+), 18 deletions(-)

(limited to 'include/util')
diff --git a/include/util/Cursor.h b/include/util/Cursor.h
index be7ab32..e8ba53d 100644
--- a/include/util/Cursor.h
+++ b/include/util/Cursor.h
@@ -7,15 +7,18 @@
  * Distributed under the Modified BSD License.
  *
  * A simple record cursor type with associated methods for help in
- * merging record sets when constructing shards. 
+ * merging record sets when constructing shards. Iterates an array
+ * of records in order, and provides facilities to make sorted merges
+ * easier.
+ *
+ * TODO: Prior versions of this module included automatic support for
+ * working with data stored in PagedFiles as well. That should be
+ * reintroduced at some point.
  */
 #pragma once
 
-#include "framework/ShardRequirements.h"
-
-#include "psu-ds/BloomFilter.h"
-#include "psu-ds/PriorityQueue.h"
-#include "psu-util/alignment.h"
+#include <cstdlib>
+#include <vector>
 
 namespace de {
 template<typename R>
@@ -64,6 +67,8 @@ template <typename R>
 inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) {
     const R *min_rec = nullptr;
     Cursor<R> *result = nullptr;
+    // FIXME: for large cursor vectors, it may be worth it to use a
+    //        PriorityQueue here instead of scanning.
     for (size_t i=0; i< cursors.size(); i++) {
         if (cursors[i] == (Cursor<R>) {0} ) continue;
 
diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h
index ed47acb..8a1e782 100644
--- a/include/util/SortedMerge.h
+++ b/include/util/SortedMerge.h
@@ -2,7 +2,6 @@
  * include/util/SortedMerge.h
  *
  * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> 
- *                    Dong Xie <dongx@psu.edu>
  *
  * Distributed under the Modified BSD License.
  *
@@ -28,12 +27,26 @@ using psudb::queue_record;
 using psudb::byte;
 using psudb::CACHELINE_SIZE;
 
+/*
+ * A simple struct to return record_count and tombstone_count information 
+ * back to the caller. Could've been an std::pair, but I like the more 
+ * explicit names.
+ */
 struct merge_info {
     size_t record_count;
     size_t tombstone_count;
 };
 
-
+/*
+ * Build a vector of cursors corresponding to the records contained within
+ * a vector of shards. The cursor at index i in the output will correspond
+ * to the shard at index i in the input. 
+ *
+ * The values of reccnt and tscnt will be updated with the sum of the
+ * records contained within the shards. Note that these counts include deleted
+ * records that may be removed during shard construction, and so constitute
+ * upper bounds only.
+ */
 template <RecordInterface R, ShardInterface<R> S>
 static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) {
     std::vector<Cursor<Wrapped<R>>> cursors;
@@ -57,7 +70,14 @@ static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards,
 }
 
 /*
- * 
+ * Build a sorted array of records based on the contents of a BufferView.
+ * This routine does not alter the buffer view, but rather copies the
+ * records out and then sorts them. The provided buffer must be large
+ * enough to store the records from the BufferView, or the behavior of the
+ * function is undefined.
+ *
+ * It allocates a temporary buffer for the sorting, and execution of the
+ * program will be aborted if the allocation fails.
  */
 template <RecordInterface R>
 static merge_info sorted_array_from_bufferview(BufferView<R> bv, 
@@ -94,10 +114,10 @@ static merge_info sorted_array_from_bufferview(BufferView<R> bv,
             continue;
         }
 
-        // fixme: this shouldn't be necessary, but the tagged record
+        // FIXME: this shouldn't be necessary, but the tagged record
         // bypass doesn't seem to be working on this code-path, so this
         // ensures that tagged records from the buffer are able to be
-        // dropped, eventually. it should only need to be &= 1
+        // dropped, eventually. It should only need to be &= 1
         base->header &= 3;
         buffer[info.record_count++] = *base;
 
diff --git a/include/util/bf_config.h b/include/util/bf_config.h
index fdf2195..9f29ed7 100644
--- a/include/util/bf_config.h
+++ b/include/util/bf_config.h
@@ -15,7 +15,7 @@
  */
 #pragma once
 
-#include "psu-util/alignment.h"
+#include <cstdlib>
 
 namespace de {
 
@@ -30,7 +30,6 @@ static size_t BF_HASH_FUNCS = 7;
  * (0, 1), or the behavior of bloom filters is undefined.
  */
 static void BF_SET_FPR(double fpr) {
-
     BF_FPR = fpr;
 }
 
diff --git a/include/util/types.h b/include/util/types.h
index 3908174..a13bd95 100644
--- a/include/util/types.h
+++ b/include/util/types.h
@@ -10,18 +10,18 @@
  * that are defined within the header files that make direct use of them,
  * but all generally usable, simple types are defined here.
  *
+ * Many of these types were used in the Practical Dynamic Extension for
+ * Sampling Indexes work, particularly for external storage and buffer
+ * pool systems. They aren't used now, but we're leaving them here to use
+ * them in the future, when we add this functionality into this system too.
  */
 #pragma once
 
-#include <cstdlib>
 #include <cstdint>
-#include <cstddef>
-#include <string>
+#include <cstdlib>
 
 namespace de {
 
-using std::byte;
-
 /* Represents a page offset within a specific file (physical or virtual) */
 typedef uint32_t PageNum;
 
-- 
cgit v1.2.3