diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/framework/DynamicExtension.h | 73 | ||||
| -rw-r--r-- | include/framework/interface/Query.h | 5 | ||||
| -rw-r--r-- | include/framework/interface/Record.h | 7 | ||||
| -rw-r--r-- | include/framework/scheduling/Epoch.h | 7 | ||||
| -rw-r--r-- | include/framework/scheduling/FIFOScheduler.h | 9 | ||||
| -rw-r--r-- | include/framework/scheduling/Task.h | 5 | ||||
| -rw-r--r-- | include/framework/scheduling/statistics.h | 5 | ||||
| -rw-r--r-- | include/framework/structure/BufferView.h | 1 | ||||
| -rw-r--r-- | include/framework/structure/ExtensionStructure.h | 64 | ||||
| -rw-r--r-- | include/framework/structure/InternalLevel.h | 6 | ||||
| -rw-r--r-- | include/framework/util/Configuration.h | 37 | ||||
| -rw-r--r-- | include/query/irs.h | 1 | ||||
| -rw-r--r-- | include/query/wirs.h | 1 | ||||
| -rw-r--r-- | include/shard/Alias.h | 1 | ||||
| -rw-r--r-- | include/shard/AugBTree.h | 2 | ||||
| -rw-r--r-- | include/shard/ISAMTree.h | 1 | ||||
| -rw-r--r-- | include/shard/PGM.h | 1 | ||||
| -rw-r--r-- | include/shard/TrieSpline.h | 1 | ||||
| -rw-r--r-- | include/shard/VPTree.h | 39 | ||||
| -rw-r--r-- | include/util/Cursor.h | 17 | ||||
| -rw-r--r-- | include/util/SortedMerge.h | 30 | ||||
| -rw-r--r-- | include/util/bf_config.h | 3 | ||||
| -rw-r--r-- | include/util/types.h | 10 |
23 files changed, 223 insertions, 103 deletions
diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h index e7dd774..473592d 100644 --- a/include/framework/DynamicExtension.h +++ b/include/framework/DynamicExtension.h @@ -13,7 +13,6 @@ #include <cstdio> #include <vector> #include <set> -#include <shared_mutex> #include <mutex> #include "framework/interface/Scheduler.h" @@ -87,10 +86,34 @@ public: } } + /* + * Insert the record `rec` into the index. If the buffer is full and + * the framework is blocking on an epoch transition, this call may fail + * and return 0. In this case, retry the call again later. If + * successful, 1 will be returned. The record will be immediately + * visible in the buffer upon the successful return of this function. + */ int insert(const R &rec) { return internal_append(rec, false); } + /* + * Erase the record `rec` from the index. It is assumed that `rec` + * currently exists--no special checks are made for correctness here. + * The behavior if this function will differ depending on if tombstone + * or tagged deletes are used. + * + * Tombstone deletes - inserts a tombstone record for `rec`. This *may* + * return 0 and fail if the buffer is full and the framework is + * blocking on an epoch transition. In this case, repeat the call + * later. 1 will be returned when the tombstone is successfully + * inserted. + * + * Tagging deletes - Does a point lookup for the record across the + * entire structure, and sets its delete bit when found. Returns 1 if + * the record is found and marked, and 0 if it was not (i.e., if it + * isn't present in the index). + */ int erase(const R &rec) { // FIXME: delete tagging will require a lot of extra work to get // operating "correctly" in a concurrent environment. @@ -121,10 +144,23 @@ public: return internal_append(rec, true); } + /* + * Execute the query with parameters `parms` and return a future. This + * future can be used to access a vector containing the results of the + * query. + * + * The behavior of this function is undefined if `parms` is not a + * pointer to a valid query parameter object for the query type used as + * a template parameter to construct the framework. + */ std::future<std::vector<R>> query(void *parms) { return schedule_query(parms); } + /* + * Returns the number of records (included tagged records and + * tombstones) currently within the framework. + */ size_t get_record_count() { auto epoch = get_active_epoch(); auto t = epoch->get_buffer().get_record_count() + epoch->get_structure()->get_record_count(); @@ -133,6 +169,11 @@ public: return t; } + /* + * Returns the number of tombstone records currently within the + * framework. This function can be called when tagged deletes are used, + * but will always return 0 in that case. + */ size_t get_tombstone_count() { auto epoch = get_active_epoch(); auto t = epoch->get_buffer().get_tombstone_count() + epoch->get_structure()->get_tombstone_count(); @@ -141,6 +182,12 @@ public: return t; } + /* + * Get the number of levels within the framework. This count will + * include any empty levels, but will not include the buffer. Note that + * this is *not* the same as the number of shards when tiering is used, + * as each level can contain multiple shards in that case. + */ size_t get_height() { auto epoch = get_active_epoch(); auto t = epoch->get_structure()->get_height(); @@ -149,6 +196,13 @@ public: return t; } + /* + * Get the number of bytes of memory allocated across the framework for + * storing records and associated index information (i.e., internal + * ISAM tree nodes). This includes memory that is allocated but + * currently unused in the buffer, or in shards themselves + * (overallocation due to delete cancellation, etc.). + */ size_t get_memory_usage() { auto epoch = get_active_epoch(); auto t= epoch->get_buffer().get_memory_usage() + epoch->get_structure()->get_memory_usage(); @@ -157,6 +211,11 @@ public: return t; } + /* + * Get the number of bytes of memory allocated across the framework for + * auxiliary structures. This can include bloom filters, aux + * hashtables, etc. + */ size_t get_aux_memory_usage() { auto epoch = get_active_epoch(); auto t = epoch->get_buffer().get_aux_memory_usage() + epoch->get_structure()->get_aux_memory_usage(); @@ -165,10 +224,22 @@ public: return t; } + /* + * Returns the maximum physical capacity of the buffer, measured in + * records. + */ size_t get_buffer_capacity() { return m_buffer->get_capacity(); } + /* + * Create a new single Shard object containing all of the records + * within the framework (buffer and shards). The optional parameter can + * be used to specify whether the Shard should be constructed with the + * currently active state of the framework (false), or if shard + * construction should wait until any ongoing reconstructions have + * finished and use that new version (true). + */ Shard *create_static_structure(bool await_reconstruction_completion=false) { if (await_reconstruction_completion) { await_next_epoch(); diff --git a/include/framework/interface/Query.h b/include/framework/interface/Query.h index 8cf9660..3d487f0 100644 --- a/include/framework/interface/Query.h +++ b/include/framework/interface/Query.h @@ -9,12 +9,9 @@ #pragma once #include "framework/QueryRequirements.h" -#include <concepts> namespace de{ -// FIXME: The interface is not completely specified yet, as it is pending -// determining a good way to handle additional template arguments -// to get the Shard and Record types into play + template <typename Q, typename R, typename S> concept QueryInterface = requires(void *p, S *sh, std::vector<void*> &s, std::vector<std::vector<Wrapped<R>>> &rv, BufferView<R> *bv) { {Q::get_query_state(sh, p)} -> std::convertible_to<void*>; diff --git a/include/framework/interface/Record.h b/include/framework/interface/Record.h index 29df4b6..5b9f307 100644 --- a/include/framework/interface/Record.h +++ b/include/framework/interface/Record.h @@ -138,7 +138,7 @@ struct CosinePoint{ return true; } - // lexicographic order + /* lexicographic order */ inline bool operator<(const CosinePoint& other) const { for (size_t i=0; i<D; i++) { if (data[i] < other.data[i]) { @@ -182,7 +182,7 @@ struct EuclidPoint{ return true; } - // lexicographic order + /* lexicographic order */ inline bool operator<(const EuclidPoint& other) const { for (size_t i=0; i<D; i++) { if (data[i] < other.data[i]) { @@ -228,7 +228,4 @@ public: private: R *P; }; - - - } diff --git a/include/framework/scheduling/Epoch.h b/include/framework/scheduling/Epoch.h index e58bd11..3ffa145 100644 --- a/include/framework/scheduling/Epoch.h +++ b/include/framework/scheduling/Epoch.h @@ -44,13 +44,6 @@ public: } ~Epoch() { - /* FIXME: this is needed to keep the destructor from sometimes locking - * up here. But there *shouldn't* be any threads waiting on this signal - * at object destruction, so something else is going on here that needs - * looked into - */ - // m_active_cv.notify_all(); - if (m_structure) { m_structure->release_reference(); } diff --git a/include/framework/scheduling/FIFOScheduler.h b/include/framework/scheduling/FIFOScheduler.h index c6baf9b..3ed4f49 100644 --- a/include/framework/scheduling/FIFOScheduler.h +++ b/include/framework/scheduling/FIFOScheduler.h @@ -5,6 +5,15 @@ * * Distributed under the Modified BSD License. * + * This scheduler runs just concurrently, using a standard FIFO queue to + * determine which jobs to run next. If more jobs are scheduled than there + * are available threads, the excess will stall until a thread becomes + * available and then run in the order they were received by the scheduler. + * + * TODO: We need to set up a custom threadpool based on jthreads to support + * thread preemption for a later phase of this project. That will allow us + * to avoid blocking epoch transitions on long-running queries, or to pause + * reconstructions on demand. */ #pragma once diff --git a/include/framework/scheduling/Task.h b/include/framework/scheduling/Task.h index 008f232..d5d4266 100644 --- a/include/framework/scheduling/Task.h +++ b/include/framework/scheduling/Task.h @@ -5,6 +5,11 @@ * * Distributed under the Modified BSD License. * + * An abstraction to represent a job to be scheduled. Currently the + * supported task types are queries and merges. Based on the current plan, + * simple buffer inserts will likely also be made into a task at some + * point. + * */ #pragma once diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h index 50ba196..6c479cd 100644 --- a/include/framework/scheduling/statistics.h +++ b/include/framework/scheduling/statistics.h @@ -5,6 +5,11 @@ * * Distributed under the Modified BSD License. * + * This is a stub for a statistics tracker to be used in scheduling. It + * currently only tracks simple aggregated statistics, but should be + * updated in the future for more fine-grained statistics. These will be + * used for making scheduling decisions and predicting the runtime of a + * given job. */ #pragma once diff --git a/include/framework/structure/BufferView.h b/include/framework/structure/BufferView.h index 4e3de25..9e0872b 100644 --- a/include/framework/structure/BufferView.h +++ b/include/framework/structure/BufferView.h @@ -5,6 +5,7 @@ * * Distributed under the Modified BSD License. * + * TODO: This file is very poorly commented. */ #pragma once diff --git a/include/framework/structure/ExtensionStructure.h b/include/framework/structure/ExtensionStructure.h index 373a1e2..4802bc1 100644 --- a/include/framework/structure/ExtensionStructure.h +++ b/include/framework/structure/ExtensionStructure.h @@ -37,19 +37,23 @@ public: ~ExtensionStructure() = default; /* - * Create a shallow copy of this extension structure. The copy will share references to the - * same levels/shards as the original, but will have its own lists. As all of the shards are - * immutable (with the exception of deletes), the copy can be restructured with reconstructions - * and flushes without affecting the original. The copied structure will be returned with a reference - * count of 0; generally you will want to immediately call take_reference() on it. + * Create a shallow copy of this extension structure. The copy will share + * references to the same levels/shards as the original, but will have its + * own lists. As all of the shards are immutable (with the exception of + * deletes), the copy can be restructured with reconstructions and flushes + * without affecting the original. The copied structure will be returned + * with a reference count of 0; generally you will want to immediately call + * take_reference() on it. * - * NOTE: When using tagged deletes, a delete of a record in the original structure will affect - * the copy, so long as the copy retains a reference to the same shard as the original. This could - * cause synchronization problems under tagging with concurrency. Any deletes in this context will + * NOTE: When using tagged deletes, a delete of a record in the original + * structure will affect the copy, so long as the copy retains a reference + * to the same shard as the original. This could cause synchronization + * problems under tagging with concurrency. Any deletes in this context will * need to be forwarded to the appropriate structures manually. */ ExtensionStructure<R, S, Q, L> *copy() { - auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, m_max_delete_prop); + auto new_struct = new ExtensionStructure<R, S, Q, L>(m_buffer_size, m_scale_factor, + m_max_delete_prop); for (size_t i=0; i<m_levels.size(); i++) { new_struct->m_levels.push_back(m_levels[i]->clone()); } @@ -64,9 +68,9 @@ public: * setting the delete bit in its wrapped header. Returns 1 if a matching * record was found and deleted, and 0 if a matching record was not found. * - * This function will stop after finding the first matching record. It is assumed - * that no duplicate records exist. In the case of duplicates, this function will - * still "work", but in the sense of "delete first match". + * This function will stop after finding the first matching record. It is + * assumed that no duplicate records exist. In the case of duplicates, this + * function will still "work", but in the sense of "delete first match". */ int tagged_delete(const R &rec) { for (auto level : m_levels) { @@ -77,7 +81,7 @@ public: /* * If the record to be erased wasn't found, return 0. The - * DynamicExtension itself will then search the active + * DynamicExtension itself will then search the active * Buffers. */ return 0; @@ -164,21 +168,23 @@ public: } /* - * Validate that no level in the structure exceeds its maximum tombstone capacity. This is - * used to trigger preemptive compactions at the end of the reconstruction process. + * Validate that no level in the structure exceeds its maximum tombstone + * capacity. This is used to trigger preemptive compactions at the end of + * the reconstruction process. */ bool validate_tombstone_proportion() { - long double ts_prop; - for (size_t i=0; i<m_levels.size(); i++) { - if (m_levels[i]) { - ts_prop = (long double) m_levels[i]->get_tombstone_count() / (long double) calc_level_record_capacity(i); - if (ts_prop > (long double) m_max_delete_prop) { - return false; - } - } + long double ts_prop; + for (size_t i = 0; i < m_levels.size(); i++) { + if (m_levels[i]) { + ts_prop = (long double)m_levels[i]->get_tombstone_count() / + (long double)calc_level_record_capacity(i); + if (ts_prop > (long double)m_max_delete_prop) { + return false; + } } + } - return true; + return true; } bool validate_tombstone_proportion(level_index level) { @@ -224,14 +230,14 @@ public: /* * The amount of storage required for the reconstruction accounts * for the cost of storing the new records, along with the - * cost of retaining the old records during the process - * (hence the 2x multiplier). + * cost of retaining the old records during the process + * (hence the 2x multiplier). * - * FIXME: currently does not account for the *actual* size - * of the shards, only the storage for the records + * FIXME: currently does not account for the *actual* size + * of the shards, only the storage for the records * themselves. */ - size_t reccnt = m_levels[i-1]->get_record_count(); + size_t reccnt = m_levels[i - 1]->get_record_count(); if constexpr (L == LayoutPolicy::LEVELING) { if (can_reconstruct_with(i, reccnt)) { reccnt += m_levels[i]->get_record_count(); diff --git a/include/framework/structure/InternalLevel.h b/include/framework/structure/InternalLevel.h index d586869..db38946 100644 --- a/include/framework/structure/InternalLevel.h +++ b/include/framework/structure/InternalLevel.h @@ -6,6 +6,12 @@ * * Distributed under the Modified BSD License. * + * The word `Internal` in this class's name refers to memory. The current + * model, inherited from the framework in Practical Dynamic Extension for + * Sampling Indexes, would use a different ExternalLevel for shards stored + * on external storage. This is a distinction that can probably be avoided + * with some more thought being put into interface design. + * */ #pragma once diff --git a/include/framework/util/Configuration.h b/include/framework/util/Configuration.h index 8e3d20f..65ca181 100644 --- a/include/framework/util/Configuration.h +++ b/include/framework/util/Configuration.h @@ -8,34 +8,29 @@ */ #pragma once -#include <atomic> -#include <numeric> -#include <cstdio> -#include <vector> - -#include "psu-util/timer.h" -#include "psu-ds/Alias.h" +#include <cstdlib> +#include <utility> namespace de { -thread_local size_t sampling_attempts = 0; -thread_local size_t sampling_rejections = 0; -thread_local size_t deletion_rejections = 0; -thread_local size_t bounds_rejections = 0; -thread_local size_t tombstone_rejections = 0; -thread_local size_t buffer_rejections = 0; +static thread_local size_t sampling_attempts = 0; +static thread_local size_t sampling_rejections = 0; +static thread_local size_t deletion_rejections = 0; +static thread_local size_t bounds_rejections = 0; +static thread_local size_t tombstone_rejections = 0; +static thread_local size_t buffer_rejections = 0; /* * thread_local size_t various_sampling_times go here. */ -thread_local size_t sample_range_time = 0; -thread_local size_t alias_time = 0; -thread_local size_t alias_query_time = 0; -thread_local size_t rejection_check_time = 0; -thread_local size_t buffer_sample_time = 0; -thread_local size_t memlevel_sample_time = 0; -thread_local size_t disklevel_sample_time = 0; -thread_local size_t sampling_bailouts = 0; +static thread_local size_t sample_range_time = 0; +static thread_local size_t alias_time = 0; +static thread_local size_t alias_query_time = 0; +static thread_local size_t rejection_check_time = 0; +static thread_local size_t buffer_sample_time = 0; +static thread_local size_t memlevel_sample_time = 0; +static thread_local size_t disklevel_sample_time = 0; +static thread_local size_t sampling_bailouts = 0; enum class LayoutPolicy { diff --git a/include/query/irs.h b/include/query/irs.h index c14d0cf..e2d9325 100644 --- a/include/query/irs.h +++ b/include/query/irs.h @@ -8,7 +8,6 @@ * A query class for independent range sampling. This query requires * that the shard support get_lower_bound(key), get_upper_bound(key), * and get_record_at(index). - * */ #pragma once diff --git a/include/query/wirs.h b/include/query/wirs.h index 4fac7e7..ae82194 100644 --- a/include/query/wirs.h +++ b/include/query/wirs.h @@ -8,7 +8,6 @@ * A query class for weighted independent range sampling. This * class is tightly coupled with include/shard/AugBTree.h, and * so is probably of limited general utility. - * */ #pragma once diff --git a/include/shard/Alias.h b/include/shard/Alias.h index f0d1d59..9275952 100644 --- a/include/shard/Alias.h +++ b/include/shard/Alias.h @@ -10,6 +10,7 @@ * structure. Designed to be used along side the WSS * query in include/query/wss.h * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/AugBTree.h b/include/shard/AugBTree.h index 58bd098..54931bd 100644 --- a/include/shard/AugBTree.h +++ b/include/shard/AugBTree.h @@ -10,6 +10,8 @@ * used along side the WIRS query in include/query/wirs.h, but * also supports the necessary methods for other common query * types. + * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/ISAMTree.h b/include/shard/ISAMTree.h index 9458b1f..3763271 100644 --- a/include/shard/ISAMTree.h +++ b/include/shard/ISAMTree.h @@ -8,6 +8,7 @@ * * A shard shim around an in-memory ISAM tree. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/PGM.h b/include/shard/PGM.h index 8031870..e2752ef 100644 --- a/include/shard/PGM.h +++ b/include/shard/PGM.h @@ -9,6 +9,7 @@ * A shard shim around the static version of the PGM learned * index. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/TrieSpline.h b/include/shard/TrieSpline.h index f9fb3cb..2a432e8 100644 --- a/include/shard/TrieSpline.h +++ b/include/shard/TrieSpline.h @@ -7,6 +7,7 @@ * * A shard shim around the TrieSpline learned index. * + * TODO: The code in this file is very poorly commented. */ #pragma once diff --git a/include/shard/VPTree.h b/include/shard/VPTree.h index ba13a87..b342fe6 100644 --- a/include/shard/VPTree.h +++ b/include/shard/VPTree.h @@ -9,7 +9,7 @@ * search. * * FIXME: Does not yet support the tombstone delete policy. - * + * TODO: The code in this file is very poorly commented. */ #pragma once @@ -234,13 +234,15 @@ private: } vpnode *build_subtree(size_t start, size_t stop, gsl_rng *rng) { - // base-case: sometimes happens (probably because of the +1 and -1 - // in the first recursive call) + /* + * base-case: sometimes happens (probably because of the +1 and -1 + * in the first recursive call) + */ if (start > stop) { return nullptr; } - // base-case: create a leaf node + /* base-case: create a leaf node */ if (stop - start <= LEAFSZ) { vpnode *node = new vpnode(); node->start = start; @@ -251,26 +253,30 @@ private: return node; } - // select a random element to be the root of the - // subtree + /* + * select a random element to be the root of the + * subtree + */ auto i = start + gsl_rng_uniform_int(rng, stop - start + 1); swap(start, i); - // partition elements based on their distance from the start, - // with those elements with distance falling below the median - // distance going into the left sub-array and those above - // the median in the right. This is easily done using QuickSelect. + /* + * partition elements based on their distance from the start, + * with those elements with distance falling below the median + * distance going into the left sub-array and those above + * the median in the right. This is easily done using QuickSelect. + */ auto mid = (start + 1 + stop) / 2; quickselect(start + 1, stop, mid, m_ptrs[start], rng); - // Create a new node based on this partitioning + /* Create a new node based on this partitioning */ vpnode *node = new vpnode(); node->start = start; - // store the radius of the circle used for partitioning the node. + /* store the radius of the circle used for partitioning the node. */ node->radius = m_ptrs[start]->rec.calc_distance(m_ptrs[mid]->rec); - // recursively construct the left and right subtrees + /* recursively construct the left and right subtrees */ node->inside = build_subtree(start + 1, mid-1, rng); node->outside = build_subtree(mid, stop, rng); @@ -279,6 +285,8 @@ private: return node; } + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. void quickselect(size_t start, size_t stop, size_t k, Wrapped<R> *p, gsl_rng *rng) { if (start == stop) return; @@ -291,6 +299,8 @@ private: } } + // TODO: The quickselect code can probably be generalized and moved out + // to psudb-common instead. size_t partition(size_t start, size_t stop, Wrapped<R> *p, gsl_rng *rng) { auto pivot = start + gsl_rng_uniform_int(rng, stop - start); double pivot_dist = p->rec.calc_distance(m_ptrs[pivot]->rec); @@ -369,8 +379,5 @@ private: } } } - - }; - } diff --git a/include/util/Cursor.h b/include/util/Cursor.h index be7ab32..e8ba53d 100644 --- a/include/util/Cursor.h +++ b/include/util/Cursor.h @@ -7,15 +7,18 @@ * Distributed under the Modified BSD License. * * A simple record cursor type with associated methods for help in - * merging record sets when constructing shards. + * merging record sets when constructing shards. Iterates an array + * of records in order, and provides facilities to make sorted merges + * easier. + * + * TODO: Prior versions of this module included automatic support for + * working with data stored in PagedFiles as well. That should be + * reintroduced at some point. */ #pragma once -#include "framework/ShardRequirements.h" - -#include "psu-ds/BloomFilter.h" -#include "psu-ds/PriorityQueue.h" -#include "psu-util/alignment.h" +#include <cstdlib> +#include <vector> namespace de { template<typename R> @@ -64,6 +67,8 @@ template <typename R> inline static Cursor<R> *get_next(std::vector<Cursor<R>> &cursors, Cursor<R> *current=nullptr) { const R *min_rec = nullptr; Cursor<R> *result = nullptr; + // FIXME: for large cursor vectors, it may be worth it to use a + // PriorityQueue here instead of scanning. for (size_t i=0; i< cursors.size(); i++) { if (cursors[i] == (Cursor<R>) {0} ) continue; diff --git a/include/util/SortedMerge.h b/include/util/SortedMerge.h index ed47acb..8a1e782 100644 --- a/include/util/SortedMerge.h +++ b/include/util/SortedMerge.h @@ -2,7 +2,6 @@ * include/util/SortedMerge.h * * Copyright (C) 2023 Douglas B. Rumbaugh <drumbaugh@psu.edu> - * Dong Xie <dongx@psu.edu> * * Distributed under the Modified BSD License. * @@ -28,12 +27,26 @@ using psudb::queue_record; using psudb::byte; using psudb::CACHELINE_SIZE; +/* + * A simple struct to return record_count and tombstone_count information + * back to the caller. Could've been an std::pair, but I like the more + * explicit names. + */ struct merge_info { size_t record_count; size_t tombstone_count; }; - +/* + * Build a vector of cursors corresponding to the records contained within + * a vector of shards. The cursor at index i in the output will correspond + * to the shard at index i in the input. + * + * The values of reccnt and tscnt will be updated with the sum of the + * records contained within the shards. Note that these counts include deleted + * records that may be removed during shard construction, and so constitute + * upper bounds only. + */ template <RecordInterface R, ShardInterface<R> S> static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, size_t *reccnt, size_t *tscnt) { std::vector<Cursor<Wrapped<R>>> cursors; @@ -57,7 +70,14 @@ static std::vector<Cursor<Wrapped<R>>> build_cursor_vec(std::vector<S*> &shards, } /* - * + * Build a sorted array of records based on the contents of a BufferView. + * This routine does not alter the buffer view, but rather copies the + * records out and then sorts them. The provided buffer must be large + * enough to store the records from the BufferView, or the behavior of the + * function is undefined. + * + * It allocates a temporary buffer for the sorting, and execution of the + * program will be aborted if the allocation fails. */ template <RecordInterface R> static merge_info sorted_array_from_bufferview(BufferView<R> bv, @@ -94,10 +114,10 @@ static merge_info sorted_array_from_bufferview(BufferView<R> bv, continue; } - // fixme: this shouldn't be necessary, but the tagged record + // FIXME: this shouldn't be necessary, but the tagged record // bypass doesn't seem to be working on this code-path, so this // ensures that tagged records from the buffer are able to be - // dropped, eventually. it should only need to be &= 1 + // dropped, eventually. It should only need to be &= 1 base->header &= 3; buffer[info.record_count++] = *base; diff --git a/include/util/bf_config.h b/include/util/bf_config.h index fdf2195..9f29ed7 100644 --- a/include/util/bf_config.h +++ b/include/util/bf_config.h @@ -15,7 +15,7 @@ */ #pragma once -#include "psu-util/alignment.h" +#include <cstdlib> namespace de { @@ -30,7 +30,6 @@ static size_t BF_HASH_FUNCS = 7; * (0, 1), or the behavior of bloom filters is undefined. */ static void BF_SET_FPR(double fpr) { - BF_FPR = fpr; } diff --git a/include/util/types.h b/include/util/types.h index 3908174..a13bd95 100644 --- a/include/util/types.h +++ b/include/util/types.h @@ -10,18 +10,18 @@ * that are defined within the header files that make direct use of them, * but all generally usable, simple types are defined here. * + * Many of these types were used in the Practical Dynamic Extension for + * Sampling Indexes work, particularly for external storage and buffer + * pool systems. They aren't used now, but we're leaving them here to use + * them in the future, when we add this functionality into this system too. */ #pragma once -#include <cstdlib> #include <cstdint> -#include <cstddef> -#include <string> +#include <cstdlib> namespace de { -using std::byte; - /* Represents a page offset within a specific file (physical or virtual) */ typedef uint32_t PageNum; |