summaryrefslogtreecommitdiffstats
path: root/chapters
diff options
context:
space:
mode:
Diffstat (limited to 'chapters')
-rw-r--r--chapters/beyond-dsp.tex170
-rw-r--r--chapters/future-work.tex4
-rw-r--r--chapters/related-works.tex25
3 files changed, 196 insertions, 3 deletions
diff --git a/chapters/beyond-dsp.tex b/chapters/beyond-dsp.tex
index 66b9d97..ee32bfb 100644
--- a/chapters/beyond-dsp.tex
+++ b/chapters/beyond-dsp.tex
@@ -697,7 +697,6 @@ queries. We'll discuss each of these in this section.
\subsubsection{Record Interface}
-
The record interface is the simplest of the three. Records are C++
structs, and they must implement an equality comparison operator. Beyond
this, the framework places no additional constraints and makes
@@ -706,7 +705,9 @@ etc. Though the records must be fixed length (as they are structs),
variable length data can be supported using off-record storage and
pointers if necessary. Each record is automatically wrapped by the
framework with a header that is used to facilitate deletion support.
-The record concept is shown in Listing~\ref{lst:record}.
+The record concept is shown in Listing~\ref{lst:record}, along with the
+wrapped header type that is used to interact with records within
+the framework.
\begin{lstfloat}
\begin{lstlisting}[language=C++]
@@ -714,15 +715,178 @@ template <typename R>
concept RecordInterface = requires(R r, R s) {
{ r == s } -> std::convertible_to<bool>;
};
+
+
+template <RecordInterface R> struct Wrapped {
+ uint32_t header;
+ R rec;
+
+ inline void set_delete();
+ inline bool is_deleted() const;
+ inline void set_tombstone(bool val);
+ inline bool is_tombstone() const;
+
+ inline bool operator==(const Wrapped &other) const;
+};
\end{lstlisting}
\caption{The required interface for record types in our dynamization framework.}
\label{lst:record}
\end{lstfloat}
-
\subsubsection{Shard Interface}
+
+Our framework's underlying representation of the data structure is called
+a \emph{shard}. The provided shard structure should provide either a full
+implementation of the data structure to be dynamized, or a shim around
+an existing implementation that provides the necessary functions for our
+framework to interact with it. Shards must provide two constructors:
+one from an unsorted set of records, and another from a set of other
+shards of the same type. The second of these constructors is to allow for
+efficient merging to be leveraged for merge decomposable search problems.
+
+Shards can also expose a point lookup operation for use in supporting
+deletes for DDSPs. This function is only used for DDSP deletes, and so can
+be left off when this functionality isn't necessary. If a data structure
+doesn't natively support an efficient point-lookup, then it can be added
+by including a hash table or other data structure in the shard if desired.
+This function accepts a record type as input, and should return a pointer
+to the record that exactly matches the input in storage, if one exists,
+or \texttt{nullptr} if it doesn't. It should also accept an optional
+boolean argument that the framework will pass \texttt{true} into if it
+is doing a lookup for a tombstone. This flag is to allow the shard to
+use various tombstone-related optimizations, such as using a Bloom filter
+for them, or storing them separately from the main records, etc.
+
+Shards should also expose some accessors for basic meta-data about
+their contents. In particular, the framework is reliant upon a function
+that returns the number of records within the shard for planning
+reconstructions, and the number of deleted records or tombstones within
+the shard for use in proactive compaction to bound the number of deleted
+records. The interface also requires functions for accessing memory
+usage information, both the memory use for the main data structure
+being dynamized, and also any auxiliary memory (e.g., memory used
+for an auxiliary hash table). These memory functions are used only for
+informational purposes.
+
+The concept for shard types is shown in Listing~\ref{listing:shard}. Note
+that all records within shards are wrapped by the framework header. It
+is up to the shard to handle the removal of deleted records based on
+this information during reconstruction.
+
+\begin{lstfloat}
+\begin{lstlisting}[language=C++]
+
+template <typename SHARD>
+concept ShardInterface = RecordInterface<typename SHARD::RECORD>
+ && requires(SHARD shard, const std::vector<SHARD *> &shard_vector,
+ bool b, BufferView<typename SHARD::RECORD> bv,
+ typename SHARD::RECORD rec) {
+ {SHARD(shard_vector)};
+ {SHARD(std::move(bv))};
+
+ {
+ shard.point_lookup(rec, b)
+ } -> std::same_as<Wrapped<typename SHARD::RECORD> *>;
+
+ { shard.get_record_count() } -> std::convertible_to<size_t>;
+ { shard.get_tombstone_count() } -> std::convertible_to<size_t>;
+ { shard.get_memory_usage() } -> std::convertible_to<size_t>;
+ { shard.get_aux_memory_usage() } -> std::convertible_to<size_t>;
+
+};
+\end{lstlisting}
+\caption{The required interface for shard types in our dynamization
+framework.}
+\label{listing:shard}
+\end{lstfloat}
+
+
\subsubsection{Query Interface}
+The most complex interface required by the framework is for queries. The
+concept for query types is given in Listing~\ref{listing:query}. In
+effect, it requires implementing the full IDSP interface from the
+previous section, as well as versions of $\mathbftt{local\_preproc}$
+and $\mathbftt{local\_query}$ for pre-processing and querying an unsorted
+set of records, which is necessary to allow the mutable buffer to be
+used as part of the query process.\footnote{
+ In the worst case, these routines could construct a temporary shard
+ over the mutable buffer, and use this to answer queries.
+} The $\mathbftt{repeat}$ function is necessary even for
+normal eDSP problems, and should just return \texttt{false} with no other
+action in those cases. The interface also allows the user to specify
+whether the query process should abort after the first result is obtained,
+which is a useful optimization for point lookups.
+
+This interface allows for the local and overall query results to be
+specified independently, with different types. This can be used for a
+variety of purposes. For example, an invertible range count can have
+a local result that includes both the number of records and the number
+of tombstones, while the query result itself remains a single number.
+Additionally, the framework makes no decision about what, if any,
+collection type should be used for these results. A range scan, for
+example, could specify the result types as a vector of records, map
+of records, etc., depending on the use case.
+
+There are two significant differences between the IDSP interface and
+the query concept implementation. The first is in the way that the query
+result object is passed between the \texttt{combine} and \texttt{repeat}
+functions. To avoid copying it, as it can be large for some types of query,
+this object is initialized by the framework and passed by reference into
+these two functions. The second difference is that the \texttt{repeat}
+function is responsible for making any updates to local query objects,
+and that when \texttt{repeat} returns \texttt{true}, the local queries
+will be immediately re-executed. To facilitate this, the \texttt{repeat}
+function takes the local query objects as arguments.
+
+
+\begin{lstfloat}
+
+\begin{lstlisting}[language=C++]
+
+template <typename QUERY, typename SHARD,
+ typename RESULT = typename QUERY::ResultType,
+ typename LOCAL_RESULT = typename QUERY::LocalResultType,
+ typename PARAMETERS = typename QUERY::Parameters,
+ typename LOCAL = typename QUERY::LocalQuery,
+ typename LOCAL_BUFFER = typename QUERY::LocalQueryBuffer>
+concept QueryInterface =
+ requires(PARAMETERS *parameters, LOCAL *local, LOCAL_BUFFER *buffer_query,
+ SHARD *shard, std::vector<LOCAL *> &local_queries,
+ std::vector<LOCAL_RESULT> &local_results, RESULT &result,
+ BufferView<typename SHARD::RECORD> *bv) {
+ {
+ QUERY::local_preproc(shard, parameters)
+ } -> std::convertible_to<LOCAL *>;
+
+ {
+ QUERY::local_preproc_buffer(bv, parameters)
+ } -> std::convertible_to<LOCAL_BUFFER *>;
+
+ { QUERY::distribute_query(parameters, local_queries, buffer_query) };
+
+ { QUERY::local_query(shard, local) } -> std::convertible_to<LOCAL_RESULT>;
+
+ {
+ QUERY::local_query_buffer(buffer_query)
+ } -> std::convertible_to<LOCAL_RESULT>;
+
+ { QUERY::combine(local_results, parameters, result) };
+
+ {
+ QUERY::repeat(parameters, result, local_queries, buffer_query)
+ } -> std::same_as<bool>;
+
+ { QUERY::EARLY_ABORT } -> std::convertible_to<bool>;
+ };
+\end{lstlisting}
+
+\caption{The required interface for query types in our dynamization
+framework.}
+\label{listing:query}
+\end{lstfloat}
+
+
\subsection{Configurability}
\subsection{Concurrency}
diff --git a/chapters/future-work.tex b/chapters/future-work.tex
index 0c766dd..a3fa7aa 100644
--- a/chapters/future-work.tex
+++ b/chapters/future-work.tex
@@ -1,2 +1,6 @@
\chapter{Future Work}
\label{chap:future}
+
+\section{Fault Tolerance and External Structures}
+
+\section{Query Processing}
diff --git a/chapters/related-works.tex b/chapters/related-works.tex
new file mode 100644
index 0000000..0fe4168
--- /dev/null
+++ b/chapters/related-works.tex
@@ -0,0 +1,25 @@
+\chapter{Related Work}
+
+\section{Implementations of Bentley-Saxe}
+
+\subsection{Mantis}
+
+\subsection{Metric Indexing Structures}
+
+\subsection{LSMGraph}
+
+\subsection{PGM}
+
+\section{LSM Tree}
+
+\subsection{Design Space}
+
+\subsection{SILK}
+
+\section{GiST and GIN}
+
+\section{Automated Index Composition}
+\subsection{Periodic Table of Data Structures, etc.}
+\subsection{Gene}
+
+