| author | Douglas Rumbaugh <dbr4@psu.edu> | 2025-05-12 19:59:26 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2025-05-12 19:59:26 -0400 |
| commit | 5ffc53e69e956054fdefd1fe193e00eee705dcab (patch) | |
| tree | 74fd32db95211d0be067d22919e65ac959e4fa46 | |
| parent | 901a04fd8ec9a07b7bd195517a6d9e89da3ecab6 (diff) | |
| download | dissertation-5ffc53e69e956054fdefd1fe193e00eee705dcab.tar.gz | |
Updates
| -rw-r--r-- | chapters/introduction.tex | 42 |
| -rw-r--r-- | chapters/sigmod23/conclusion.tex | 33 |
| -rw-r--r-- | chapters/sigmod23/exp-baseline.tex | 148 |
| -rw-r--r-- | chapters/sigmod23/exp-extensions.tex | 67 |
| -rw-r--r-- | chapters/sigmod23/exp-parameter-space.tex | 205 |
| -rw-r--r-- | chapters/sigmod23/experiment.tex | 115 |
| -rw-r--r-- | chapters/sigmod23/extensions.tex | 106 |
| -rw-r--r-- | references/references.bib | 66 |
9 files changed, 531 insertions, 274 deletions
diff --git a/chapters/introduction.tex b/chapters/introduction.tex
index a5d9740..7084867 100644
--- a/chapters/introduction.tex
+++ b/chapters/introduction.tex
@@ -1,6 +1,48 @@
\chapter{Introduction}
\label{chap:intro}
+Modern relational database management systems (RDBMS) are founded
+upon a set-based representation of data~\cite{codd70}. This model is
+very flexible and can be used to represent data of a wide variety of
+types, from standard tabular information, to vectors, to graphs, and
+more. However, this flexibility comes at a significant cost in query
+performance: the most basic data access operation is a linear table
+scan.
+
+To work around this limitation, RDBMS support the creation of special
+data structures called indices, which can be used to accelerate
+particular types of queries, and feature sophisticated query planning
+and optimization systems that can identify opportunities to utilize
+these indices~\cite{cowbook}. This approach works well for queries for
+which an index has been designed and integrated into the database.
+Unfortunately, many RDBMS only support a very limited set of indices,
+targeted at accelerating single-dimensional range queries and point
+lookups~\cite{mysql-btree-hash, cowbook}.
+
+This limitation is unfortunate, because one of the major challenges
+currently facing data systems is the processing of complex analytical
+queries of varying types over large sets of data. These queries and
+data types are nominally supported by relational databases, but are
+not well served by existing indexing techniques and, as a result,
+perform poorly. This has led to the development of a variety of
+specialized systems for particular types of queries, such as spatial
+systems~\cite{postgis-doc}, vector databases~\cite{pinecone-db}, and
+graph databases~\cite{neptune, neo4j}.
+
One of the major challenges facing current data systems is the processing of
complex and varied analytical queries over vast data sets. One commonly used
technique for accelerating these queries is the application of data
diff --git a/chapters/sigmod23/conclusion.tex b/chapters/sigmod23/conclusion.tex
index de6bffc..f350cfd 100644
--- a/chapters/sigmod23/conclusion.tex
+++ b/chapters/sigmod23/conclusion.tex
@@ -1,17 +1,22 @@
\section{Conclusion}
\label{sec:conclusion}
-This chapter discussed the creation of a framework for the dynamic extension of
-static indexes designed for various sampling problems. Specifically, extensions
-were created for the alias structure (WSS), the in-memory ISAM tree (IRS), and
-the alias-augmented B+tree (WIRS). In each case, the SSIs were extended
-successfully with support for updates and deletes, without compromising their
-sampling performance advantage relative to existing dynamic baselines. This was
-accomplished by leveraging ideas borrowed from the Bentley-Saxe method and the
-design space of the LSM tree to divide the static index into multiple shards,
-which could be individually reconstructed in a systematic fashion to
-accommodate new data. This framework provides a large design space for trading
-between update performance, sampling performance, and memory usage, which was
-explored experimentally. The resulting extended indexes were shown to approach
-or match the insertion performance of the B+tree, while simultaneously
-performing significantly faster in sampling operations under most situations.
+In this chapter, we discussed the creation of a dynamization system
+based upon the Bentley-Saxe method that can be used to create dynamized
+sampling data structures that outperform dynamic baselines and feature
+a configurable design space. Specifically, we discussed dynamized
+versions of the alias structure for weighted set sampling, the
+alias-augmented B+tree for weighted independent range sampling, and
+the ISAM tree for independent range sampling. In each case, the static
+structures were dynamized with support for inserts and deletes without
+compromising their query performance advantage over dynamic baselines,
+and while matching or exceeding the dynamic structures' insertion
+performance.
+
+The techniques proposed in this chapter, however, are limited to a
+narrow class of data structures addressing a specific type of search
+problem. While these results are promising, they fall short of a
+general solution to data structure dynamization that addresses the
+limitations of classical dynamization techniques discussed in
+Chapter~\ref{chap:background}. In the next chapter, we will take several
+of the results of this chapter, generalize them, and apply them to a
+much wider range of data structures.
diff --git a/chapters/sigmod23/exp-baseline.tex b/chapters/sigmod23/exp-baseline.tex
index 9e7929c..da62766 100644
--- a/chapters/sigmod23/exp-baseline.tex
+++ b/chapters/sigmod23/exp-baseline.tex
@@ -1,16 +1,17 @@
\subsection{Comparison to Baselines}
-Next, the performance of indexes extended using the framework is compared
-against tree sampling on the aggregate B+tree, as well as problem-specific
-SSIs for WSS, WIRS, and IRS queries. Unless otherwise specified, IRS and WIRS
-queries were executed with a selectivity of $0.1\%$ and 500 million randomly
-selected records from the OSM dataset were used. The uniform and zipfian
-synthetic datasets were 1 billion records in size. All benchmarks warmed up the
-data structure by inserting 10\% of the records, and then measured the
-throughput inserting the remaining records, while deleting 5\% of them over the
-course of the benchmark. Once all records were inserted, the sampling
-performance was measured. The reported update throughputs were calculated using
-both inserts and deletes, following the warmup period.
+Next, we compared the performance of our dynamized sampling indices with
+Olken's method on an aggregate B+Tree. We also examined the query performance
+of a single instance of the SSI in question, to establish how much query
+performance is lost to the dynamization. Unless otherwise specified,
+IRS and WIRS queries were run with a selectivity of $0.1\%$. Additionally,
+the \texttt{OSM} dataset was downsampled to 500 million records, except
+for scalability tests. The synthetic uniform and zipfian datasets were
+generated with 1 billion records. As in the previous section, all
+benchmarks began by warming up the structure with $10\%$ of the total
+records; update performance was then measured over the insertion of
+the remaining records, including a mix of $5\%$ deletes. Query performance
+was measured following the insertion of all records.
\begin{figure*}
\centering
@@ -21,15 +22,25 @@ both inserts and deletes, following the warmup period.
\caption{Framework Comparisons to Baselines for WSS}
\end{figure*}
-Starting with WSS, Figure~\ref{fig:wss-insert} shows that the DE-WSS structure
-is competitive with the AGG B+tree in terms of insertion performance, achieving
-about 85\% of the AGG B+tree's insertion throughput on the Twitter dataset, and
-beating it by similar margins on the other datasets. In terms of sampling
-performance in Figure~\ref{fig:wss-sample}, it beats the B+tree handily, and
-compares favorably to the static alias structure. Figures~\ref{fig:wss-insert-s}
-and \ref{fig:wss-sample-s} show the performance scaling of the three structures as
-the dataset size increases. All of the structures exhibit the same type of
-performance degradation with respect to dataset size.
+We'll begin with WSS. Figure~\ref{fig:wss-insert} shows that
+\texttt{DE-WSS} achieves about $85\%$ of \texttt{AGG B+Tree}'s insertion
+throughput on the \texttt{twitter} dataset, and outright defeats it on the
+others. Its sampling performance, shown in Figure~\ref{fig:wss-sample},
+is also clearly superior to Olken's method, and is quite close
+to a single instance of the alias structure, indicating that the
+overhead due to the dynamization is quite low. We also considered
+the scalability of \texttt{DE-WSS} as the data size increases,
+in Figures~\ref{fig:wss-insert-s} and \ref{fig:wss-sample-s}. These
+tests were run with random samples of the \texttt{OSM} dataset of the
+specified sizes, and show that \texttt{DE-WSS} maintains its advantage
+over \texttt{AGG B+Tree} across a range of data sizes. One interesting
+point on Figure~\ref{fig:wss-sample-s} is the final data point for
+the alias structure, which is \emph{worse} than \texttt{DE-WSS}. This
+point reproduced consistently, and we believe it to be a NUMA effect. The
+2 billion records were large enough that the alias structure built from
+them spanned two NUMA nodes on our server, whereas the dynamized structure
+was broken into pieces, none of which individually spanned a NUMA node,
+resulting in better performance.
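To make the source of this sampling advantage concrete, the following is a
minimal sketch of the alias method's constant-time sampling step (table
construction is omitted, and the names are ours: this is an illustration,
not code from the benchmarked implementation). Each sample costs one uniform
draw and one biased coin flip regardless of the data size, whereas Olken's
method pays a root-to-leaf traversal of the aggregate B+Tree per sample.

\begin{verbatim}
#include <cstddef>
#include <random>
#include <vector>

// Sketch of Walker's alias method sampling step. After O(n) setup
// (not shown), each weighted sample is O(1): one uniform bucket
// draw plus one biased coin flip.
struct AliasTable {
    std::vector<double> prob;   // acceptance probability per bucket
    std::vector<size_t> alias;  // fallback bucket on rejection

    size_t sample(std::mt19937_64 &rng) const {
        std::uniform_int_distribution<size_t> bucket(0, prob.size() - 1);
        std::uniform_real_distribution<double> coin(0.0, 1.0);
        size_t i = bucket(rng);
        return (coin(rng) < prob[i]) ? i : alias[i];
    }
};
\end{verbatim}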
\begin{figure*}
\centering
@@ -38,16 +49,22 @@ performance degradation with respect to dataset size.
\caption{Framework Comparison to Baselines for WIRS}
\end{figure*}
-Figures~\ref{fig:wirs-insert} and \ref{fig:wirs-sample} show the performance of
-the DE-WIRS index, relative to the AGG B+tree and the alias-augmented B+tree. This
-example shows the same pattern of behavior as was seen with DE-WSS, though the
-margin between the DE-WIRS and its corresponding SSI is much narrower.
-Additionally, the constant factors associated with the construction cost of the
-alias-augmented B+tree are much larger than the alias structure. The loss of
-insertion performance due to this is seen clearly in Figure~\ref{fig:wirs-insert}, where
-the margin of advantage between DE-WIRS and the AGG B+tree in insertion
-throughput shrinks compared to the DE-WSS index, and the AGG B+tree's advantage
-on the Twitter dataset is expanded.
+In Figures~\ref{fig:wirs-insert} and \ref{fig:wirs-sample} we examine
+the performance of \texttt{DE-WIRS} compared to \texttt{AGG B+Tree} and an
+alias-augmented B+Tree. We see the same basic set of patterns in this
+case as we did with WSS. In terms of insertion performance, \texttt{AGG
+B+Tree} defeats our dynamized index on the \texttt{twitter} dataset,
+but loses on the others. We can see that the alias-augmented B+Tree
+is much more expensive to build than an alias structure, and so the
+dynamized index's insertion performance advantage over the dynamic
+baseline is eroded somewhat. For queries, we see that \texttt{AGG B+Tree}
+performs similarly for WIRS sampling as it did for WSS sampling, but the
+alias-augmented B+Tree structure is quite a bit slower at WIRS than the
+alias structure was at WSS. As a result, \texttt{DE-WIRS} defeats
+the dynamic baseline by a smaller margin in this test, but it is still
+superior in terms of sampling performance, and still quite close in
+performance to the static structure, indicating that relatively little
+overhead is introduced by the dynamization.
\begin{figure*}
\subfloat[Insertion Scalability vs. Baselines]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-sc-irs-insert} \label{fig:irs-insert-s}}
@@ -61,38 +78,45 @@ on the Twitter dataset is expanded.
\caption{Framework Comparison to Baselines for IRS}
\end{figure*}
-Finally, Figures~\ref{fig:irs-insert1} and \ref{fig:irs-sample1} show a
-comparison of the in-memory DE-IRS index against the in-memory ISAM tree and the AGG
-B+tree for answering IRS queries. The cost of bulk-loading the ISAM tree is less
-than the cost of building the alias structure, or the alias-augmented B+tree, and
-so here DE-IRS defeats the AGG B+tree by wider margins in insertion throughput,
-though the margin narrows significantly in terms of sampling performance
-advantage.
-DE-IRS was further tested to evaluate scalability.
-Figure~\ref{fig:irs-insert-s} shows average insertion throughput,
-Figure~\ref{fig:irs-delete} shows average delete latency (under tagging), and
-Figure~\ref{fig:irs-sample-s} shows average sampling latencies for DE-IRS and
-AGG B+tree over a range of data sizes. In all cases, DE-IRS and B+tree show
-similar patterns of performance degradation as the datasize grows. Note that
-the delete latencies of DE-IRS are worse than AGG B+tree, because of the B+tree's
-cheaper point-lookups.
+We next considered IRS queries. Figures~\ref{fig:irs-insert1} and
+\ref{fig:irs-sample1} show the results of our testing of single-threaded
+\texttt{DE-IRS} running in-memory against the in-memory ISAM Tree and
+\texttt{AGG B+Tree}. The ISAM tree structure can be efficiently bulk-loaded,
+which results in a much faster construction time than the alias structure
+or alias-augmented B+tree. This gives it a significant update performance
+advantage, and we see in Figure~\ref{fig:irs-insert1} that \texttt{DE-IRS}
+beats \texttt{AGG B+Tree} by a significant margin in terms of insertion
+throughput. However, its query performance is significantly worse than
+the static baseline, and it defeats the B+tree by only a small margin in
+sampling latency across most datasets. Note that the OSM dataset in
+these tests is half the size of the synthetic ones, which accounts for
+the performance differences.
-Figure~\ref{fig:irs-sample-s}
-also includes one other point of interest: the sampling performance of
-DE-IRS \emph{improves} when the data size grows from one million to ten million
-records. While at first glance the performance increase may appear paradoxical,
-it actually demonstrates an important result concerning the effect of the
-unsorted mutable buffer on index performance. At one million records, the
-buffer constitutes approximately 1\% of the total data size; this results in
-the buffer being sampled from with greater frequency (as it has more total
-weight) than would be the case with larger data. The greater the frequency of
-buffer sampling, the more rejections will occur, and the worse the sampling
-performance will be. This illustrates the importance of keeping the buffer
-small, even when a scan is not used for buffer sampling. Finally,
-Figure~\ref{fig:irs-samplesize} shows the decreasing per-sample cost as the
-number of records requested by a sampling query grows for DE-IRS, compared to
-AGG B+tree. Note that DE-IRS benefits significantly more from batching samples
-than AGG B+tree, and that the improvement is greatest up to $k=100$ samples per
-query.
+We also considered the scalability of inserts, queries, and deletes for
+\texttt{DE-IRS} compared to \texttt{AGG B+Tree} across a wide range of
+data sizes. Figure~\ref{fig:irs-insert-s} shows that \texttt{DE-IRS}'s
+insertion performance scales similarly with data size as the baseline, and
+Figure~\ref{fig:irs-sample-s} tells a similar story for query performance.
+Figure~\ref{fig:irs-delete-s} compares the delete performance of the
+two structures, where \texttt{DE-IRS} is configured to use tagging. As
+expected, the B+tree does perform better here, as its delete cost is
+asymptotically superior to tagging. However, the plot does demonstrate
+that delete performance under tagging also scales well with data size.
+Finally, Figure~\ref{fig:irs-samplesize} shows the effect of sample
+set size on average per-sample cost.
+We see that, for a single sample,
+the B+tree is superior to \texttt{DE-IRS} because of the cost of the
+preliminary processing that our dynamized structure must do before it
+can begin to answer queries. However, as the sample set size increases,
+this cost increasingly begins to pay off, with \texttt{DE-IRS} quickly
+defeating the dynamic structure in average per-sample latency. One other
+interesting note is the performance of the static ISAM tree, which begins
+on par with the B+Tree, but also sees an improvement as the sample set
+size increases. This is because of cache effects. During the initial
+tree traversal, both the B+tree and ISAM tree have a similar number of
+cache misses. However, the ISAM tree needs to perform its traversal only
+once, and then samples from data that is stored in a compact sorted
+array, so it benefits strongly from the cache. Olken's method, in
+contrast, must perform a full tree traversal for each sample, so it
+doesn't see a significant improvement in per-sample performance as the
+sample set size grows.
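The amortization effect described above can be illustrated with a sketch of
batched IRS over a static sorted array, standing in for the ISAM tree's leaf
level (the function and names are hypothetical, not the repository's
interface). The boundary search is paid once per query, after which each of
the $k$ samples is a single random array access.

\begin{verbatim}
#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Batched IRS sketch: one range lookup, then k O(1) samples.
std::vector<int64_t> irs_batch(const std::vector<int64_t> &keys,
                               int64_t lo, int64_t hi, size_t k,
                               std::mt19937_64 &rng) {
    // Preliminary work, paid once per query.
    auto first = std::lower_bound(keys.begin(), keys.end(), lo);
    auto last  = std::upper_bound(keys.begin(), keys.end(), hi);

    std::vector<int64_t> out;
    if (first == last) return out;  // empty range: nothing to sample

    std::uniform_int_distribution<size_t> dist(0, (last - first) - 1);
    for (size_t i = 0; i < k; i++)
        out.push_back(*(first + dist(rng)));  // O(1) per sample
    return out;
}
\end{verbatim}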
diff --git a/chapters/sigmod23/exp-extensions.tex b/chapters/sigmod23/exp-extensions.tex
index d929e92..62f15f4 100644
--- a/chapters/sigmod23/exp-extensions.tex
+++ b/chapters/sigmod23/exp-extensions.tex
@@ -12,29 +12,46 @@
\label{fig:irs-extensions}
\end{figure*}
-Proof of concept implementations of external and concurrent extensions were
-also tested for IRS queries. Figures \ref{fig:ext-sample} and
-\ref{fig:ext-insert} show the performance of the external DE-IRS sampling index
-against AB-tree. DE-IRS was configured with 4 in-memory levels, using at most
-350 MiB of memory in testing, including bloom filters. {
-For DE-IRS, the \texttt{O\_DIRECT} flag was used to disable OS caching, and
-CGroups were used to limit process memory to 1 GiB to simulate a memory
-constrained environment. The AB-tree implementation tested
-had a cache, which was configured with a memory budget of 64 GiB. This extra
-memory was provided to be fair to AB-tree. Because it uses per-sample
-tree-traversals, it is much more reliant on caching for good performance. DE-IRS was
-tested without a caching layer.} The tests were performed with 4 billion (80 GiB)
-{and 8 billion (162 GiB) uniform and zipfian
-records}, and 2.6 billion (55 GiB) OSM records. DE-IRS outperformed the AB-tree
-by over an order of magnitude in both insertion and sampling performance.
+We also tested our proof-of-concept implementations of the external and
+concurrent extensions to the dynamization framework discussed
+in Section~\ref{sec:discussion}. First, we'll consider the external
+version of \texttt{DE-IRS}, compared with \texttt{AB-tree}. For this
+test, we configured \texttt{DE-IRS} to store the first 4 levels in
+memory, and the remainder on disk. This configuration resulted in the
+use of at most 350 MiB of memory (including the mutable buffer and
+Bloom filters). We used tombstones for deletes, to avoid random writes,
+but otherwise used the same standardized configuration parameters as
+the previous tests. We used \texttt{O\_DIRECT} to disable OS caching and
+CGroups to constrain the process to 1 GiB of memory, to simulate a
+memory-constrained environment. \texttt{DE-IRS} did not use any caching
+layer; however, we did enable a 64 GiB cache for AB-tree. This was done
+in fairness to AB-tree: because it requires per-sample tree traversals,
+it is much more reliant on caching for good performance. We performed
+these tests with larger data sets: 4 billion and 8 billion record
+synthetic datasets, which were 80 GiB and 162 GiB in size respectively,
+and the full 2.6 billion records of the OSM dataset, which was 55 GiB
+in size.
-Finally, Figures~\ref{fig:con-latency} and \ref{fig:con-tput} show the
-multi-threaded insertion performance of the in-memory DE-IRS index with
-concurrency support, compared to AB-tree running entirely in memory, using the
-synthetic uniform dataset. Note that in Figure~\ref{fig:con-latency}, some of
-the AB-tree results are cut off, due to having significantly lower throughput
-and higher latency compared with the DE-IRS. Even without concurrent
-merging, the framework shows linear scaling up to 4 threads of insertion,
-before leveling off; throughput remains flat even up to 32 concurrent
-insertion threads. An implementation with support for concurrent merging would
-scale even better.
+The results of this testing can be seen in Figures~\ref{fig:ext-sample}
+and \ref{fig:ext-insert}. Despite using significantly less memory and
+having no caching layer, \texttt{DE-IRS} was able to handily defeat the
+dynamic baseline in both sampling and update performance.
+
+Finally, we tested the multi-threaded insertion performance of our
+in-memory, concurrent implementation of \texttt{DE-IRS} compared to
+\texttt{AB-tree} configured to run entirely in memory. We used the
+synthetic uniform dataset (1B records) for this testing, and introduced a
+slight delay between inserts to avoid bottlenecking on the fetch-and-add
+within the mutable buffer. Figure~\ref{fig:con-latency} shows the latency
+vs. throughput curves for the two structures. Note that \texttt{AB-tree}'s
+results are cut off by the y-axis, as it performs significantly worse than
+\texttt{DE-IRS}. Figure~\ref{fig:con-tput} shows the insertion throughput
+as additional insertion threads are added. Both plots show linear scaling
+up to 3 or 4 threads, before the throughput levels off. Further, even
+with as many as 32 threads, the system is able to maintain a stable
+insertion throughput. Note that this implementation of concurrency
+is quite rudimentary, and doesn't take advantage of concurrent
+merging opportunities, among other things. An implementation with
+support for this will be discussed in Chapter~\ref{chap:tail-latency},
+and shown to perform significantly better. Even with this rudimentary
+implementation of concurrency, however, \texttt{DE-IRS} was able to
+outperform \texttt{AB-tree} under all conditions tested.
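For readers unfamiliar with \texttt{O\_DIRECT}: the flag bypasses the kernel
page cache, but in exchange requires block-aligned buffers, offsets, and
transfer sizes. A minimal sketch of the kind of direct read involved follows
(Linux-specific, error handling abbreviated; this is illustrative, not the
benchmark harness itself).

\begin{verbatim}
#define _GNU_SOURCE  // exposes O_DIRECT on Linux
#include <fcntl.h>
#include <unistd.h>
#include <cstdlib>

// Read one page with the OS page cache bypassed. Both the buffer
// and the offset must be aligned to the device block size.
ssize_t read_page_direct(const char *path, off_t offset,
                         size_t page_size, void **out) {
    int fd = open(path, O_RDONLY | O_DIRECT);
    if (fd < 0) return -1;

    void *buf = nullptr;
    if (posix_memalign(&buf, page_size, page_size) != 0) {
        close(fd);
        return -1;
    }
    ssize_t n = pread(fd, buf, page_size, offset);
    close(fd);
    *out = buf;  // caller releases with free()
    return n;
}
\end{verbatim}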
diff --git a/chapters/sigmod23/exp-parameter-space.tex b/chapters/sigmod23/exp-parameter-space.tex
index d2057ac..d53c592 100644
--- a/chapters/sigmod23/exp-parameter-space.tex
+++ b/chapters/sigmod23/exp-parameter-space.tex
@@ -1,105 +1,156 @@
-\subsection{Framework Design Space Exploration}
+\subsection{Design Space Exploration}
\label{ssec:ds-exp}
-The proposed framework brings with it a large design space, described in
-Section~\ref{ssec:design-space}. First, this design space will be examined
-using a standardized benchmark to measure the average insertion throughput and
-sampling latency of DE-WSS at several points within this space. Tests were run
-using a random selection of 500 million records from the OSM dataset, with the
-index warmed up by the insertion of 10\% of the total records prior to
-beginning any measurement. Over the course of the insertion period, 5\% of the
-records were deleted, except for the tests in
-Figures~\ref{fig:insert_delete_prop}, \ref{fig:sample_delete_prop}, and
-\ref{fig:bloom}, in which 25\% of the records were deleted. Reported update
-throughputs were calculated using both inserts and deletes, following the
-warmup period. The standard values
-used for parameters not being varied in a given test were $s = 6$, $N_b =
-12000$, $k=1000$, and $\delta = 0.05$, with buffer rejection sampling.
+Our proposed framework has a large design space, which we briefly
+described in Section~\ref{ssec:design-space}. The contents of this
+space will be described in much more detail in Chapter~\ref{chap:design-space},
+but as part of this work we performed an experimental examination of our
+framework to compare insertion throughput and query latency at various
+points within the space.
+
+We examined this design space by considering \texttt{DE-WSS} specifically,
+using a random sample of 500 million records from the \texttt{OSM}
+dataset. Prior to taking any measurements, we warmed the structure up by
+inserting 10\% of the total records in the set. We then measured the
+update throughput over the course of the insertion of the remaining
+records, randomly intermixing delete operations for 5\% of the
+total data. In the tests for Figures~\ref{fig:insert_delete_prop},
+\ref{fig:sample_delete_prop}, and \ref{fig:bloom}, we instead deleted
+25\% of the data.
+
+The reported update throughputs were calculated based on all of the
+inserts and deletes following the warmup, executed on a single thread.
+Query latency numbers were measured after all of the inserts and
+deletes had been completed. We used standardized values of $s = 6$,
+$N_b = 12000$, $k = 1000$, and $\delta = 0.05$ for parameters not being
+varied in a given test, and all buffer queries were answered using
+rejection sampling. We show the results of this testing in
+Figures~\ref{fig:parameter-sweeps1}, \ref{fig:parameter-sweeps2}, and
+\ref{fig:parameter-sweeps3}.
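For reference, the knobs swept in this section can be summarized as a small
configuration bundle, sketched below with the standardized values just
described. The type and field names are ours, for illustration only; they do
not reflect the actual implementation's interface.

\begin{verbatim}
#include <cstddef>

enum class LayoutPolicy { Leveling, Tiering };
enum class DeletePolicy { Tombstone, Tagging };

// Design-space parameters, initialized to the standardized values
// used when a given parameter is not being varied.
struct FrameworkConfig {
    size_t scale_factor    = 6;      // s
    size_t buffer_capacity = 12000;  // N_b, in records
    size_t sample_set_size = 1000;   // k
    double delete_bound    = 0.05;   // delta, max deleted proportion
    LayoutPolicy layout;             // varied: leveling or tiering
    DeletePolicy deletes;            // varied: tombstones or tagging
};
\end{verbatim}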
\begin{figure*}
\centering
\subfloat[Insertion Throughput vs. Mutable Buffer Capacity]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-mt-insert} \label{fig:insert_mt}}
-\subfloat[Insertion Throughput vs. Scale Factor]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-sf-insert} \label{fig:insert_sf}} \\
+\subfloat[Insertion Throughput vs. Scale Factor]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-sf-insert} \label{fig:insert_sf}} \\
-\subfloat[Insertion Throughput vs.\\Max Delete Proportion]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-tp-insert} \label{fig:insert_delete_prop}}
-\subfloat[Per 1000 Sampling Latency vs.\\Mutable Buffer Capacity]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-mt-sample} \label{fig:sample_mt}} \\
+\subfloat[Per 1000 Sampling Latency vs.\\Mutable Buffer Capacity]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-mt-sample} \label{fig:sample_mt}}
+\subfloat[Per 1000 Sampling Latency vs. Scale Factor]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-sf-sample} \label{fig:sample_sf}}
-\caption{DE-WSS Design Space Exploration I}
+\caption{DE-WSS Design Space Exploration: Major Parameters}
\label{fig:parameter-sweeps1}
\end{figure*}
-The results of this testing are displayed in
-Figures~\ref{fig:parameter-sweeps1},~\ref{fig:parameter-sweeps2},~and:wq~\ref{fig:parameter-sweeps3}.
-The two largest contributors to differences in performance were the selection
-of layout policy and of delete policy. Figures~\ref{fig:insert_mt} and
-\ref{fig:insert_sf} show that the choice of layout policy plays a larger role
-than delete policy in insertion performance, with tiering outperforming
-leveling in both configurations. The situation is reversed in sampling
-performance, seen in Figure~\ref{fig:sample_mt} and \ref{fig:sample_sf}, where
-the performance difference between layout policies is far less than between
-delete policies.
+We first note that the two largest contributors to performance
+differences across all of the tests were the selections of layout
+policy and delete policy. In particular, Figures~\ref{fig:insert_mt} and
+\ref{fig:insert_sf} demonstrate that layout policy plays a very significant
+role in insertion performance, with tiering outperforming leveling for both
+delete policies. The next largest effect was the delete policy selection,
+with tombstone deletes outperforming tagged deletes in insertion
+performance. This result aligns with the asymptotic analysis of the
+two approaches in Section~\ref{ssec:sampling-deletes}. It is interesting
+to note, however, that the effect of layout policy was more significant
+in these particular tests,\footnote{
+    Although the largest performance gap in absolute terms was between
+    tiering with tombstones and tiering with tagging, the selection of
+    delete policy was not enough to overcome the relative difference
+    between leveling and tiering in these tests, hence our labeling of
+    layout policy as the more significant.
+} despite both layout policies having the same asymptotic performance.
+This was likely due to the small number of deletes (only 5\% of the total
+operations) reducing their effect on the overall throughput.
-The values used for the scale factor and buffer size have less influence than
-layout and delete policy. Sampling performance is largely independent of them
-over the ranges of values tested, as shown in Figures~\ref{fig:sample_mt} and
-\ref{fig:sample_sf}. This isn't surprising, as these parameters adjust the
-number of shards, which only contributes to shard alias construction time
-during sampling and is amortized over all samples taken in a query. The
-buffer also contributes rejections, but the cost of a rejection is small and
-the buffer constitutes only a small portion of the total weight, so these are
-negligible. However, under tombstones there is an upward trend in latency with
-buffer size, as delete checks occasionally require a full buffer scan. The
-effect of buffer size on insertion is shown in Figure~\ref{fig:insert_mt}.
-{ There is only a small improvement in insertion performance as the mutable
-buffer grows. This is because a larger buffer results in fewer reconstructions,
-but these reconstructions individually take longer, and so the net positive
-effect is less than might be expected.} Finally, Figure~\ref{fig:insert_sf}
-shows the effect of scale factor on insertion performance. As expected, tiering
-performs better with higher scale factors, whereas the insertion performance of
-leveling trails off as the scale factor is increased, due to write
-amplification.
+The influence of scale factor on update performance is shown in
+Figure~\ref{fig:insert_sf}. The effect is different depending on the
+layout policy, with larger scale factors benefitting update performance
+under tiering, and hurting it under leveling.
+The effect of the mutable
+buffer size on insertion, shown in Figure~\ref{fig:insert_mt}, is a little
+less clear, but does show a slight upward trend, with larger buffers
+enhancing update performance in all cases. A larger buffer results in
+fewer reconstructions, but increases the size of these reconstructions,
+so the effect isn't as large as one might initially expect.
+
+Query performance follows broadly opposite trends to updates. We see in
+Figures~\ref{fig:sample_sf} and \ref{fig:sample_mt} that query latency
+is better under leveling than tiering, and that tagging is better than
+tombstones. More interestingly, the relative effect of the two decisions
+is also different. Here, the selection of delete policy has a larger
+effect than layout policy, in the sense that the better layout policy
+(leveling) with the worse delete policy (tombstones) loses to the worse
+layout policy (tiering) with the better delete policy (tagging). In fact,
+under tagging, the performance difference between the two layout policies
+is almost indistinguishable.
+
+Scale factor, shown in Figure~\ref{fig:sample_sf}, has very little
+effect on query performance. Thus, in this context, it would appear
+that the scale factor is primarily useful as an insertion performance
+tuning tool. The mutable buffer size, in Figure~\ref{fig:sample_mt},
+also generally has no clear effect. This is expected, because the buffer
+contains only a small number of records relative to the entire dataset,
+and so has a fairly low probability of being selected when drawing a
+sample. Even when it is selected, rejection sampling is very
+inexpensive. The one exception to this trend is when using tombstones,
+where the query performance degrades as the buffer size grows. This is
+because the rejection check process for tombstones requires, in some
+cases, a full buffer scan for every sample.
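The low cost of buffer rejections can be seen in a sketch of the buffer
sampling step for WSS (types and names are illustrative). A rejection is a
single failed comparison, so even frequent retries against a small buffer
are cheap; the tombstone case is costlier only because a successful draw
must then be checked against buffered tombstones, which can require a scan.

\begin{verbatim}
#include <cstdint>
#include <optional>
#include <random>
#include <vector>

struct BufferRecord { int64_t key; double weight; };

// WSS rejection sampling against the unsorted buffer: draw a slot
// uniformly, accept with probability weight / max_weight. Assumes
// a non-empty buffer; the caller retries on rejection.
std::optional<int64_t> buffer_sample(const std::vector<BufferRecord> &buf,
                                     double max_weight,
                                     std::mt19937_64 &rng) {
    std::uniform_int_distribution<size_t> slot(0, buf.size() - 1);
    std::uniform_real_distribution<double> coin(0.0, 1.0);
    const BufferRecord &r = buf[slot(rng)];
    if (coin(rng) < r.weight / max_weight)
        return r.key;     // accepted
    return std::nullopt;  // rejected
}
\end{verbatim}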
\begin{figure*}
\centering
-\subfloat[Per 1000 Sampling Latency vs. Scale Factor]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-sf-sample} \label{fig:sample_sf}}
+\subfloat[Insertion Throughput vs.\\Max Delete Proportion]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-tp-insert} \label{fig:insert_delete_prop}}
\subfloat[Per 1000 Sampling Latency vs. Max Delete Proportion]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-tp-sample}\label{fig:sample_delete_prop}} \\
-\caption{DE-WSS Design Space Exploration II}
+\caption{DE-WSS Design Space Exploration: Delete Bounding}
\label{fig:parameter-sweeps2}
\end{figure*}
-Figures~\ref{fig:insert_delete_prop} and \ref{fig:sample_delete_prop} show the
-cost of maintaining $\delta$ with a base delete rate of 25\%. The low cost of
-an in-memory sampling rejection results in only a slight upward trend in the
-sampling latency as the number of deleted records increases. While compaction
-is necessary to avoid pathological cases, there does not seem to be a
-significant benefit to aggressive compaction thresholds.
-Figure~\ref{fig:insert_delete_prop} shows the effect of compactions on insert
-performance. There is little effect on performance under tagging, but there is
-a clear negative performance trend associated with aggressive compaction when
-using tombstones. Under tagging, a single compaction is guaranteed to remove
-all deleted records on a level, whereas with tombstones a compaction can
-cascade for multiple levels before the delete bound is satisfied, resulting in
-a larger cost per incident.
+We also considered the effect that bounding the proportion of deleted
+records within the structure has on performance. In these tests,
+25\% of all records were eventually deleted over the course of the
+benchmark. Figure~\ref{fig:sample_delete_prop} shows the effect
+that maintaining these bounds has on query performance. In our
+testing, we saw very little query performance benefit from maintaining
+more aggressive bounds on deletes. This is likely because the cost
+of a rejection is relatively small in our query model. The bound
+does have a clear effect on insertion performance, though, as shown
+in Figure~\ref{fig:insert_delete_prop}. Under tagging, the cost of
+maintaining increasingly tight bounds on deleted records is small, likely
+because all deleted records on a level can be dropped by a single
+reconstruction. This means both that a violation of the bound can be
+resolved in a single compaction, and that violations of the bound are
+much less likely to occur, as each reconstruction removes all deleted
+records. Tombstone-based deletes require far more work to remove from
+the structure, and so we would expect to see a degradation of insertion
+performance as the bound tightens. Interestingly, we see the opposite:
+tighter bounds result in improved insertion performance. This is
+because the sheer volume of deleted records has a measurable effect
+on the size of the dynamized structure. The more proactive compactions
+prune these records, resulting in better performance.
\begin{figure*}
\centering
\subfloat[Sampling Latency vs. Sample Size]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-samplesize} \label{fig:sample_k}}
\subfloat[Per 1000 Sampling Latency vs. Bloom Filter Memory]{\includegraphics[width=.5\textwidth]{img/sigmod23/plot/fig-ps-wss-bloom}\label{fig:bloom}} \\
-\caption{DE-WSS Design Space Exploration III}
+\caption{DE-WSS Design Space Exploration: Misc.}
\label{fig:parameter-sweeps3}
\end{figure*}
-Figure~\ref{fig:bloom} demonstrates the trade-off between memory usage for
-Bloom filters and sampling performance under tombstones. This test was run
-using 25\% incoming deletes with no compaction, to maximize the number of
-tombstones within the index as a worst-case scenario. As expected, allocating
-more memory to Bloom filters, decreasing their false positive rates,
-accelerates sampling. Finally, Figure~\ref{fig:sample_k} shows the relationship
-between average per sample latency and the sample set size. It shows the effect
-of amortizing the initial shard alias setup work across an increasing number of
-samples, with $k=100$ as the point at which latency levels off.
+Finally, we consider two more parameters: memory usage for Bloom filters
+and the effect of sample set size on query latency. Figure~\ref{fig:bloom}
+shows the trade-off between memory allocated to filters and sampling
+performance when tombstones are used. Recall that these Bloom filters
+are specifically used for tombstones, not for general records, and
+are used to accelerate rejection checks of sampled records. In this
+test, 25\% of all records were deleted and $\delta$ was set to 0 to
+disable all proactive compaction, to present a worst-case scenario in
+terms of tombstones. Allocating additional memory to the Bloom filters
+decreases their false positive rates, and results in better sampling
+performance.
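To clarify the role these filters play, the sketch below shows the shape of
the tombstone rejection check, using a deliberately toy filter (fixed size,
two hash probes; the real implementation's filters are properly sized and
parameterized). A false positive costs only one unnecessary tombstone
lookup, which is why added filter memory translates into lower sampling
latency.

\begin{verbatim}
#include <bitset>
#include <cstdint>
#include <functional>

// Toy Bloom filter over tombstone keys. Deletes insert the key of
// the deleted record; rejection checks consult the filter before
// paying for a real tombstone lookup in the shard.
class TombstoneFilter {
    static constexpr size_t kBits = 1 << 16;
    std::bitset<kBits> bits_;
    static size_t h1(uint64_t k) {
        return std::hash<uint64_t>{}(k) % kBits;
    }
    static size_t h2(uint64_t k) {
        return std::hash<uint64_t>{}(k ^ 0x9e3779b97f4a7c15ULL) % kBits;
    }
public:
    void insert(uint64_t key) { bits_.set(h1(key)); bits_.set(h2(key)); }
    bool may_contain(uint64_t key) const {
        return bits_.test(h1(key)) && bits_.test(h2(key));
    }
};
\end{verbatim}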
+Finally, Figure~\ref{fig:sample_k} compares the sample set
+size and the average latency of drawing a single sample, to demonstrate
+the ability of our procedure to amortize the preliminary work across
+multiple samples in a sample set. After a sample set size of $k=100$,
+we stop seeing a benefit from increasing the size, indicating the limit
+of how much the preliminary work can be effectively amortized.
-Based upon these results, a set of parameters was established for the extended
-indexes, which is used in the next section for baseline comparisons. This
-standard configuration uses tagging as the delete policy and tiering as the
-layout policy, with $k=1000$, $N_b = 12000$, $\delta = 0.05$, and $s = 6$.
+Based upon the results of this preliminary study, we established a set
+of standardized parameters to use for the baseline comparisons in the
+remainder of this section. We will use tagging for deletes, tiering as
+the layout policy, $k=1000$, $N_b = 12000$, $\delta = 0.05$, and $s =
+6$, unless otherwise stated.
diff --git a/chapters/sigmod23/experiment.tex b/chapters/sigmod23/experiment.tex
index 75cf32e..4dbb4c2 100644
--- a/chapters/sigmod23/experiment.tex
+++ b/chapters/sigmod23/experiment.tex
@@ -1,18 +1,36 @@
\section{Evaluation}
\label{sec:experiment}
+In this section, we provide comprehensive performance benchmarks
+of implementations of the dynamized structures discussed in
+Sections~\ref{sec:instance} and \ref{sec:discussion}. All of the code was
+written using C++17. The full implementations, including benchmarking
+code, are available on GitHub under the Modified BSD License, at
+\url{https://github.com/psu-db/sampling-extension-original}.\footnote{
+    We also provide a ``cleaner'' implementation for WSS and WIRS,
+    with a structure and nomenclature better aligned with this
+    chapter, here: \url{https://github.com/psu-db/sampling-extension}.
+}
-\Paragraph{Experimental Setup.} All experiments were run under Ubuntu 20.04 LTS
-on a dual-socket Intel Xeon Gold 6242R server with 384 GiB of physical memory
-and 40 physical cores. External tests were run using a 4 TB WD Red SA500 SATA
-SSD, rated for 95000 and 82000 IOPS for random reads and writes respectively.
-
-\Paragraph{Datasets.} Testing utilized a variety of synthetic and real-world
-datasets. For all datasets used, the key was represented as a 64-bit integer,
-the weight as a 64-bit integer, and the value as a 32-bit integer. Each record
-also contained a 32-bit header. The weight was omitted from IRS testing.
-Keys and weights were pulled from the dataset directly, and values were
-generated separately and were unique for each record. The following datasets
-were used,
+\Paragraph{Experimental Setup.} We ran all of our experiments on Ubuntu
+20.04 LTS using a server equipped with dual-socket Intel Xeon Gold 6242R
+processors with 40 physical cores and 384 GiB of physical memory. We
+performed testing of external structures with a 4 TB WD Red SA500 SATA
+drive, rated at 95000 IOPS for random reads and 82000 IOPS for random
+writes. All benchmarking code was compiled with GCC version 11.3.0 at
+the \texttt{-O3} optimization level.
+
+\Paragraph{Datasets.} We used a variety of synthetic and real-world
+datasets of various distributions to test sampling performance. For all
+of our datasets, we treated the data as a sequence of key-value pairs
+with a 64-bit integer key and a 32-bit integer value. Our dynamizations
+introduced a 32-bit header to each record as well. This header was not
+added to records when testing dynamic baselines. Additionally, weighted
+testing attached a 64-bit integer weight to each record. This weight was
+not included in the record for non-weighted testing. The weights and
+keys were both used directly from the datasets, and values were generated
+separately and were unique to each record.
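Concretely, the record layouts just described correspond to structures along
the following lines (the field names are ours; the header carries per-record
flags such as the tombstone bit):

\begin{verbatim}
#include <cstdint>

// Unweighted record: 16 bytes total.
struct Record {
    uint64_t key;     // 64-bit key, taken from the dataset
    uint32_t value;   // 32-bit value, generated, unique per record
    uint32_t header;  // 32-bit header: tombstone and other flags
};

// Weighted record, used for WSS/WIRS testing: 24 bytes total.
struct WeightedRecord {
    uint64_t key;
    uint32_t value;
    uint32_t header;
    uint64_t weight;  // 64-bit weight, taken from the dataset
};
\end{verbatim}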
+We used the following datasets for testing,
\begin{itemize}
\item \textbf{Synthetic Uniform.} A non-weighted, synthetically generated
      list of keys drawn from a uniform distribution.
@@ -23,26 +41,57 @@ were used,
\item \textbf{Delicious~\cite{data-delicious}.} $33.7$ million URLs,
      represented using unique integers, weighted by the number of associated
      tags.
\item \textbf{OSM~\cite{data-osm}.} $2.6$ billion geospatial coordinates for points
-      of interest, collected by OpenStreetMap. The latitude, converted
-      to a 64-bit integer, was used as the key and the number of
+      of interest, collected by OpenStreetMap. We used the latitude, converted
+      to a 64-bit integer, as the key and the number of
      its associated semantic tags as the weight.
\end{itemize}
-The synthetic datasets were not used for weighted experiments, as they do not
-have weights. For unweighted experiments, the Twitter and Delicious datasets
-were not used, as they have uninteresting key distributions.
-
-\Paragraph{Compared Methods.} In this section, indexes extended using the
-framework are compared against existing dynamic baselines. Specifically, DE-WSS
-(Section~\ref{ssec:wss-struct}), DE-IRS (Section~\ref{ssec:irs-struct}), and
-DE-WIRS (Section~\ref{ssec:irs-struct}) are examined. In-memory extensions are
-compared against the B+tree with aggregate weight tags on internal nodes (AGG
-B+tree) \cite{olken95} and concurrent and external extensions are compared
-against the AB-tree \cite{zhao22}. Sampling performance is also compared against
-comparable static sampling indexes: the alias structure \cite{walker74} for WSS,
-the in-memory ISAM tree for IRS, and the alias-augmented B+tree \cite{afshani17}
-for WIRS. Note that all structures under test, with the exception of the
-external DE-IRS and external AB-tree, were contained entirely within system
-memory. All benchmarking code and data structures were implemented using C++17
-and compiled using gcc 11.3.0 at the \texttt{-O3} optimization level. The
-extension framework itself, excluding the shard implementations and utility
-headers, consisted of a header-only library of about 1200 SLOC.
+We did not use the synthetic uniform and zipfian datasets for testing
+WSS and WIRS, as these datasets lack weights. We also did not use the
+Twitter and Delicious datasets for unweighted testing, as they have
+uninteresting key distributions.
+
+\Paragraph{Structures Compared.} As a basis of comparison, we tested
+both our dynamized SSI implementations and existing dynamic baselines
+for each sampling problem considered. Specifically, we consider the
+following dynamized structures,
+\begin{itemize}
+
+\item \textbf{DE-WSS.} An implementation of the dynamized alias
+structure~\cite{walker74} for weighted set sampling discussed
+in Section~\ref{ssec:wss-struct}. We compare this against a WSS
+implementation of Olken's method on a B+Tree with aggregate weight tags
+(\textbf{AGG-BTree})~\cite{olken95}, based on the B+tree implementation
+in the TLX library~\cite{tlx}.
+
+\item \textbf{DE-IRS.} An implementation of the dynamized ISAM tree for
+independent range sampling, discussed in Section~\ref{ssec:irs-struct}.
+We also implemented a concurrent version based on our discussion in
+Section~\ref{ssec:ext-concurrency} and an external version from
+Section~\ref{ssec:ext-external}. We compare the external and concurrent
+versions against the AB-tree~\cite{zhao22}, while the single-threaded,
+in-memory version is compared with an IRS implementation of Olken's
+method on an AGG-BTree.
+
+\item \textbf{DE-WIRS.} An implementation of the dynamized alias-augmented
+B+Tree~\cite{afshani17}, as discussed in Section~\ref{ssec:wirs-struct}, for
+weighted independent range sampling. We compare this against a WIRS
+implementation of Olken's method on an AGG-BTree.
+
+\end{itemize}
+
+All of the tested structures, with the exception of the external-memory
+DE-IRS implementation and AB-Tree, were wholly contained within system
+memory. AB-Tree is a natively external structure, so for the in-memory
+concurrency evaluation we configured it with enough cache to maintain
+the entire structure in memory, to simulate an in-memory implementation.\footnote{
+    Because of the nature of sampling queries, traditional
+    efficient locking techniques for B+Trees cannot be
+    used~\cite{zhao22}. The alternatives were to run AB-Tree in this
+    manner, or to globally lock the B+Tree for every operation. We
+    elected to use the former approach for this chapter, and the
+    latter approach in the next chapter.
+}
diff --git a/chapters/sigmod23/extensions.tex b/chapters/sigmod23/extensions.tex
index 2752b0f..06d55a5 100644
--- a/chapters/sigmod23/extensions.tex
+++ b/chapters/sigmod23/extensions.tex
@@ -1,20 +1,31 @@
\captionsetup[subfloat]{justification=centering}
-\section{Extensions to the Framework}
+\section{Extensions}
\label{sec:discussion}
-While this chapter has thus far discussed single-threaded, in-memory data
-structures, the framework as proposed can be easily extended to support
-other use-cases. In this section, we discuss extending this framework
-to support concurrency and external data structures.
+While this chapter has thus far discussed single-threaded, in-memory
+data structures, our technique can be easily extended to support other
+use-cases. In this section, we will discuss extensions to support
+concurrency and external data structures.
+\subsection{External Data Structures}
+\label{ssec:ext-external}
-\Paragraph{Larger-than-Memory Data.} Our dynamization techniques,
-as discussed thus far, can easily accomodate external data structures
+Our dynamization techniques can easily accommodate external data structures
as well as in-memory ones. To demonstrate this, we have implemented a
dynamized version of an external ISAM tree for use in answering IRS
queries. The mutable buffer remains an unsorted array in memory, however
-the shards themselves can either \emph{either} an in-memory ISAM tree
-or an external one. Our system allows for a user-configurable number of
-shards and the rest on disk, for performance tuning purposes.
+the shards themselves can be \emph{either} an in-memory ISAM tree or an
+external one. Our system allows for a user-configurable number of shards
+to reside in memory, and the rest on disk. This allows the smallest
+few shards, which sustain the most reconstructions, to reside in memory
+for performance, while storing most of the data on disk, in an attempt
+to get the best of both worlds, so to speak.\footnote{
+    In traditional LSM Trees, which are an external data structure,
+    only the memtable resides in memory.
+    We have decided to break with
+    this model because, for query performance reasons, the mutable
+    buffer must remain small. By placing a few levels in memory, the
+    performance effects of frequent buffer flushes can be mitigated. This
+    isn't strictly necessary, however.
+}
The on-disk shards are built from standard ISAM trees using $8$ KiB
page-aligned internal and leaf nodes. To avoid random writes, we only
@@ -25,43 +36,50 @@ when it is not located. However, because of the geometric growth rate
of the shards, at any given time the majority of the data will be on
disk anyway, so this would only provide a marginal improvement.
-Our implementation does not include a buffer manager, for simplicty. The
-external interface requires passing in page-aligned buffers.
+\subsection{Distributed Data Structures}
+Many distributed data processing systems are built on immutable
+abstractions, such as Apache Spark's resilient distributed dataset
+(RDD)~\cite{rdd} or the Hadoop file system's (HDFS) append-only
+files~\cite{hadoop}. Each shard can be encapsulated within an HDFS
+file or a Spark RDD, and a centralized control node can manage the
+mutable buffer. Flushing this buffer would create a new file/RDD, and
+reconstructions could likewise be performed by creating new immutable
+structures through the merging of existing ones, using the same basic
+scheme as has already been discussed in this chapter. Using these tools,
+SSIs over datasets that exceed the capacity of a single node could be
+supported. Such distributed SSIs do exist, such as the RDD-based sampling
+structure used in XDB~\cite{li19}.
+\subsection{Concurrency}
+\label{ssec:ext-concurrency}
+Because our dynamization technique is built on top of static data
+structures, a limited form of concurrency support is straightforward to
+implement. To that end, we created a proof-of-concept dynamization of an
+ISAM Tree for IRS based on a simplified version of a general concurrency
+control scheme for log-structured data stores~\cite{golan-gueta15}.
-\Paragraph{Applications to distributed data structures.}
-Many distributed file-systems are built on immutable abstracted, such
-Apache Spark's resilient distributed dataset (RDD)~\cite{rdd} or Hadoop's
-immutable
+First, we restrict ourselves to tombstone deletes. This ensures that
+all of the static data structures within our dynamization are immutable.
+When using tagging, the deleted flags on records in these structures could
+be dynamically updated, leading to possible synchronization issues. While
+this isn't a fundamentally unsolvable problem, and could be addressed
+simply through the use of a timestamp in the header of the records, we
+decided to keep things simple and implement our concurrency scheme on the
+assumption of full shard immutability.
+
+Given this immutability, we can construct a simple versioning system over
+the entire structure. Reconstructions can be performed in the background
+and then ``activated'' atomically by using a simple compare-and-swap of
+a pointer to the entire structure. Reference counting can then be used
+to automatically free old versions of the structure when all queries
+accessing them have finished.
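A minimal sketch of this versioning scheme is shown below, using the C++17
atomic \texttt{shared\_ptr} free functions, so that the reference count
doubles as the mechanism for reclaiming old versions. It is a simplified
illustration of the approach just described, not the repository's actual
implementation.

\begin{verbatim}
#include <atomic>
#include <memory>
#include <vector>

struct Shard { /* an immutable static structure */ };

struct Version {
    std::vector<std::shared_ptr<const Shard>> shards;
};

class DynamizedStructure {
    std::shared_ptr<const Version> current_;
public:
    // Queries pin the active version; the shared_ptr's reference
    // count keeps it alive until the last query releases it.
    std::shared_ptr<const Version> pin() const {
        return std::atomic_load(&current_);
    }

    // A background reconstruction installs its result with a CAS;
    // this fails if another reconstruction activated a version first.
    bool activate(std::shared_ptr<const Version> expected,
                  std::shared_ptr<const Version> next) {
        return std::atomic_compare_exchange_strong(&current_, &expected,
                                                   std::move(next));
    }
};
\end{verbatim}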
-Because the framework maintains immutability of shards, it is also well suited for
-use on top of distributed file-systems or with other distributed data
-abstractions like RDDs in Apache Spark~\cite{rdd}. Each shard can be
-encapsulated within an immutable file in HDFS or an RDD in Spark. A centralized
-control node or driver program can manage the mutable buffer, flushing it into
-a new file or RDD when it is full, merging with existing files or RDDs using
-the same reconstruction scheme already discussed for the framework. This setup
-allows for datasets exceeding the capacity of a single node to be supported. As
-an example, XDB~\cite{li19} features an RDD-based distributed sampling
-structure that could be supported by this framework.
-
-\Paragraph{Concurrency.} The immutability of the majority of the structures
-within the index makes for a straightforward concurrency implementation.
-Concurrency control on the buffer is made trivial by the fact it is a simple,
-unsorted array. The rest of the structure is never updated (aside from possible
-delete tagging), and so concurrency becomes a simple matter of delaying the
-freeing of memory used by internal structures until all the threads accessing
-them have exited, rather than immediately on merge completion. A very basic
-concurrency implementation can be achieved by using the tombstone delete
-policy, and a reference counting scheme to control the deletion of the shards
-following reconstructions. Multiple insert buffers can be used to improve
-insertion throughput, as this will allow inserts to proceed in parallel with
-merges, ultimately allowing concurrency to scale up to the point of being
-bottlenecked by memory bandwidth and available storage. This proof-of-concept
-implementation is based on a simplified version of an approach proposed by
-Golan-Gueta et al. for concurrent log-structured data stores
-\cite{golan-gueta15}.
+The buffer itself is an unsorted array, so a query can capture a
+consistent and static version by storing the tail pointer at the time
+the query begins. New inserts can be performed concurrently by doing
+a fetch-and-add on the tail. By using multiple buffers, inserts and
+reconstructions can proceed, to some extent, in parallel, which helps to
+hide some of the insertion tail latency due to blocking on reconstructions
+during a buffer flush.
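The buffer protocol described above amounts to a few atomic operations,
sketched here (capacity handling, flushing, and the visibility of in-flight
writes to concurrent queries are all simplified away; the names are
illustrative):

\begin{verbatim}
#include <atomic>
#include <cstddef>
#include <cstdint>

struct Record { uint64_t key; uint32_t value; uint32_t header; };

class MutableBuffer {
    Record *slots_;
    size_t capacity_;
    std::atomic<size_t> tail_{0};
public:
    MutableBuffer(Record *slots, size_t capacity)
        : slots_(slots), capacity_(capacity) {}

    // Inserts claim a unique slot with fetch-and-add.
    bool insert(const Record &r) {
        size_t idx = tail_.fetch_add(1);
        if (idx >= capacity_) return false;  // full: flush required
        slots_[idx] = r;
        return true;
    }

    // A query reads the tail once at its start; slots [0, snapshot)
    // form a consistent, static view for the query's lifetime.
    size_t snapshot() const { return tail_.load(); }
};
\end{verbatim}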
diff --git a/chapters/sigmod23/framework.tex b/chapters/sigmod23/framework.tex
index 89f15c3..0f3fac8 100644
--- a/chapters/sigmod23/framework.tex
+++ b/chapters/sigmod23/framework.tex
@@ -232,12 +232,13 @@ or are naturally determined as part of the pre-processing,
and thus the $W(n)$ term can be merged into $P(n)$.
\subsection{Supporting Deletes}
+\label{ssec:sampling-deletes}
As discussed in Section~\ref{ssec:background-deletes}, the Bentley-Saxe
method can support deleting records through the use of either weak
-deletes, or a secondary ghost structure, assume certain properties are
+deletes, or a secondary ghost structure, assuming certain properties are
satisfied by either the search problem or data structure. Unfortunately,
-neither approach can work as a "drop-in" solution in the context of
+neither approach can work as a ``drop-in'' solution in the context of
sampling problems, because of the way that deleted records interact
with the sampling process itself. Sampling problems, as formalized here,
are neither invertible, nor deletion decomposable. In this section,
@@ -258,9 +259,9 @@ the structure with a tombstone bit set in the header.
This mechanism is used to support \emph{ghost structure} based deletes.
\end{enumerate}
-Broadly speaking, for sampling problems, tombstone deletes cause a number
-of problems because \emph{sampling problems are not invertible}. However,
-this limitation can be worked around during the query process if desired.
+Broadly speaking, for sampling problems, tombstone deletes cause a
+number of problems because \emph{sampling problems are not invertible}.
+This limitation can be worked around during the query process if desired.
Tagging is much more natural for these search problems. However, the
flexibility of selecting either option is desirable because of their
different performance characteristics.
@@ -527,8 +528,8 @@ unwieldy and are targeted at tuning the worst-case at the
expense of the common case. We will take a different approach to adding
configurability to our dynamization system.
-Though it has thus far gone unmentioned, readers familiar with LSM Trees
-may have noted the astonishing similarity between decomposition-based
+Though it has thus far gone unmentioned, some readers may have
+noted the astonishing similarity between decomposition-based
dynamization techniques, and a data structure called the Log-structured
Merge-tree. First proposed by O'Neil in the mid '90s~\cite{oneil96}, the
LSM Tree was designed to optimize write throughput for external data
@@ -541,7 +542,7 @@ layered, external structures, to reduce the cost of reconstruction.
In more recent times, the LSM Tree has seen significant development and
been used as the basis for key-value stores like RocksDB~\cite{dong21}
-and LevelDB~\cite{leveldb}. This work as produced an incredibly large
+and LevelDB~\cite{leveldb}. This work has produced an incredibly large
and well explored parameterization of the reconstruction procedures
of LSM Trees, a good summary of which can be found in this recent tutorial
paper~\cite{sarkar23}. Examples of this design space exploration include:
@@ -701,7 +702,7 @@ levels below it, which may require further reconstructions to occur
to make room. The manner in which these reconstructions proceed follows
the selection of layout policy,
\begin{itemize}
-\item[\textbf{Leveling}] When a buffer flush occurs in the leveling
+\item \textbf{Leveling.} When a buffer flush occurs in the leveling
policy, the system scans the existing levels to find the first level
which has sufficient empty space to store the contents of the level
above it. More formally, if the number of records in level $i$ is $N_i$, then
@@ -711,8 +712,8 @@ empty level is added and $i$ is set to the index of this new
level. Then, a reconstruction is executed containing all of the records
in levels $i$ and $i - 1$ (where $i=-1$ indicates the temporary shard
built from the buffer). Following this reconstruction, all levels $j < i$
are shifted
-by one level.
-\item[\textbf{Tiering}] When using tiering, the system will locate
+by one level to $j + 1$.
+\item \textbf{Tiering.} When using tiering, the system will locate
the first level, $i$, containing fewer than $s$ shards. If no such level
exists, then a new empty level is added and $i$ is set to the index of
that level. Then, for each level $j < i$, a reconstruction
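The two flush policies described in the hunk above reduce to different rules
for selecting a target level, as sketched below (the \texttt{Level} type and
its fields are illustrative stand-ins; the reconstruction itself, merging
shards into a new static structure, is elided):

\begin{verbatim}
#include <cstddef>
#include <vector>

struct Level {
    size_t shards;    // number of shards on this level
    size_t records;   // records currently stored
    size_t capacity;  // record capacity under leveling
};

// Leveling: first level with room for the contents of the level
// above it; levels.size() signals that a new level must be added.
size_t leveling_target(const std::vector<Level> &levels, size_t incoming) {
    for (size_t i = 0; i < levels.size(); i++) {
        size_t above = (i == 0) ? incoming : levels[i - 1].records;
        if (levels[i].records + above <= levels[i].capacity)
            return i;
    }
    return levels.size();
}

// Tiering: first level holding fewer than s shards.
size_t tiering_target(const std::vector<Level> &levels, size_t s) {
    for (size_t i = 0; i < levels.size(); i++)
        if (levels[i].shards < s)
            return i;
    return levels.size();
}
\end{verbatim}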
diff --git a/references/references.bib b/references/references.bib
index 5fef30a..1bc708b 100644
--- a/references/references.bib
+++ b/references/references.bib
@@ -508,7 +508,7 @@
@misc {postgres-doc,
    title = {PostgreSQL Documentation},
    url = {https://www.postgresql.org/docs/15/sql-select.html},
-    year = {2023}
+    year = {2025}
}

@misc {db2-doc,
@@ -521,43 +521,49 @@
@online {pinecone,
    title = {Pinecone DB},
    url = {https://www.pinecone.io/},
-    year = {2023}
+    year = {2025}
}

@online {neptune,
    title = {Amazon Neptune},
    url = {https://aws.amazon.com/neptune/},
-    year = {2023}
+    year = {2025}
}

@online {teradata,
    title = {Teradata},
    url = {https://www.teradata.com/},
-    year = {2023}
+    year = {2025}
}

@online {pinot,
    title = {Apache Pinot},
    url = {https://pinot.apache.org/},
-    year = {2023}
+    year = {2025}
+}
+
+@online{neo4j,
+    title = {neo4j},
+    url = {https://neo4j.com/},
+    year = {2025}
}

@online {pinecone-db,
    title = {Pinecone DB: Hierarchical Navigable Small Worlds},
    url = {https://www.pinecone.io/learn/series/faiss/hnsw/},
-    year = {2023}
+    year = {2025}
}

@online {postgis-doc,
    title = {Introduction to PostGIS: Spatial Indexing},
    url = {https://postgis.net/workshops/postgis-intro/indexing.html},
-    year = {2023}
+    year = {2025}
}

@online {mysql-btree-hash,
    title = {MySQL Documentation - Comparison of B-tree and Hash Indexes},
    url = {https://dev.mysql.com/doc/refman/8.0/en/index-btree-hash.html},
-    year = {2023}
+    year = {2025}
}

@article{olken95,
@@ -1580,3 +1586,47 @@ keywords = {analytic model, analysis of algorithms, overflow chaining, performance},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
+@inproceedings{hadoop,
+    author = {Konstantin Shvachko and Hairong Kuang and Sanjay Radia and Robert Chansler},
+    editor = {Mohammed G. Khatib and Xubin He and Michael Factor},
+    title = {The Hadoop Distributed File System},
+    booktitle = {{IEEE} 26th Symposium on Mass Storage Systems and Technologies, {MSST} 2012, Lake Tahoe, Nevada, USA, May 3-7, 2010},
+    pages = {1--10},
+    publisher = {{IEEE} Computer Society},
+    year = {2010},
+    url = {https://doi.org/10.1109/MSST.2010.5496972},
+    doi = {10.1109/MSST.2010.5496972},
+    timestamp = {Fri, 24 Mar 2023 00:01:51 +0100},
+    biburl = {https://dblp.org/rec/conf/mss/ShvachkoKRC10.bib},
+    bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@Misc{tlx,
+    title = {{TLX}: Collection of Sophisticated {C++} Data Structures, Algorithms, and Miscellaneous Helpers},
+    author = {Timo Bingmann},
+    year = {2018},
+    note = {\url{https://panthema.net/tlx}, retrieved {Oct.} 7, 2020},
+}
+
+@article{codd70,
+    author = {E. F. Codd},
+    title = {A Relational Model of Data for Large Shared Data Banks},
+    journal = {Commun. {ACM}},
+    volume = {13},
+    number = {6},
+    pages = {377--387},
+    year = {1970},
+    url = {https://doi.org/10.1145/362384.362685},
+    doi = {10.1145/362384.362685},
+    timestamp = {Fri, 24 Mar 2023 16:31:07 +0100},
+    biburl = {https://dblp.org/rec/journals/cacm/Codd70.bib},
+    bibsource = {dblp computer science bibliography, https://dblp.org}
+}