From 0dc1a8ea20820168149cedaa14e223d4d31dc4b6 Mon Sep 17 00:00:00 2001
From: "Douglas B. Rumbaugh" <doug@douglasrumbaugh.com>
Date: Sun, 6 Jul 2025 18:21:32 -0400
Subject: updates

---
 chapters/design-space.tex | 127 ++++++++++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 55 deletions(-)

(limited to 'chapters/design-space.tex')

diff --git a/chapters/design-space.tex b/chapters/design-space.tex
index f85883c..22773e5 100644
--- a/chapters/design-space.tex
+++ b/chapters/design-space.tex
@@ -127,72 +127,89 @@ be $N_B (s - 1) \cdot s^i$, where $N_B$ is the size of the buffer. The
 resulting structure will have at most $\log_s n$ shards. The resulting
 policy is described in Algorithm~\ref{alg:design-bsm}.
 
-Unfortunately, the approach used by Bentley and Saxe to calculate the
-amortized insertion cost of the BSM does not generalize to larger bases,
-and so we will need to derive this result using a different approach.
+Analyzing the amortized insertion performance of BSM is slightly
+complicated by the fact that each record is \emph{not} written on
+every level. For the purposes of our analysis, however, establishing a
+reasonable upper bound on the amortized insertion cost is sufficient,
+and so we will settle for a looser bound to keep things simple.
 
 \begin{theorem}
 The amortized insertion cost for generalized BSM with a growth factor of
-$s$ is $\Theta\left(\frac{B(n)}{n} \cdot s\log_s n)\right)$.
+$s$ is $O\left(\frac{B(n)}{n} \cdot s\log_s n\right)$.
 \end{theorem}
 \begin{proof}
+In generalized BSM, each record will be written at most $s$ times
+per level, and there are $O(\log_s n)$ levels in the decomposition, so
+each record participates in $O(s \log_s n)$ reconstructions. The
+worst-case cost of a reconstruction in BSM is a full compaction of the
+structure, which requires $B(n)$ time, or $\frac{B(n)}{n}$ per record
+when spread across the $n$ records involved. As a result, the amortized
+insertion cost can be bounded above by,
+\begin{equation}
+I_A(n) \in O\left(\frac{B(n)}{n} \cdot s \log_s n\right).
+\end{equation}
+\end{proof}
 
-In order to calculate the amortized insertion cost, we will first
-determine the average number of times that a record is involved in a
-reconstruction, and then amortize those reconstructions over the records
-in the structure.
 
-If we consider only the first level of the structure, it's clear that
-the reconstruction count associated with each record in that structure
-will follow the pattern, $1, 2, 3, 4, ..., s-1$ when the level is full.
-Thus, the total number of reconstructions associated with records on level
-$i=0$ is the sum of that sequence, or
-\begin{equation*}
-W(0) = \sum_{j=1}^{s-1} j = \frac{1}{2}\left(s^2 - s\right)
-\end{equation*}
 
-Considering the next level, $i=1$, each reconstruction involving this
-level will copy down the entirety of the structure above it, adding
-one more write per record, as well as one extra write for the new record.
-More specifically, in the above example, the first "batch" of records in
-level $i=1$ will have the following write counts: $1, 2, 3, 4, 5, ..., s$,
-the second "batch" of records will increment all of the existing write
-counts by one, and then introduce another copy of $1, 2, 3, 4, 5, ..., s$
-writes, and so on.
+% \begin{proof}
 
-Thus, each new "batch" written to level $i$ will introduce $W(i-1) + 1$
-writes from the previous level into level $i$, as well as rewriting all
-of the records currently on level $i$.
+% In order to calculate the amortized insertion cost, we will first
+% determine the average number of times that a record is involved in a
+% reconstruction, and then amortize those reconstructions over the records
+% in the structure.
 
-The net result of this is that the number of writes on level $i$ is given
-by the following recurrence relation (combined with the $W(0)$ base case),
+% If we consider only the first level of the structure, it's clear that
+% the reconstruction count associated with each record in that structure
+% will follow the pattern, $1, 2, 3, 4, ..., s-1$ when the level is full.
+% Thus, the total number of reconstructions associated with records on level
+% $i=0$ is the sum of that sequence, or
+% \begin{equation*}
+% W(0) = \sum_{j=1}^{s-1} j = \frac{1}{2}\left(s^2 - s\right)
+% \end{equation*}
 
-\begin{equation*}
-W(i) = sW(i-1) + \frac{1}{2}\left(s-1\right)^2 \cdot s^i
-\end{equation*}
+% Considering the next level, $i=1$, each reconstruction involving this
+% level will copy down the entirety of the structure above it, adding
+% one more write per record, as well as one extra write for the new record.
+% More specifically, in the above example, the first ``batch'' of records in
+% level $i=1$ will have the following write counts: $1, 2, 3, 4, 5, ..., s$,
+% the second ``batch'' of records will increment all of the existing write
+% counts by one, and then introduce another copy of $1, 2, 3, 4, 5, ..., s$
+% writes, and so on.
 
-which can be solved to give the following closed-form expression,
-\begin{equation*}
-W(i) = s^i \cdot \left(\frac{1}{2} (s-1) \cdot (s(i+1) - i)\right)
-\end{equation*}
-which provides the total number of reconstructions that records in
-level $i$ of the structure have participated in. As each record
-is involved in a different number of reconstructions, we'll consider the
-average number by dividing $W(i)$ by the number of records in level $i$.
-
-From here, the proof proceeds in the standard way for this sort of
-analysis. The worst-case cost of a reconstruction is $B(n)$, and there
-are $\log_s(n)$ total levels, so the total reconstruction costs associated
-with a record can be upper-bounded by, $B(n) \cdot
-\frac{W(\log_s(n))}{n}$, and then this cost amortized over the $n$
-insertions necessary to get the record into the last level. We'll also
-condense the multiplicative constants and drop the additive ones to more
-clearly represent the relationship we're looking to show. This results
-in an amortized insertion cost of,
-\begin{equation*}
-\frac{B(n)}{n} \cdot s \log_s n
-\end{equation*}
-\end{proof}
+% Thus, each new ``batch'' written to level $i$ will introduce $W(i-1) + 1$
+% writes from the previous level into level $i$, as well as rewriting all
+% of the records currently on level $i$.
+
+% The net result of this is that the number of writes on level $i$ is given
+% by the following recurrence relation (combined with the $W(0)$ base case),
+
+% \begin{equation*}
+% W(i) = sW(i-1) + \frac{1}{2}\left(s-1\right)^2 \cdot s^i
+% \end{equation*}
+
+% which can be solved to give the following closed-form expression,
+% \begin{equation*}
+% W(i) = s^i \cdot \left(\frac{1}{2} (s-1) \cdot (s(i+1) - i)\right)
+% \end{equation*}
+% which provides the total number of reconstructions that records in
+% level $i$ of the structure have participated in. As each record
+% is involved in a different number of reconstructions, we'll consider the
+% average number by dividing $W(i)$ by the number of records in level $i$.
+
+% From here, the proof proceeds in the standard way for this sort of
+% analysis. The worst-case cost of a reconstruction is $B(n)$, and there
+% are $\log_s(n)$ total levels, so the total reconstruction costs associated
+% with a record can be upper-bounded by, $B(n) \cdot
+% \frac{W(\log_s(n))}{n}$, and then this cost amortized over the $n$
+% insertions necessary to get the record into the last level. We'll also
+% condense the multiplicative constants and drop the additive ones to more
+% clearly represent the relationship we're looking to show. This results
+% in an amortized insertion cost of,
+% \begin{equation*}
+% \frac{B(n)}{n} \cdot s \log_s n
+% \end{equation*}
+% \end{proof}
 
 \begin{theorem}
 The worst-case insertion cost for generalized BSM with a scale factor
@@ -586,7 +603,7 @@ reconstructions, one per level.
 \hline
 & \textbf{Gen. BSM} & \textbf{Leveling} & \textbf{Tiering} \\ \hline
 $I(n)$ & $\Theta(B(n))$ & $\Theta\left(B\left(\frac{s-1}{s} \cdot n\right)\right)$ & $ \Theta\left(\sum_{i=0}^{\log_s n} B(s^i)\right)$ \\ \hline
-$I_A(n)$ & $\Theta\left(\frac{B(n)}{n} s\log_s n)\right)$ & $\Theta\left(\frac{B(n)}{n} s\log_s n\right)$& $\Theta\left(\frac{B(n)}{n} \log_s n\right)$ \\ \hline
+$I_A(n)$ & $O\left(\frac{B(n)}{n} s\log_s n)\right)$ & $\Theta\left(\frac{B(n)}{n} s\log_s n\right)$& $\Theta\left(\frac{B(n)}{n} \log_s n\right)$ \\ \hline
 $\mathscr{Q}(n)$ &$O\left(\log_s n \cdot \mathscr{Q}_S(n)\right)$ & $O\left(\log_s n \cdot \mathscr{Q}_S(n)\right)$ & $O\left(s \log_s n  \cdot \mathscr{Q}_S(n)\right)$\\ \hline
 $\mathscr{Q}_B(n)$ & $\Theta(\mathscr{Q}_S(n))$ & $O(\log_s n \cdot \mathscr{Q}_S(n))$ & $O(\log_s n \cdot \mathscr{Q}_S(n))$ \\ \hline
 \end{tabular}
-- 
cgit v1.2.3