From 0bb5b46ec2b64be17f6269631915e62d02e315e4 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Mon, 22 Apr 2024 12:27:43 -0400 Subject: Added plain BSM and MDSP BSM benchmark --- benchmarks/include/triespline_bsm.h | 134 ++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 benchmarks/include/triespline_bsm.h (limited to 'benchmarks/include/triespline_bsm.h') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h new file mode 100644 index 0000000..9613a62 --- /dev/null +++ b/benchmarks/include/triespline_bsm.h @@ -0,0 +1,134 @@ +#pragma once + +#include +#include +#include "ts/builder.h" + +template +class BSMTrieSpline { +private: + typedef std::pair R; + +public: + struct RangeQueryParameters { + K lower_bound; + K upper_bound; + }; + +public: + static BSMTrieSpline *build(std::vector &records) { + std::sort(records.begin(), records.end()); + return new BSMTrieSpline(records); + } + + static BSMTrieSpline *build_presorted(std::vector &records) { + return new BSMTrieSpline(records); + } + + std::vector unbuild() { + return std::move(m_data); + } + + std::vector query(void *q) { + std::vector rs; + + /* return an empty result set if q is invalid */ + if (q == nullptr) { + return rs; + } + + auto parms = (BSMTrieSpline::RangeQueryParameters*) q; + + size_t idx = lower_bound(parms->lower_bound); + + while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + rs.emplace_back(m_data[idx++]); + } + + return std::move(rs); + } + + std::vector query_merge(std::vector &rsa, std::vector &rsb) { + rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + return std::move(rsa); + } + + size_t record_count() { + return m_data.size(); + } + + + ~BSMTrieSpline() = default; + + +private: + std::vector m_data; + K m_max_key; + K m_min_key; + ts::TrieSpline m_ts; + + BSMTrieSpline(std::vector &records) { + m_data = std::move(records); + m_min_key = m_data[0].first; + m_max_key = m_data[m_data.size() - 1].first; + + auto bldr = ts::Builder(m_min_key, m_max_key, E); + for (size_t i=0; i 1) { + m_ts = bldr.Finalize(); + } + } + + size_t lower_bound(K key) { + if (m_data.size() == 0) { + return 1; + } else if (m_data.size() == 1) { + if (m_data[0].first < key) { + return 1; + } else { + return 0; + } + } + + auto bound = m_ts.GetSearchBound(key); + size_t idx = bound.begin; + + if (idx >= m_data.size()) { + return m_data.size(); + } + + // If the region to search is less than some pre-specified + // amount, perform a linear scan to locate the record. + if (bound.end - bound.begin < 256) { + while (idx < bound.end && m_data[idx].first < key) { + idx++; + } + } else { + // Otherwise, perform a binary search + idx = bound.begin; + size_t max = bound.end; + + while (idx < max) { + size_t mid = (idx + max) / 2; + if (key > m_data[mid].first) { + idx = mid + 1; + } else { + max = mid; + } + } + } + + if (idx == m_data.size()) { + return m_data.size(); + } + + if (m_data[idx].first > key && idx > 0 && m_data[idx-1].first <= key) { + return idx-1; + } + + return idx; + } +}; -- cgit v1.2.3 From d710125be2958f0b76c2601c357966fd74263c87 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 23 Apr 2024 13:24:50 -0400 Subject: BSMTriespline: updated to use a rangecount query --- benchmarks/include/triespline_bsm.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'benchmarks/include/triespline_bsm.h') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 9613a62..4276074 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -34,6 +34,7 @@ public: /* return an empty result set if q is invalid */ if (q == nullptr) { + rs.push_back({0, 0}); return rs; } @@ -41,15 +42,18 @@ public: size_t idx = lower_bound(parms->lower_bound); + size_t cnt = 0; while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { - rs.emplace_back(m_data[idx++]); + cnt++; } + rs.push_back({cnt, 0}); + return std::move(rs); } std::vector query_merge(std::vector &rsa, std::vector &rsb) { - rsa.insert(rsa.end(), rsb.begin(), rsb.end()); + rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -57,10 +61,8 @@ public: return m_data.size(); } - ~BSMTrieSpline() = default; - private: std::vector m_data; K m_max_key; -- cgit v1.2.3 From 47a386b50d904d3f1b7ce3cfc13c29ea96dd1e43 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Apr 2024 14:18:08 -0400 Subject: Added VPTree BSM benchmark --- benchmarks/include/triespline_bsm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'benchmarks/include/triespline_bsm.h') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index 4276074..dfddc43 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -2,6 +2,7 @@ #include #include +#include #include "ts/builder.h" template @@ -52,7 +53,7 @@ public: return std::move(rs); } - std::vector query_merge(std::vector &rsa, std::vector &rsb) { + std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { rsa[0].first += rsb[0].first; return std::move(rsa); } -- cgit v1.2.3 From 349cfd5090f586b7ec189b72c00786522199fe34 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Wed, 1 May 2024 16:03:19 -0400 Subject: TS BSM Adjustments --- benchmarks/include/triespline_bsm.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'benchmarks/include/triespline_bsm.h') diff --git a/benchmarks/include/triespline_bsm.h b/benchmarks/include/triespline_bsm.h index dfddc43..eaf1079 100644 --- a/benchmarks/include/triespline_bsm.h +++ b/benchmarks/include/triespline_bsm.h @@ -18,11 +18,19 @@ public: public: static BSMTrieSpline *build(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + std::sort(records.begin(), records.end()); return new BSMTrieSpline(records); } static BSMTrieSpline *build_presorted(std::vector &records) { + if (records.size() == 0) { + return nullptr; + } + return new BSMTrieSpline(records); } @@ -44,8 +52,9 @@ public: size_t idx = lower_bound(parms->lower_bound); size_t cnt = 0; - while (idx < m_data.size() && m_data[idx].first < parms->upper_bound) { + while (idx < m_data.size() && m_data[idx].first <= parms->upper_bound) { cnt++; + idx++; } rs.push_back({cnt, 0}); @@ -54,6 +63,10 @@ public: } std::vector query_merge(std::vector &rsa, std::vector &rsb, void *parms) { + /* initialize rsa on the first merge */ + if (rsa.size() == 0) { + rsa.push_back({0, 0}); + } rsa[0].first += rsb[0].first; return std::move(rsa); } @@ -75,6 +88,10 @@ private: m_min_key = m_data[0].first; m_max_key = m_data[m_data.size() - 1].first; + if (m_data.size() < 50) { + return; + } + auto bldr = ts::Builder(m_min_key, m_max_key, E); for (size_t i=0; i= key) { + return i; + } } + + return m_data.size(); } auto bound = m_ts.GetSearchBound(key); -- cgit v1.2.3