From f0ac69f9d5179abd385644a520be670a8ca5e942 Mon Sep 17 00:00:00 2001
From: Douglas Rumbaugh <dbr4@psu.edu>
Date: Mon, 17 Feb 2025 10:46:24 -0500
Subject: Multi-threaded mixed workload benchmark

---
 CMakeLists.txt                             |   6 ++
 benchmarks/tail-latency/mixed_workload.cpp | 150 +++++++++++++++++++++++++++++
 include/framework/DynamicExtension.h       |  11 +--
 3 files changed, 157 insertions(+), 10 deletions(-)
 create mode 100644 benchmarks/tail-latency/mixed_workload.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4723da4..e053367 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -306,6 +306,11 @@ if (tail_bench)
     target_link_libraries(btree_insert_dist PUBLIC gsl pthread atomic)
     target_include_directories(btree_insert_dist PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
     target_link_options(btree_insert_dist PUBLIC -mcx16)
+
+    add_executable(mixed_workload ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/tail-latency/mixed_workload.cpp)
+    target_link_libraries(mixed_workload PUBLIC gsl pthread atomic)
+    target_include_directories(mixed_workload PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
+    target_link_options(mixed_workload PUBLIC -mcx16)
 endif()
 
 if (bench) 
@@ -364,4 +369,5 @@ if (bench)
     target_link_libraries(insert_tail_latency PUBLIC gsl pthread atomic)
     target_include_directories(insert_tail_latency PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
     target_link_options(insert_tail_latency PUBLIC -mcx16)
+
 endif()
diff --git a/benchmarks/tail-latency/mixed_workload.cpp b/benchmarks/tail-latency/mixed_workload.cpp
new file mode 100644
index 0000000..d8ea890
--- /dev/null
+++ b/benchmarks/tail-latency/mixed_workload.cpp
@@ -0,0 +1,150 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+#define TS_TEST
+
+#include <thread>
+
+#include "framework/scheduling/SerialScheduler.h"
+#include "framework/util/Configuration.h"
+#include "util/types.h"
+#include "file_util.h"
+#include "framework/DynamicExtension.h"
+#include "framework/interface/Record.h"
+#include "framework/scheduling/FIFOScheduler.h"
+#include "query/rangecount.h"
+#include "shard/TrieSpline.h"
+#include "standard_benchmarks.h"
+
+#include "framework/reconstruction/FixedShardCountPolicy.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::TrieSpline<Rec> Shard;
+typedef de::rc::Query<Shard> Q;
+typedef de::DynamicExtension<Shard, Q, de::DeletePolicy::TOMBSTONE,
+                             de::FIFOScheduler>
+    Ext;
+typedef Q::Parameters QP;
+typedef de::DEConfiguration<Shard, Q, de::DeletePolicy::TOMBSTONE,
+                            de::FIFOScheduler>
+    Conf;
+
+std::atomic<size_t> idx;
+std::atomic<bool> inserts_done = false;
+
+size_t query_ratio = 3;
+
+std::atomic<size_t> total_res = 0;
+size_t reccnt = 0;
+
+void operation_thread(Ext *extension, std::vector<QP> *queries,
+                      std::vector<Rec> *records) {
+  TIMER_INIT();
+  while (!inserts_done.load()) {
+    auto type = rand() % 10;
+
+    if (type < 8) {
+      auto q_idx = rand() % queries->size();
+
+      auto q = (*queries)[q_idx];
+
+      TIMER_START();
+      auto res = extension->query(std::move(q)).get();
+      TIMER_STOP();
+
+      fprintf(stdout, "Q\t%ld\n", TIMER_RESULT());
+
+      total_res.fetch_add(res);
+
+    } else {
+      for (size_t i = 0; i < 1000; i++) {
+        auto insert_idx = idx.fetch_add(1);
+
+        TIMER_START();
+        while (!extension->insert((*records)[insert_idx])) {
+          usleep(1);
+        }
+        TIMER_STOP();
+
+        fprintf(stdout, "I\t%ld\n", TIMER_RESULT());
+
+        if (idx.load() == reccnt) {
+          inserts_done.store(true);
+        }
+      }
+    }
+  }
+}
+
+void usage(char *progname) {
+  fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+  if (argc < 4) {
+    usage(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  size_t n = atol(argv[1]);
+  std::string d_fname = std::string(argv[2]);
+  std::string q_fname = std::string(argv[3]);
+
+  auto data = read_sosd_file<Rec>(d_fname, n);
+  auto queries = read_range_queries<QP>(q_fname, .0001);
+
+  std::vector<size_t> sfs = {8}; //, 4, 8, 16, 32, 64, 128, 256, 512, 1024};
+  size_t buffer_size = 8000;
+  std::vector<size_t> policies = {
+      0,
+  };
+
+  reccnt = n;
+
+  for (auto pol : policies) {
+    for (size_t i = 0; i < sfs.size(); i++) {
+      auto policy = get_policy<Shard, Q>(sfs[i], buffer_size, pol, n);
+      auto config = Conf(std::move(policy));
+      config.recon_enable_maint_on_flush = false;
+      config.recon_maint_disabled = true;
+      config.buffer_flush_trigger = 4000;
+
+      auto extension = new Ext(std::move(config));
+
+      /* warmup structure w/ 10% of records */
+      size_t warmup = .1 * n;
+      for (size_t j = 0; j < warmup; j++) {
+        while (!extension->insert(data[j])) {
+          usleep(1);
+        }
+      }
+
+      extension->await_version();
+
+      idx.store(warmup);
+
+      size_t thrd_cnt = 8;
+      std::thread thrds[thrd_cnt];
+
+      for (size_t i=0; i<thrd_cnt; i++) {
+        thrds[i] = std::thread(operation_thread, extension, &queries, &data);
+      }
+
+      for (size_t i=0; i<thrd_cnt; i++) {
+        thrds[i].join();
+      }
+
+      fprintf(stderr, "%ld\n", total_res.load());
+      delete extension;
+    }
+  }
+
+  fflush(stderr);
+}
diff --git a/include/framework/DynamicExtension.h b/include/framework/DynamicExtension.h
index 67f6ff8..4d4585f 100644
--- a/include/framework/DynamicExtension.h
+++ b/include/framework/DynamicExtension.h
@@ -759,22 +759,13 @@ private:
   }
 
   int internal_append(const RecordType &rec, bool ts) {
-    size_t max_l0 = (log(get_record_count()) / log(8)) + 1;
-    size_t current_l0 = get_active_version()->get_structure()->get_level_vector()[0]->get_shard_count();
-
-    if (m_buffer->is_at_low_watermark() && current_l0 <= max_l0) {
+    if (m_buffer->is_at_low_watermark()) {
       auto old = false;
       if (m_scheduling_reconstruction.compare_exchange_strong(old, true)) {
         schedule_flush();
       }
     }
 
-    if (m_buffer->is_at_high_watermark() && current_l0 > max_l0) {
-      schedule_maint_reconstruction(true);
-      // fprintf(stderr, "[I] Current L0: %ld\tMax L0:%ld\n", current_l0, max_l0);
-      return 0;
-    }
-
     /* this will fail if the HWM is reached and return 0 */
     return m_buffer->append(rec, ts);
   }
-- 
cgit v1.2.3