summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--benchmarks/include/btree-util.h27
-rw-r--r--benchmarks/include/data-proc.h244
-rw-r--r--benchmarks/insert_tail_latency.cpp80
-rw-r--r--benchmarks/old-bench/include/bench.h (renamed from benchmarks/include/bench.h)0
-rw-r--r--benchmarks/old-bench/include/bench_utility.h (renamed from benchmarks/include/bench_utility.h)0
-rw-r--r--benchmarks/old-bench/include/standalone_utility.h (renamed from benchmarks/include/standalone_utility.h)24
-rw-r--r--benchmarks/static_dynamic_comp.cpp117
-rw-r--r--benchmarks/watermark_testing.cpp4
-rw-r--r--include/framework/scheduling/statistics.h29
9 files changed, 495 insertions, 30 deletions
diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h
new file mode 100644
index 0000000..571c073
--- /dev/null
+++ b/benchmarks/include/btree-util.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <cstdlib>
+#include "psu-ds/BTree.h"
+
+struct btree_record {
+ int64_t key;
+ int64_t value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+struct btree_key_extract {
+ static const int64_t &get(const btree_record &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
+
+
diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h
new file mode 100644
index 0000000..f758ed4
--- /dev/null
+++ b/benchmarks/include/data-proc.h
@@ -0,0 +1,244 @@
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <gsl/gsl_rng.h>
+#include <cstring>
+#include <vector>
+
+#include "psu-ds/BTree.h"
+
+typedef uint64_t key_type;
+typedef uint64_t value_type;
+typedef uint64_t weight_type;
+
+static gsl_rng *g_rng;
+static bool g_osm_data;
+
+struct btree_record {
+ key_type key;
+ value_type value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+struct btree_key_extract {
+ static const key_type &get(const btree_record &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
+
+static key_type g_min_key = UINT64_MAX;
+static key_type g_max_key = 0;
+
+static size_t g_max_record_cnt = 0;
+static size_t g_reccnt = 0;
+
+static constexpr unsigned int DEFAULT_SEED = 0;
+
+static unsigned int get_random_seed()
+{
+ unsigned int seed = 0;
+ std::fstream urandom;
+ urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
+ urandom.read((char *) &seed, sizeof(seed));
+ urandom.close();
+
+ return seed;
+}
+
+static key_type osm_to_key(const char *key_field) {
+ double tmp_key = (atof(key_field) + 180) * 10e6;
+ return (key_type) tmp_key;
+}
+
+static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
+{
+ g_rng = gsl_rng_alloc(type);
+ gsl_rng_set(g_rng, seed);
+}
+
+static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
+{
+ unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
+ init_bench_rng(seed, gsl_rng_mt19937);
+ g_osm_data = osm_correction;
+ g_max_record_cnt = max_reccnt;
+ g_reccnt = 0;
+}
+
+static void delete_bench_env()
+{
+ gsl_rng_free(g_rng);
+}
+
+
+template <typename QP>
+static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.target_key = start;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.lower_bound = start;
+ q.upper_bound = stop;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+/*
+ * NOTE: The QP type must have lower_bound and upper_bound attributes, which
+ * this function will initialize. Any other query parameter attributes must
+ * be manually initialized after the call.
+ */
+template <typename R>
+static bool next_vector_record(std::fstream &file, R &record, bool binary=false) {
+ std::string line;
+ if (std::getline(file, line, '\n')) {
+ std::stringstream line_stream(line);
+ for (size_t i=0; i<300; i++) {
+ std::string dimension;
+
+ std::getline(line_stream, dimension, ' ');
+ record.data[i] = atof(dimension.c_str());
+ }
+
+ g_reccnt++;
+
+ return true;
+ }
+
+ return false;
+
+}
+
+template <typename R>
+static bool next_record(std::fstream &file, R &record, bool binary=false)
+{
+ static value_type value = 1;
+ if (g_reccnt >= g_max_record_cnt) return false;
+
+ if (binary) {
+ if (file.good()) {
+ decltype(R::key) key;
+
+ file.read((char*) &key, sizeof(key));
+ record.key = key;
+ record.value = value;
+ value++;
+
+ if (record.key < g_min_key) g_min_key = record.key;
+ if (record.key > g_max_key) g_max_key = record.key;
+
+ return true;
+ }
+
+ return false;
+ }
+
+ std::string line;
+ if (std::getline(file, line, '\n')) {
+ std::stringstream line_stream(line);
+ std::string key_field;
+ std::string value_field;
+ std::string weight_field;
+
+ std::getline(line_stream, value_field, '\t');
+ std::getline(line_stream, key_field, '\t');
+ std::getline(line_stream, weight_field, '\t');
+
+ record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
+ record.value = atol(value_field.c_str());
+
+ if (record.key < g_min_key) g_min_key = record.key;
+ if (record.key > g_max_key) g_max_key = record.key;
+
+ g_reccnt++;
+
+ return true;
+ }
+
+ return false;
+}
+
+template <typename R>
+static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
+ vec.clear();
+
+ size_t cnt = 0;
+ while (cnt < n) {
+ if (to_delete.size() == 0) {
+ return false;
+ }
+
+ auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
+ vec.emplace_back(to_delete[i]);
+ to_delete.erase(to_delete.begin() + i);
+ }
+td:
+ return true;
+}
diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp
new file mode 100644
index 0000000..5e32898
--- /dev/null
+++ b/benchmarks/insert_tail_latency.cpp
@@ -0,0 +1,80 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include <unistd.h>
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<int64_t, int64_t> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::rc::Query<ISAM, Rec> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::FIFOScheduler> Ext;
+
+std::atomic<size_t> total_latency = 0;
+
+void insert_thread(Ext *extension, size_t n, size_t k, size_t rate) {
+ int64_t delay = (1.0 / (double) rate) * 10e6; // delay in us
+ TIMER_INIT();
+ for (int64_t i=0; i<n; i+=k) {
+ TIMER_START();
+ for (int64_t j=0; j<k; j++) {
+ Rec r = {i+j, i+j};
+ while (!extension->insert(r)) {
+ _mm_pause();
+ }
+
+ //usleep(delay);
+ /*
+ for (size_t i=0; i<10000; i++) {
+ __asm__ __volatile__ ("":::"memory");
+ }
+ */
+ }
+ TIMER_STOP();
+
+ auto insert_lat = TIMER_RESULT();
+
+ total_latency.fetch_add(insert_lat);
+ fprintf(stdout, "I\t%ld\t%ld\t%ld\n", i+k, insert_lat, k);
+ }
+}
+
+int main(int argc, char **argv) {
+
+ /* the closeout routine takes _forever_ ... so we'll just leak the memory */
+ auto extension = new Ext(100, 1000000, 3);
+ size_t n = 100000000;
+ size_t per_trial = 1000;
+ double selectivity = .001;
+ size_t rate = 1000000;
+
+ total_latency.store(0);
+
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ std::thread i_thrd1(insert_thread, extension, n/2, per_trial, rate);
+ std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate);
+
+ i_thrd1.join();
+ i_thrd2.join();
+
+ auto avg_latency = total_latency.load() / n;
+ auto throughput = (int64_t) ((double) n / (double) total_latency * 1e9);
+
+ fprintf(stdout, "AVG LAT: %ld\nThroughput: %ld\n", avg_latency, throughput);
+
+ gsl_rng_free(rng);
+ fflush(stderr);
+}
+
diff --git a/benchmarks/include/bench.h b/benchmarks/old-bench/include/bench.h
index 586ff12..586ff12 100644
--- a/benchmarks/include/bench.h
+++ b/benchmarks/old-bench/include/bench.h
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h
index e33b93d..e33b93d 100644
--- a/benchmarks/include/bench_utility.h
+++ b/benchmarks/old-bench/include/bench_utility.h
diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/old-bench/include/standalone_utility.h
index 9876e84..727daa5 100644
--- a/benchmarks/include/standalone_utility.h
+++ b/benchmarks/old-bench/include/standalone_utility.h
@@ -1,18 +1,12 @@
#include <cstdlib>
#include <cstdio>
-#include <chrono>
-#include <algorithm>
-#include <numeric>
-#include <memory>
#include <iostream>
#include <fstream>
#include <sstream>
-#include <unordered_set>
-#include <set>
#include <string>
-#include <random>
#include <gsl/gsl_rng.h>
#include <cstring>
+#include <vector>
typedef uint64_t key_type;
typedef uint64_t value_type;
@@ -244,19 +238,3 @@ static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, siz
td:
return true;
}
-
-/*
- * helper routines for displaying progress bars to stderr
- */
-static const char *g_prog_bar = "======================================================================";
-static const size_t g_prog_width = 50;
-
-static void progress_update(double percentage, std::string prompt) {
- int val = (int) (percentage * 100);
- int lpad = (int) (percentage * g_prog_width);
- int rpad = (int) (g_prog_width - lpad);
- fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, "");
- fflush(stderr);
-
- if (percentage >= 1) fprintf(stderr, "\n");
-}
diff --git a/benchmarks/static_dynamic_comp.cpp b/benchmarks/static_dynamic_comp.cpp
new file mode 100644
index 0000000..2d8f041
--- /dev/null
+++ b/benchmarks/static_dynamic_comp.cpp
@@ -0,0 +1,117 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "query/rangecount.h"
+#include "shard/TrieSpline.h"
+#include "shard/ISAMTree.h"
+
+
+#include "framework/interface/Record.h"
+#include "framework/interface/Query.h"
+#include "include/data-proc.h"
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<key_type, value_type> Rec;
+typedef de::ISAMTree<Rec> ISAM;
+typedef de::TrieSpline<Rec> TS;
+
+typedef de::rc::Query<ISAM, Rec> Q;
+typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
+
+typedef de::MutableBuffer<Rec> Buffer;
+
+typedef de::rc::Parms<Rec> query;
+
+Buffer *file_to_mbuffer(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ auto buff = new Buffer(n, n+1);
+
+ Rec rec;
+ while (next_record(file, rec) && buff->get_record_count() < n) {
+ buff->append(rec);
+ }
+
+ return buff;
+}
+
+BenchBTree *file_to_btree(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ auto btree = new BenchBTree();
+ Rec rec;
+ while (next_record(file, rec) && btree->size() < n) {
+ btree->insert({rec.key, rec.value});
+ }
+
+ return btree;
+}
+
+template<de::ShardInterface S>
+void benchmark_shard(S *shard, std::vector<query> &queries) {
+ TIMER_INIT();
+
+ TIMER_START();
+ for (auto & q : queries) {
+ auto state = de::rc::Query<S, Rec>::get_query_state(shard, &q);
+ auto res = de::rc::Query<S, Rec>::query(shard, state, &q);
+ }
+ TIMER_STOP();
+
+ auto latency = TIMER_RESULT() / queries.size();
+ fprintf(stdout, "%ld %ld\n", latency, shard->get_memory_usage() - shard->get_record_count() * sizeof(de::Wrapped<Rec>));
+}
+
+void benchmark_btree(BenchBTree *btree, std::vector<query> &queries) {
+ TIMER_INIT();
+
+ TIMER_START();
+ for (auto & q : queries) {
+ size_t c = 0;
+ auto ptr = btree->find(q.lower_bound);
+ while(ptr != btree->end() && ptr->key <= q.upper_bound) {
+ c++;
+ }
+ }
+ TIMER_STOP();
+
+ auto latency = TIMER_RESULT() / queries.size();
+ auto mem = btree->get_stats().inner_nodes * psudb::btree_default_traits<key_type, btree_record>::inner_slots * (sizeof(key_type) + sizeof(void*));
+ fprintf(stdout, "%ld %ld\n", latency, mem);
+}
+
+int main(int argc, char **argv) {
+ if (argc < 4) {
+ fprintf(stderr, "Usage: static_dynamic_comp <filename> <record_count> <query_file>\n");
+ exit(EXIT_FAILURE);
+ }
+
+ std::string d_fname = std::string(argv[1]);
+ size_t reccnt = atol(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ init_bench_env(reccnt, true, false);
+ auto queries = read_range_queries<query>(q_fname, .001);
+
+ auto buff = file_to_mbuffer(d_fname, reccnt);
+
+ TS *ts = new TS(buff->get_buffer_view());
+ benchmark_shard<TS>(ts, queries);
+ delete ts;
+
+ ISAM *isam = new ISAM(buff->get_buffer_view());
+ benchmark_shard<ISAM>(isam, queries);
+ delete isam;
+
+ auto btree = file_to_btree(d_fname, reccnt);
+
+}
+
diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp
index 1abe7f5..e016aa4 100644
--- a/benchmarks/watermark_testing.cpp
+++ b/benchmarks/watermark_testing.cpp
@@ -21,7 +21,7 @@ typedef de::DynamicExtension<Rec, ISAM, Q> Ext;
int main(int argc, char **argv) {
std::vector hwms = {5000l, 10000l, 20000l, 50000l};
- std::vector lwms = {.1, .2, .3, .4, .5};
+ std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9};
size_t n = 1000000;
@@ -46,6 +46,8 @@ int main(int argc, char **argv) {
fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput);
+ extension->print_scheduler_statistics();
+
delete extension;
}
}
diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h
index 8466ffc..50ba196 100644
--- a/include/framework/scheduling/statistics.h
+++ b/include/framework/scheduling/statistics.h
@@ -67,19 +67,33 @@ public:
if (type == 1) {
m_type_1_cnt.fetch_add(1);
m_type_1_total_time.fetch_add(length);
+
+ if (length > m_type_1_largest_time) {
+ m_type_1_largest_time.store(length);
+ }
} else {
m_type_2_cnt.fetch_add(1);
m_type_2_total_time.fetch_add(length);
+
+ if (length > m_type_2_largest_time) {
+ m_type_2_largest_time.store(length);
+ }
}
}
void print_statistics() {
- fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\n",
- m_type_1_cnt.load(),
- m_type_1_total_time.load() / m_type_1_cnt.load());
- fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\n",
- m_type_2_cnt.load(),
- m_type_2_total_time.load() / m_type_2_cnt.load());
+ if (m_type_1_cnt > 0) {
+ fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n",
+ m_type_1_cnt.load(),
+ m_type_1_total_time.load() / m_type_1_cnt.load(),
+ m_type_1_largest_time.load());
+ }
+ if (m_type_2_cnt > 0) {
+ fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\tMax Recon. Latency:%ld\n",
+ m_type_2_cnt.load(),
+ m_type_2_total_time.load() / m_type_2_cnt.load(),
+ m_type_2_largest_time.load());
+ }
}
private:
@@ -92,5 +106,8 @@ private:
std::atomic<size_t> m_type_2_cnt;
std::atomic<size_t> m_type_2_total_time;
+
+ std::atomic<size_t> m_type_1_largest_time;
+ std::atomic<size_t> m_type_2_largest_time;
};
}