summaryrefslogtreecommitdiffstats
path: root/benchmarks
diff options
context:
space:
mode:
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/CMak0
-rw-r--r--benchmarks/include/benchmark_types.h50
-rw-r--r--benchmarks/include/btree-util.h27
-rw-r--r--benchmarks/include/data-proc.h258
-rw-r--r--benchmarks/include/file_util.h130
-rw-r--r--benchmarks/include/standard_benchmarks.h212
-rw-r--r--benchmarks/irs_bench.cpp73
-rw-r--r--benchmarks/old-bench/include/bench_utility.h2
-rw-r--r--benchmarks/pgm_bench.cpp64
-rw-r--r--benchmarks/ts_bench.cpp64
-rw-r--r--benchmarks/vptree_bench.cpp89
-rw-r--r--benchmarks/watermark_testing_knn.cpp61
12 files changed, 583 insertions, 447 deletions
diff --git a/benchmarks/CMak b/benchmarks/CMak
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/benchmarks/CMak
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
new file mode 100644
index 0000000..85e9565
--- /dev/null
+++ b/benchmarks/include/benchmark_types.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <cstdlib>
+#include "psu-ds/BTree.h"
+#include "mtree.h"
+#include "framework/interface/Record.h"
+
+/* TLX BTree definitions*/
+template <typename K, typename V>
+struct btree_record {
+ K key;
+ V value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+template <typename K, typename V>
+struct btree_key_extract {
+ static const K &get(const btree_record<K, V> &v) {
+ return v.key;
+ }
+};
+
+typedef psudb::BTree<int64_t, btree_record<int64_t, int64_t>, btree_key_extract<int64_t, int64_t>> BenchBTree;
+
+
+/*MTree Definitions*/
+
+const size_t W2V_SIZE = 300;
+typedef de::EuclidPoint<double, W2V_SIZE> Word2VecRec;
+
+struct euclidean_distance {
+ double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
+ double dist = 0;
+ for (size_t i=0; i<W2V_SIZE; i++) {
+ dist += (first.data[i] - second.data[i]) * (first.data[i] - second.data[i]);
+ }
+
+ return std::sqrt(dist);
+ }
+};
+
+typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
+
diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h
deleted file mode 100644
index 571c073..0000000
--- a/benchmarks/include/btree-util.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include <cstdlib>
-#include "psu-ds/BTree.h"
-
-struct btree_record {
- int64_t key;
- int64_t value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const int64_t &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-
diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h
deleted file mode 100644
index 444cb94..0000000
--- a/benchmarks/include/data-proc.h
+++ /dev/null
@@ -1,258 +0,0 @@
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <gsl/gsl_rng.h>
-#include <cstring>
-#include <vector>
-
-#include "psu-ds/BTree.h"
-
-#pragma once
-
-typedef int64_t key_type;
-typedef int64_t value_type;
-typedef uint64_t weight_type;
-
-static gsl_rng *g_rng;
-static bool g_osm_data;
-
-struct btree_record {
- key_type key;
- value_type value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const key_type &get(const btree_record &v) {
- return v.key;
- }
-};
-
-typedef psudb::BTree<int64_t, btree_record, btree_key_extract> BenchBTree;
-
-static key_type g_min_key = UINT64_MAX;
-static key_type g_max_key = 0;
-
-static size_t g_max_record_cnt = 0;
-static size_t g_reccnt = 0;
-
-static constexpr unsigned int DEFAULT_SEED = 0;
-
-static unsigned int get_random_seed()
-{
- unsigned int seed = 0;
- std::fstream urandom;
- urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
- urandom.read((char *) &seed, sizeof(seed));
- urandom.close();
-
- return seed;
-}
-
-static key_type osm_to_key(const char *key_field) {
- double tmp_key = (atof(key_field) + 180) * 10e6;
- return (key_type) tmp_key;
-}
-
-static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
-{
- g_rng = gsl_rng_alloc(type);
- gsl_rng_set(g_rng, seed);
-}
-
-static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
-{
- unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
- init_bench_rng(seed, gsl_rng_mt19937);
- g_osm_data = osm_correction;
- g_max_record_cnt = max_reccnt;
- g_reccnt = 0;
-}
-
-static void delete_bench_env()
-{
- gsl_rng_free(g_rng);
-}
-
-
-template <typename QP>
-static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.target_key = start;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.lower_bound = start;
- q.upper_bound = stop;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- char *line = NULL;
- size_t len = 0;
-
- while (getline(&line, &len, qf) > 0) {
- char *token;
- QP query;
- size_t idx = 0;
-
- token = strtok(line, " ");
- do {
- query.point.data[idx++] = atof(token);
- } while ((token = strtok(NULL, " ")));
-
- query.k = k;
- queries.emplace_back(query);
- }
-
- free(line);
- fclose(qf);
-
- return queries;
-}
-
-/*
- * NOTE: The QP type must have lower_bound and upper_bound attributes, which
- * this function will initialize. Any other query parameter attributes must
- * be manually initialized after the call.
- */
-template <typename R>
-static bool next_vector_record(std::fstream &file, R &record, bool binary=false) {
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- for (size_t i=0; i<300; i++) {
- std::string dimension;
-
- std::getline(line_stream, dimension, ' ');
- record.data[i] = atof(dimension.c_str());
- }
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-
-}
-
-template <typename R>
-static bool next_record(std::fstream &file, R &record, bool binary=false)
-{
- static value_type value = 1;
- if (g_reccnt >= g_max_record_cnt) return false;
-
- if (binary) {
- if (file.good()) {
- decltype(R::key) key;
-
- file.read((char*) &key, sizeof(key));
- record.key = key;
- record.value = value;
- value++;
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- return true;
- }
-
- return false;
- }
-
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- std::string key_field;
- std::string value_field;
- std::string weight_field;
-
- std::getline(line_stream, value_field, '\t');
- std::getline(line_stream, key_field, '\t');
- std::getline(line_stream, weight_field, '\t');
-
- record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
- record.value = atol(value_field.c_str());
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-}
-
-template <typename R>
-static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
- vec.clear();
-
- size_t cnt = 0;
- while (cnt < n) {
- if (to_delete.size() == 0) {
- return false;
- }
-
- auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
- vec.emplace_back(to_delete[i]);
- to_delete.erase(to_delete.begin() + i);
- }
-td:
- return true;
-}
-
-static std::vector<int64_t> read_sosd_file(std::string &fname, size_t n) {
- std::fstream file;
- file.open(fname, std::ios::in | std::ios::binary);
-
- std::vector<int64_t> records(n);
- for (size_t i=0; i<n; i++) {
- file.read((char*) &(records[i]), sizeof(int64_t));
- }
-
- return records;
-}
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
new file mode 100644
index 0000000..2a3300a
--- /dev/null
+++ b/benchmarks/include/file_util.h
@@ -0,0 +1,130 @@
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <gsl/gsl_rng.h>
+#include <cstring>
+#include <vector>
+
+#include "framework/interface/Record.h"
+#include "query/irs.h"
+
+#pragma once
+
+template <typename QP>
+static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.target_key = start;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.lower_bound = start;
+ q.upper_bound = stop;
+
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+template<de::KVPInterface R>
+static std::vector<R> read_sosd_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in | std::ios::binary);
+
+ std::vector<R> records(n);
+ for (size_t i=0; i<n; i++) {
+ uint64_t k;
+ file.read((char*) &(k), sizeof(uint64_t));
+ records[i].key = k;
+ records[i].value = i;
+ }
+
+ return records;
+}
+
+/*
+ * This function expects a plaintext file with each vector on its own line.
+ * There should be D dimensions (or more) for each record, separated by
+ * whitespace. The function will generate a vector of n records, each
+ * record built from the first D dimensions of the data in the file.
+ */
+template <typename R, size_t D>
+static std::vector<R> read_vector_file(std::string &fname, size_t n) {
+ std::fstream file;
+ file.open(fname, std::ios::in);
+
+ std::vector<R> records;
+ records.reserve(n);
+
+ for (size_t i=0; i<n; i++) {
+ std::string line;
+ if (!std::getline(file, line, '\n')) break;
+
+ std::stringstream line_stream(line);
+ R rec;
+ for (size_t j=0; j<D; j++) {
+ std::string dim;
+ if (!std::getline(line_stream, dim, ' ')) break;
+
+ rec.data[j] = atof(dim.c_str());
+ }
+ records.emplace_back(rec);
+ }
+
+ return records;
+}
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
new file mode 100644
index 0000000..a42cdd6
--- /dev/null
+++ b/benchmarks/include/standard_benchmarks.h
@@ -0,0 +1,212 @@
+/*
+ * benchmarks/include/bench.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <fstream>
+#include <gsl/gsl_rng.h>
+
+#include "framework/DynamicExtension.h"
+#include "framework/interface/Query.h"
+#include "psu-util/progress.h"
+#include "benchmark_types.h"
+
+static size_t g_deleted_records = 0;
+static double delete_proportion = 0.05;
+
+template<typename DE, typename QP>
+static void run_queries(DE *extension, std::vector<QP> &queries, gsl_rng *rng) {
+ size_t total;
+ for (size_t i=0; i<queries.size(); i++) {
+ auto q = &queries[i];
+
+ auto res = extension->query(q);
+ auto r = res.get();
+ total += r.size();
+ }
+}
+
+
+template<typename DE, de::RecordInterface R>
+static void insert_records(DE *extension, size_t start, size_t stop,
+ std::vector<R> &records, std::vector<size_t> &to_delete,
+ size_t &delete_idx, bool delete_records, gsl_rng *rng) {
+
+ psudb::progress_update(0, "Insert Progress");
+ size_t reccnt = 0;
+ for (size_t i=start; i<stop; i++) {
+ while (!extension->insert(records[i])) {
+ psudb::progress_update((double) i / (double)(stop - start), "Insert Progress");
+ usleep(1);
+ }
+
+ if (delete_records && gsl_rng_uniform(rng) <=
+ delete_proportion && to_delete[delete_idx] <= i) {
+
+ while (!extension->erase(records[to_delete[delete_idx]])) {
+ usleep(1);
+ }
+
+ delete_idx++;
+ g_deleted_records++;
+ }
+ }
+
+ psudb::progress_update(1, "Insert Progress");
+}
+
+template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
+static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
+ double delete_prop, gsl_rng *rng, std::vector<R> &to_delete, bool binary=false) {
+
+ size_t delete_cnt = insert_cnt * delete_prop;
+
+ size_t applied_deletes = 0;
+ size_t applied_inserts = 0;
+
+ std::vector<R> insert_vec;
+ std::vector<R> delete_vec;
+ insert_vec.reserve(BATCH);
+ delete_vec.reserve(BATCH*delete_prop);
+
+ size_t delete_idx = 0;
+
+ bool continue_benchmark = true;
+
+ size_t total_time = 0;
+
+ while (applied_inserts < insert_cnt && continue_benchmark) {
+ continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
+ if (applied_deletes < delete_cnt) {
+ build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+ delete_idx = 0;
+ }
+
+ if (insert_vec.size() == 0) {
+ break;
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+ }
+
+ auto insert_start = std::chrono::high_resolution_clock::now();
+ for (size_t i=0; i<insert_vec.size(); i++) {
+ // process a delete if necessary
+ if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(rng) < delete_prop) {
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ de_index.erase_one(delete_vec[delete_idx++].key);
+ } else if constexpr (std::is_same_v<MTree, DE>) {
+ de_index.remove(delete_vec[delete_idx++]);
+ } else {
+ de_index.erase(delete_vec[delete_idx++]);
+ }
+ applied_deletes++;
+ }
+
+ // insert the record;
+ if constexpr (std::is_same_v<MTree, DE>) {
+ de_index.add(insert_vec[i]);
+ } else {
+ de_index.insert(insert_vec[i]);
+ }
+ applied_inserts++;
+ }
+ auto insert_stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+ }
+
+ if constexpr (PROGRESS) {
+ psudb::progress_update(1.0, "inserting:");
+ }
+
+ size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+
+ fprintf(stdout, "%ld\t", throughput);
+
+ return continue_benchmark;
+}
+
+template <typename DE, de::RecordInterface R, typename QP, bool PROGRESS=true>
+static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t trial_cnt=1) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ auto res = de_index.query(&queries[j]);
+
+ total_results += res.size();
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
+
+
+template <typename Shard, de::RecordInterface R, typename QP, de::QueryInterface<R, Shard> Q, bool PROGRESS=true>
+static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t trial_cnt=100) {
+ char progbuf[25];
+ if constexpr (PROGRESS) {
+ sprintf(progbuf, "querying:");
+ }
+
+ size_t total_time = 0;
+ size_t total_results = 0;
+
+ for (size_t i=0; i<trial_cnt; i++) {
+ if constexpr (PROGRESS) {
+ psudb::progress_update((double) (i) / (double) trial_cnt, progbuf);
+ }
+
+ std::vector<void *> states(1);
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t j=0; j<queries.size(); j++) {
+ states[0] = Q::get_query_state(shard, &queries[j]);
+ Q::process_query_states(&queries[j], states, nullptr);
+ auto res = Q::query(shard, states[0], &queries[j]);
+ total_results += res.size();
+ Q::delete_query_state(states[0]);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+
+ total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+ }
+
+ psudb::progress_update(1.0, progbuf);
+
+ size_t query_latency = total_time / (trial_cnt * queries.size());
+
+ fprintf(stdout, "%ld\t", query_latency);
+ fflush(stdout);
+
+ return true;
+}
diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp
index 49b1630..9895295 100644
--- a/benchmarks/irs_bench.cpp
+++ b/benchmarks/irs_bench.cpp
@@ -4,76 +4,32 @@
#define ENABLE_TIMER
-#include <thread>
-
#include "framework/DynamicExtension.h"
#include "shard/ISAMTree.h"
#include "query/irs.h"
#include "framework/interface/Record.h"
-#include "include/data-proc.h"
+#include "include/file_util.h"
#include <gsl/gsl_rng.h>
#include "psu-util/timer.h"
+#include "include/standard_benchmarks.h"
typedef de::Record<uint64_t, uint64_t> Rec;
-typedef de::ISAMTree<Rec> ISAM;
-typedef de::irs::Query<Rec, ISAM> Q;
-typedef de::DynamicExtension<Rec, ISAM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
typedef de::irs::Parms<Rec> QP;
-void run_queries(Ext *extension, std::vector<QP> &queries, gsl_rng *rng) {
- size_t total;
- for (size_t i=0; i<queries.size(); i++) {
- auto q = &queries[i];
- q->rng = rng;
- q->sample_size = 1000;
-
- auto res = extension->query(q);
- auto r = res.get();
- total += r.size();
- }
-
- fprintf(stderr, "%ld\n", total);
-}
-
-size_t g_deleted_records = 0;
-double delete_proportion = 0.05;
-
-void insert_records(Ext *extension, size_t start,
- size_t stop,
- std::vector<int64_t> &records,
- std::vector<size_t> &to_delete,
- size_t &delete_idx,
- bool delete_records,
- gsl_rng *rng) {
- size_t reccnt = 0;
- Rec r;
- for (size_t i=start; i<stop; i++) {
- r.key = records[i];
- r.value = i;
-
- while (!extension->insert(r)) {
- usleep(1);
- }
-
- if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
- r.key = records[to_delete[delete_idx]];
- r.value = (int64_t) (to_delete[delete_idx]);
- while (!extension->erase(r)) {
- usleep(1);
- }
- delete_idx++;
- g_deleted_records++;
- }
- }
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
}
int main(int argc, char **argv) {
if (argc < 4) {
- fprintf(stderr, "irs_bench reccnt datafile queryfile\n");
+ usage(argv[0]);
exit(EXIT_FAILURE);
}
@@ -84,7 +40,7 @@ int main(int argc, char **argv) {
auto extension = new Ext(12000, 12001, 8, 0, 64);
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto data = read_sosd_file(d_fname, n);
+ auto data = read_sosd_file<Rec>(d_fname, n);
std::vector<size_t> to_delete(n * delete_proportion);
size_t j=0;
for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
@@ -92,26 +48,31 @@ int main(int argc, char **argv) {
to_delete[j++] = i;
}
}
+ /* read in the range queries and add sample size and rng for sampling */
auto queries = read_range_queries<QP>(q_fname, .001);
+ for (auto q : queries) {
+ q.sample_size = 1000;
+ q.rng = rng;
+ }
/* warmup structure w/ 10% of records */
size_t warmup = .3 * n;
size_t delete_idx = 0;
- insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
extension->await_next_epoch();
TIMER_INIT();
TIMER_START();
- insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
TIMER_STOP();
auto insert_latency = TIMER_RESULT();
size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
TIMER_START();
- run_queries(extension, queries, rng);
+ run_queries<Ext, QP>(extension, queries, rng);
TIMER_STOP();
auto query_latency = TIMER_RESULT() / queries.size();
diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h
index e33b93d..f495f18 100644
--- a/benchmarks/old-bench/include/bench_utility.h
+++ b/benchmarks/old-bench/include/bench_utility.h
@@ -79,8 +79,6 @@ struct cosine_similarity {
}
};
-typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
-typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
template <de::RecordInterface R>
static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp
index 72d3b52..3643abb 100644
--- a/benchmarks/pgm_bench.cpp
+++ b/benchmarks/pgm_bench.cpp
@@ -10,7 +10,8 @@
#include "shard/PGM.h"
#include "query/rangecount.h"
#include "framework/interface/Record.h"
-#include "include/data-proc.h"
+#include "include/file_util.h"
+#include "include/standard_benchmarks.h"
#include <gsl/gsl_rng.h>
@@ -18,60 +19,19 @@
typedef de::Record<uint64_t, uint64_t> Rec;
-typedef de::PGM<Rec> S;
-typedef de::rc::Query<Rec, S> Q;
-typedef de::DynamicExtension<Rec, S, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::PGM<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
typedef de::rc::Parms<Rec> QP;
-void run_queries(Ext *extension, std::vector<QP> &queries, gsl_rng *rng) {
- size_t total;
- for (size_t i=0; i<queries.size(); i++) {
- auto q = &queries[i];
-
- auto res = extension->query(q);
- auto r = res.get();
- total += r.size();
- }
-
- fprintf(stderr, "%ld\n", total);
-}
-
-size_t g_deleted_records = 0;
-double delete_proportion = 0.05;
-
-void insert_records(Ext *extension, size_t start,
- size_t stop,
- std::vector<int64_t> &records,
- std::vector<size_t> &to_delete,
- size_t &delete_idx,
- bool delete_records,
- gsl_rng *rng) {
- size_t reccnt = 0;
- Rec r;
- for (size_t i=start; i<stop; i++) {
- r.key = records[i];
- r.value = i;
-
- while (!extension->insert(r)) {
- usleep(1);
- }
-
- if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
- r.key = records[to_delete[delete_idx]];
- r.value = (int64_t) (to_delete[delete_idx]);
- while (!extension->erase(r)) {
- usleep(1);
- }
- delete_idx++;
- g_deleted_records++;
- }
- }
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
}
int main(int argc, char **argv) {
if (argc < 4) {
- fprintf(stderr, "pgm_bench reccnt datafile queryfile\n");
+ usage(argv[0]);
exit(EXIT_FAILURE);
}
@@ -82,7 +42,7 @@ int main(int argc, char **argv) {
auto extension = new Ext(12000, 12001, 8, 0, 64);
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto data = read_sosd_file(d_fname, n);
+ auto data = read_sosd_file<Rec>(d_fname, n);
std::vector<size_t> to_delete(n * delete_proportion);
size_t j=0;
for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
@@ -95,21 +55,21 @@ int main(int argc, char **argv) {
/* warmup structure w/ 10% of records */
size_t warmup = .1 * n;
size_t delete_idx = 0;
- insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
extension->await_next_epoch();
TIMER_INIT();
TIMER_START();
- insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
TIMER_STOP();
auto insert_latency = TIMER_RESULT();
size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
TIMER_START();
- run_queries(extension, queries, rng);
+ run_queries<Ext, QP>(extension, queries, rng);
TIMER_STOP();
auto query_latency = TIMER_RESULT() / queries.size();
diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp
index 3df3371..3dc619e 100644
--- a/benchmarks/ts_bench.cpp
+++ b/benchmarks/ts_bench.cpp
@@ -10,7 +10,8 @@
#include "shard/TrieSpline.h"
#include "query/rangecount.h"
#include "framework/interface/Record.h"
-#include "include/data-proc.h"
+#include "include/file_util.h"
+#include "include/standard_benchmarks.h"
#include <gsl/gsl_rng.h>
@@ -18,60 +19,19 @@
typedef de::Record<uint64_t, uint64_t> Rec;
-typedef de::TrieSpline<Rec> TS;
-typedef de::rc::Query<Rec, TS> Q;
-typedef de::DynamicExtension<Rec, TS, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::TrieSpline<Rec> PGM;
+typedef de::rc::Query<Rec, PGM> Q;
+typedef de::DynamicExtension<Rec, PGM, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
typedef de::rc::Parms<Rec> QP;
-void run_queries(Ext *extension, std::vector<QP> &queries, gsl_rng *rng) {
- size_t total;
- for (size_t i=0; i<queries.size(); i++) {
- auto q = &queries[i];
-
- auto res = extension->query(q);
- auto r = res.get();
- total += r.size();
- }
-
- fprintf(stderr, "%ld\n", total);
-}
-
-size_t g_deleted_records = 0;
-double delete_proportion = 0.05;
-
-void insert_records(Ext *extension, size_t start,
- size_t stop,
- std::vector<int64_t> &records,
- std::vector<size_t> &to_delete,
- size_t &delete_idx,
- bool delete_records,
- gsl_rng *rng) {
- size_t reccnt = 0;
- Rec r;
- for (size_t i=start; i<stop; i++) {
- r.key = records[i];
- r.value = i;
-
- while (!extension->insert(r)) {
- usleep(1);
- }
-
- if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) {
- r.key = records[to_delete[delete_idx]];
- r.value = (int64_t) (to_delete[delete_idx]);
- while (!extension->erase(r)) {
- usleep(1);
- }
- delete_idx++;
- g_deleted_records++;
- }
- }
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
}
int main(int argc, char **argv) {
if (argc < 4) {
- fprintf(stderr, "ts_bench reccnt datafile queryfile\n");
+ usage(argv[0]);
exit(EXIT_FAILURE);
}
@@ -82,7 +42,7 @@ int main(int argc, char **argv) {
auto extension = new Ext(12000, 12001, 8, 0, 64);
gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
- auto data = read_sosd_file(d_fname, n);
+ auto data = read_sosd_file<Rec>(d_fname, n);
std::vector<size_t> to_delete(n * delete_proportion);
size_t j=0;
for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
@@ -95,21 +55,21 @@ int main(int argc, char **argv) {
/* warmup structure w/ 10% of records */
size_t warmup = .1 * n;
size_t delete_idx = 0;
- insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
extension->await_next_epoch();
TIMER_INIT();
TIMER_START();
- insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
TIMER_STOP();
auto insert_latency = TIMER_RESULT();
size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
TIMER_START();
- run_queries(extension, queries, rng);
+ run_queries<Ext, QP>(extension, queries, rng);
TIMER_STOP();
auto query_latency = TIMER_RESULT() / queries.size();
diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vptree_bench.cpp
new file mode 100644
index 0000000..f4c7d0e
--- /dev/null
+++ b/benchmarks/vptree_bench.cpp
@@ -0,0 +1,89 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "include/file_util.h"
+#include "include/standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+ fprintf(stderr, "%s reccnt datafile queryfile", progname);
+}
+
+int main(int argc, char **argv) {
+
+ if (argc < 4) {
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ size_t n = atol(argv[1]);
+ std::string d_fname = std::string(argv[2]);
+ std::string q_fname = std::string(argv[3]);
+
+ auto extension = new Ext(100, 1000, 8, 0, 64);
+ gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+ fprintf(stderr, "[I] Reading data file...\n");
+ auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+ fprintf(stderr, "[I] Generating delete vector\n");
+ std::vector<size_t> to_delete(n * delete_proportion);
+ size_t j=0;
+ for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+ if (gsl_rng_uniform(rng) <= delete_proportion) {
+ to_delete[j++] = i;
+ }
+ }
+ fprintf(stderr, "[I] Reading Queries\n");
+ auto queries = read_knn_queries<QP>(q_fname, 10);
+
+ fprintf(stderr, "[I] Warming up structure...\n");
+ /* warmup structure w/ 10% of records */
+ size_t warmup = .1 * n;
+ size_t delete_idx = 0;
+ insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+ extension->await_next_epoch();
+
+ TIMER_INIT();
+
+ fprintf(stderr, "[I] Running Insertion Benchmark\n");
+ TIMER_START();
+ insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+ TIMER_STOP();
+
+ auto insert_latency = TIMER_RESULT();
+ size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+ fprintf(stderr, "[I] Running Query Benchmark\n");
+ TIMER_START();
+ run_queries<Ext, QP>(extension, queries, rng);
+ TIMER_STOP();
+
+ auto query_latency = TIMER_RESULT() / queries.size();
+
+ fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records);
+
+ gsl_rng_free(rng);
+ delete extension;
+ fflush(stderr);
+}
+
diff --git a/benchmarks/watermark_testing_knn.cpp b/benchmarks/watermark_testing_knn.cpp
new file mode 100644
index 0000000..7cea594
--- /dev/null
+++ b/benchmarks/watermark_testing_knn.cpp
@@ -0,0 +1,61 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+
+#include "psu-util/timer.h"
+
+constexpr size_t D = 100;
+
+typedef de::EuclidPoint<int64_t, D> Rec;
+typedef de::VPTree<Rec> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q> Ext;
+
+int main(int argc, char **argv) {
+ std::vector hwms = {1000l, 2000l, 4000l, 10000l};
+ std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9};
+
+ size_t n = 1000000;
+
+ std::vector<Rec> records(n);
+ for (size_t i=0; i<n; i++) {
+ Rec r;
+ for (size_t j=0; j<D; j++) {
+ r.data[j] = rand() % n;
+ }
+ }
+
+ TIMER_INIT();
+
+ for (auto &hwm : hwms) {
+ for (size_t i=0; i<lwms.size(); i++) {
+ size_t lwm = hwm * lwms[i];
+
+ auto extension = new Ext(lwm, hwm, 8);
+ TIMER_START();
+ for (int64_t i=0; i<n; i++) {
+ while (!extension->insert(records[i])) {
+ _mm_pause();
+ }
+ }
+ TIMER_STOP();
+
+ auto insert_time = TIMER_RESULT();
+ double insert_throughput = (double) n / (double) insert_time * 1e9;
+
+ fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput);
+ extension->print_scheduler_statistics();
+
+ fflush(stdout);
+ delete extension;
+ }
+ }
+}
+