summaryrefslogtreecommitdiffstats
path: root/benchmarks/include
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2023-07-25 13:35:30 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2023-07-25 13:35:30 -0400
commit0265acc2caf2d0dffb640ccd4f46c5d9d8163903 (patch)
tree02ad1b455a572d6467fe88c0df4c1a4455ea4cf0 /benchmarks/include
parentdd5b728cb4d849b2a4574f4cd6de40e84a4a4150 (diff)
downloaddynamic-extension-0265acc2caf2d0dffb640ccd4f46c5d9d8163903.tar.gz
ALEX Benchmarking: added benchmark for ALEX
Diffstat (limited to 'benchmarks/include')
-rw-r--r--benchmarks/include/bench_utility.h236
-rw-r--r--benchmarks/include/standalone_utility.h242
2 files changed, 243 insertions, 235 deletions
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
index 28040be..2ab1532 100644
--- a/benchmarks/include/bench_utility.h
+++ b/benchmarks/include/bench_utility.h
@@ -17,6 +17,7 @@
#include "ds/BTree.h"
#include "shard/VPTree.h"
#include "mtree.h"
+#include "standalone_utility.h"
#include <cstdlib>
#include <cstdio>
@@ -49,25 +50,6 @@ typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedI
typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ;
typedef de::DynamicExtension<Word2VecRec, de::VPTree<Word2VecRec>, de::KNNQuery<Word2VecRec>> ExtendedVPTree_KNN;
-struct btree_record {
- key_type key;
- value_type value;
-
- inline bool operator<(const btree_record& other) const {
- return key < other.key || (key == other.key && value < other.value);
- }
-
- inline bool operator==(const btree_record& other) const {
- return key == other.key && value == other.value;
- }
-};
-
-struct btree_key_extract {
- static const key_type &get(const btree_record &v) {
- return v.key;
- }
-};
-
struct euclidean_distance {
double operator()(const Word2VecRec &first, const Word2VecRec &second) const {
double dist = 0;
@@ -99,189 +81,6 @@ struct cosine_similarity {
typedef tlx::BTree<key_type, btree_record, btree_key_extract> TreeMap;
typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
-static gsl_rng *g_rng;
-static std::set<WRec> *g_to_delete;
-static bool g_osm_data;
-
-static key_type g_min_key = UINT64_MAX;
-static key_type g_max_key = 0;
-
-static size_t g_max_record_cnt = 0;
-static size_t g_reccnt = 0;
-
-static constexpr unsigned int DEFAULT_SEED = 0;
-
-static unsigned int get_random_seed()
-{
- unsigned int seed = 0;
- std::fstream urandom;
- urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
- urandom.read((char *) &seed, sizeof(seed));
- urandom.close();
-
- return seed;
-}
-
-static key_type osm_to_key(const char *key_field) {
- double tmp_key = (atof(key_field) + 180) * 10e6;
- return (key_type) tmp_key;
-}
-
-static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
-{
- g_rng = gsl_rng_alloc(type);
- gsl_rng_set(g_rng, seed);
-}
-
-static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
-{
- unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
- init_bench_rng(seed, gsl_rng_mt19937);
- g_to_delete = new std::set<WRec>();
- g_osm_data = osm_correction;
- g_max_record_cnt = max_reccnt;
- g_reccnt = 0;
-}
-
-static void delete_bench_env()
-{
- gsl_rng_free(g_rng);
- delete g_to_delete;
-}
-
-/*
- * NOTE: The QP type must have lower_bound and upper_bound attributes, which
- * this function will initialize. Any other query parameter attributes must
- * be manually initialized after the call.
- */
-template <typename QP>
-static std::vector<QP> read_range_queries(std::string fname, double selectivity) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- size_t start, stop;
- double sel;
- while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
- if (start < stop && std::abs(sel - selectivity) < 0.1) {
- QP q;
- q.lower_bound = start;
- q.upper_bound = stop;
- queries.push_back(q);
- }
- }
- fclose(qf);
-
- return queries;
-}
-
-template <typename QP>
-static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
- std::vector<QP> queries;
-
- FILE *qf = fopen(fname.c_str(), "r");
- char *line = NULL;
- size_t len = 0;
-
- while (getline(&line, &len, qf) > 0) {
- char *token;
- QP query;
- size_t idx = 0;
-
- token = strtok(line, " ");
- do {
- query.point.data[idx++] = atof(token);
- } while ((token = strtok(NULL, " ")));
-
- query.k = k;
- queries.emplace_back(query);
- }
-
- free(line);
- fclose(qf);
-
- return queries;
-}
-
-static bool next_vector_record(std::fstream &file, Word2VecRec &record, bool binary=false) {
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- for (size_t i=0; i<300; i++) {
- std::string dimension;
-
- std::getline(line_stream, dimension, ' ');
- record.data[i] = atof(dimension.c_str());
- }
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-
-}
-
-
-template <de::KVPInterface R>
-static bool next_record(std::fstream &file, R &record, bool binary=false)
-{
- static value_type value = 1;
- if (g_reccnt >= g_max_record_cnt) return false;
-
- if (binary) {
- if (file.good()) {
- decltype(R::key) key;
-
- file.read((char*) &key, sizeof(key));
- record.key = key;
- record.value = value;
- value++;
-
- if constexpr (de::WeightedRecordInterface<R>) {
- decltype(R::weight) weight;
- file.read((char*) &weight, sizeof(weight));
- record.weight = weight;
- }
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- return true;
- }
-
- return false;
- }
-
- std::string line;
- if (std::getline(file, line, '\n')) {
- std::stringstream line_stream(line);
- std::string key_field;
- std::string value_field;
- std::string weight_field;
-
- std::getline(line_stream, value_field, '\t');
- std::getline(line_stream, key_field, '\t');
- std::getline(line_stream, weight_field, '\t');
-
- record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
- record.value = atol(value_field.c_str());
-
- if constexpr (de::WeightedRecordInterface<R>) {
- record.weight = atof(weight_field.c_str());
- }
-
- if (record.key < g_min_key) g_min_key = record.key;
- if (record.key > g_max_key) g_max_key = record.key;
-
- g_reccnt++;
-
- return true;
- }
-
- return false;
-}
-
template <de::RecordInterface R>
static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
double delete_prop, std::vector<R> &to_delete, bool binary=false) {
@@ -316,39 +115,6 @@ static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
return true;
}
-template <de::RecordInterface R>
-static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
- vec.clear();
-
- size_t cnt = 0;
- while (cnt < n) {
- if (to_delete.size() == 0) {
- return false;
- }
-
- auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
- vec.emplace_back(to_delete[i]);
- to_delete.erase(to_delete.begin() + i);
- }
-td:
- return true;
-}
-
-/*
- * helper routines for displaying progress bars to stderr
- */
-static const char *g_prog_bar = "======================================================================";
-static const size_t g_prog_width = 50;
-
-static void progress_update(double percentage, std::string prompt) {
- int val = (int) (percentage * 100);
- int lpad = (int) (percentage * g_prog_width);
- int rpad = (int) (g_prog_width - lpad);
- fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, "");
- fflush(stderr);
-
- if (percentage >= 1) fprintf(stderr, "\n");
-}
template <typename DE, de::RecordInterface R>
static bool warmup(std::fstream &file, DE &extended_index, size_t count,
diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/include/standalone_utility.h
new file mode 100644
index 0000000..272a332
--- /dev/null
+++ b/benchmarks/include/standalone_utility.h
@@ -0,0 +1,242 @@
+#include <cstdlib>
+#include <cstdio>
+#include <chrono>
+#include <algorithm>
+#include <numeric>
+#include <memory>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <unordered_set>
+#include <set>
+#include <string>
+#include <random>
+#include <gsl/gsl_rng.h>
+#include <cstring>
+
+typedef uint64_t key_type;
+typedef uint64_t value_type;
+typedef uint64_t weight_type;
+
+static gsl_rng *g_rng;
+static bool g_osm_data;
+
+struct btree_record {
+ key_type key;
+ value_type value;
+
+ inline bool operator<(const btree_record& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool operator==(const btree_record& other) const {
+ return key == other.key && value == other.value;
+ }
+};
+
+struct btree_key_extract {
+ static const key_type &get(const btree_record &v) {
+ return v.key;
+ }
+};
+
+static key_type g_min_key = UINT64_MAX;
+static key_type g_max_key = 0;
+
+static size_t g_max_record_cnt = 0;
+static size_t g_reccnt = 0;
+
+static constexpr unsigned int DEFAULT_SEED = 0;
+
+static unsigned int get_random_seed()
+{
+ unsigned int seed = 0;
+ std::fstream urandom;
+ urandom.open("/dev/urandom", std::ios::in|std::ios::binary);
+ urandom.read((char *) &seed, sizeof(seed));
+ urandom.close();
+
+ return seed;
+}
+
+static key_type osm_to_key(const char *key_field) {
+ double tmp_key = (atof(key_field) + 180) * 10e6;
+ return (key_type) tmp_key;
+}
+
+static void init_bench_rng(unsigned int seed, const gsl_rng_type *type)
+{
+ g_rng = gsl_rng_alloc(type);
+ gsl_rng_set(g_rng, seed);
+}
+
+static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true)
+{
+ unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED;
+ init_bench_rng(seed, gsl_rng_mt19937);
+ g_osm_data = osm_correction;
+ g_max_record_cnt = max_reccnt;
+ g_reccnt = 0;
+}
+
+static void delete_bench_env()
+{
+ gsl_rng_free(g_rng);
+}
+
+template <typename QP>
+static std::vector<QP> read_range_queries(std::string fname, double selectivity) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ size_t start, stop;
+ double sel;
+ while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) {
+ if (start < stop && std::abs(sel - selectivity) < 0.1) {
+ QP q;
+ q.lower_bound = start;
+ q.upper_bound = stop;
+ queries.push_back(q);
+ }
+ }
+ fclose(qf);
+
+ return queries;
+}
+
+template <typename QP>
+static std::vector<QP> read_knn_queries(std::string fname, size_t k) {
+ std::vector<QP> queries;
+
+ FILE *qf = fopen(fname.c_str(), "r");
+ char *line = NULL;
+ size_t len = 0;
+
+ while (getline(&line, &len, qf) > 0) {
+ char *token;
+ QP query;
+ size_t idx = 0;
+
+ token = strtok(line, " ");
+ do {
+ query.point.data[idx++] = atof(token);
+ } while ((token = strtok(NULL, " ")));
+
+ query.k = k;
+ queries.emplace_back(query);
+ }
+
+ free(line);
+ fclose(qf);
+
+ return queries;
+}
+
+/*
+ * NOTE: The QP type must have lower_bound and upper_bound attributes, which
+ * this function will initialize. Any other query parameter attributes must
+ * be manually initialized after the call.
+ */
+template <typename R>
+static bool next_vector_record(std::fstream &file, R &record, bool binary=false) {
+ std::string line;
+ if (std::getline(file, line, '\n')) {
+ std::stringstream line_stream(line);
+ for (size_t i=0; i<300; i++) {
+ std::string dimension;
+
+ std::getline(line_stream, dimension, ' ');
+ record.data[i] = atof(dimension.c_str());
+ }
+
+ g_reccnt++;
+
+ return true;
+ }
+
+ return false;
+
+}
+
+template <typename R>
+static bool next_record(std::fstream &file, R &record, bool binary=false)
+{
+ static value_type value = 1;
+ if (g_reccnt >= g_max_record_cnt) return false;
+
+ if (binary) {
+ if (file.good()) {
+ decltype(R::key) key;
+
+ file.read((char*) &key, sizeof(key));
+ record.key = key;
+ record.value = value;
+ value++;
+
+ if (record.key < g_min_key) g_min_key = record.key;
+ if (record.key > g_max_key) g_max_key = record.key;
+
+ return true;
+ }
+
+ return false;
+ }
+
+ std::string line;
+ if (std::getline(file, line, '\n')) {
+ std::stringstream line_stream(line);
+ std::string key_field;
+ std::string value_field;
+ std::string weight_field;
+
+ std::getline(line_stream, value_field, '\t');
+ std::getline(line_stream, key_field, '\t');
+ std::getline(line_stream, weight_field, '\t');
+
+ record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str());
+ record.value = atol(value_field.c_str());
+
+ if (record.key < g_min_key) g_min_key = record.key;
+ if (record.key > g_max_key) g_max_key = record.key;
+
+ g_reccnt++;
+
+ return true;
+ }
+
+ return false;
+}
+
+template <typename R>
+static bool build_delete_vec(std::vector<R> &to_delete, std::vector<R> &vec, size_t n) {
+ vec.clear();
+
+ size_t cnt = 0;
+ while (cnt < n) {
+ if (to_delete.size() == 0) {
+ return false;
+ }
+
+ auto i = gsl_rng_uniform_int(g_rng, to_delete.size());
+ vec.emplace_back(to_delete[i]);
+ to_delete.erase(to_delete.begin() + i);
+ }
+td:
+ return true;
+}
+
+/*
+ * helper routines for displaying progress bars to stderr
+ */
+static const char *g_prog_bar = "======================================================================";
+static const size_t g_prog_width = 50;
+
+static void progress_update(double percentage, std::string prompt) {
+ int val = (int) (percentage * 100);
+ int lpad = (int) (percentage * g_prog_width);
+ int rpad = (int) (g_prog_width - lpad);
+ fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, "");
+ fflush(stderr);
+
+ if (percentage >= 1) fprintf(stderr, "\n");
+}