summary | refs | log | tree | commit | diff | stats
path: root/benchmarks
diff options
context:
space:
mode:
author Douglas Rumbaugh <dbr4@psu.edu> 2023-07-23 14:17:38 -0400
committer Douglas Rumbaugh <dbr4@psu.edu> 2023-07-23 14:17:38 -0400
commit fc8b4c14bd2814447b5d3180c4ecf3742196c6bf (patch)
tree 0496dfead96639d2813f0a364f92dec0d7871cde /benchmarks
parent 4c616412f938bc06a12e7526c4e314e4451c083c (diff)
download dynamic-extension-fc8b4c14bd2814447b5d3180c4ecf3742196c6bf.tar.gz
Benchmarking updates
Diffstat (limited to 'benchmarks')
-rw-r--r-- benchmarks/include/bench.h 6
-rw-r--r-- benchmarks/include/bench_utility.h 42
-rw-r--r-- benchmarks/isam_irs_bench.cpp 64
-rw-r--r-- benchmarks/isam_rq_bench.cpp 59
-rw-r--r-- benchmarks/pgm_rq_bench.cpp 6
5 files changed, 162 insertions, 15 deletions
diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h
index 3e1c6b2..e0f4c1d 100644
--- a/benchmarks/include/bench.h
+++ b/benchmarks/include/bench.h
@@ -12,7 +12,7 @@
template <typename DE, de::RecordInterface R, bool PROGRESS=true, size_t BATCH=1000>
static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt,
- double delete_prop, std::vector<R> &to_delete) {
+ double delete_prop, std::vector<R> &to_delete, bool binary=false) {
size_t delete_cnt = insert_cnt * delete_prop;
@@ -31,7 +31,7 @@ static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cn
size_t total_time = 0;
while (applied_inserts < insert_cnt && continue_benchmark) {
- continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete);
+ continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
if (applied_deletes < delete_cnt) {
build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
delete_idx = 0;
@@ -104,7 +104,6 @@ static bool query_latency_bench(DE &de_index, std::vector<QP> queries, size_t tr
size_t query_latency = total_time / (trial_cnt * queries.size());
fprintf(stdout, "%ld\t", query_latency);
- fprintf(stderr, "%ld\n", total_results);
fflush(stdout);
return true;
@@ -146,7 +145,6 @@ static bool static_latency_bench(Shard *shard, std::vector<QP> queries, size_t t
size_t query_latency = total_time / (trial_cnt * queries.size());
fprintf(stdout, "%ld\t", query_latency);
- fprintf(stderr, "%ld\n", total_results);
fflush(stdout);
return true;
diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h
index 2d31cae..a5f5e0b 100644
--- a/benchmarks/include/bench_utility.h
+++ b/benchmarks/include/bench_utility.h
@@ -30,7 +30,7 @@
#include <random>
typedef uint64_t key_type;
-typedef uint32_t value_type;
+typedef uint64_t value_type;
typedef uint64_t weight_type;
typedef de::WeightedRecord<key_type, value_type, weight_type> WRec;
@@ -39,6 +39,8 @@ typedef de::Record<key_type, value_type> Rec;
typedef de::DynamicExtension<WRec, de::WSS<WRec>, de::WSSQuery<WRec>> ExtendedWSS;
typedef de::DynamicExtension<Rec, de::TrieSpline<Rec>, de::TrieSplineRangeQuery<Rec>> ExtendedTSRQ;
typedef de::DynamicExtension<Rec, de::PGM<Rec>, de::PGMRangeQuery<Rec>> ExtendedPGMRQ;
+typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::IRSQuery<Rec>> ExtendedISAM_IRS;
+typedef de::DynamicExtension<Rec, de::MemISAM<Rec>, de::ISAMRangeQuery<Rec>> ExtendedISAM_RQ;
static gsl_rng *g_rng;
static std::set<WRec> *g_to_delete;
@@ -115,11 +117,36 @@ static std::vector<QP> read_range_queries(std::string fname, double selectivity)
return queries;
}
-template <de::RecordInterface R>
-static bool next_record(std::fstream &file, R &record)
+template <de::KVPInterface R>
+static bool next_record(std::fstream &file, R &record, bool binary=false)
{
+ static value_type value = 1;
if (g_reccnt >= g_max_record_cnt) return false;
+ if (binary) {
+ if (file.good()) {
+ decltype(R::key) key;
+
+ file.read((char*) &key, sizeof(key));
+ record.key = key;
+ record.value = value;
+ value++;
+
+ if constexpr (de::WeightedRecordInterface<R>) {
+ decltype(R::weight) weight;
+ file.read((char*) &weight, sizeof(weight));
+ record.weight = weight;
+ }
+
+ if (record.key < g_min_key) g_min_key = record.key;
+ if (record.key > g_max_key) g_max_key = record.key;
+
+ return true;
+ }
+
+ return false;
+ }
+
std::string line;
if (std::getline(file, line, '\n')) {
std::stringstream line_stream(line);
@@ -139,7 +166,6 @@ static bool next_record(std::fstream &file, R &record)
}
if (record.key < g_min_key) g_min_key = record.key;
-
if (record.key > g_max_key) g_max_key = record.key;
g_reccnt++;
@@ -152,11 +178,11 @@ static bool next_record(std::fstream &file, R &record)
template <de::RecordInterface R>
static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
- double delete_prop, std::vector<R> &to_delete) {
+ double delete_prop, std::vector<R> &to_delete, bool binary=false) {
vec.clear();
for (size_t i=0; i<n; i++) {
R rec;
- if (!next_record(file, rec)) {
+ if (!next_record(file, rec, binary)) {
if (i == 0) {
return false;
}
@@ -210,7 +236,7 @@ static void progress_update(double percentage, std::string prompt) {
template <typename DE, de::RecordInterface R>
static bool warmup(std::fstream &file, DE &extended_index, size_t count,
- double delete_prop, std::vector<R> to_delete, bool progress=true) {
+ double delete_prop, std::vector<R> to_delete, bool progress=true, bool binary=false) {
size_t batch = std::min(.1 * count, 25000.0);
std::vector<R> insert_vec;
@@ -224,7 +250,7 @@ static bool warmup(std::fstream &file, DE &extended_index, size_t count,
double last_percent = 0;
while (inserted < count) {
// Build vector of records to insert and potentially delete
- auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete);
+ auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary);
if (inserted > batch) {
build_delete_vec(to_delete, delete_vec, batch*delete_prop);
delete_idx = 0;
diff --git a/benchmarks/isam_irs_bench.cpp b/benchmarks/isam_irs_bench.cpp
new file mode 100644
index 0000000..96525f0
--- /dev/null
+++ b/benchmarks/isam_irs_bench.cpp
@@ -0,0 +1,64 @@
+#include "include/bench.h"
+
+/*
+ * Benchmark driver for ExtendedISAM_IRS, the DynamicExtension over
+ * de::MemISAM<Rec> queried through de::IRSQuery<Rec>.
+ *
+ * Arguments: <filename> <record_count> <delete_proportion> <query_file>.
+ *
+ * Exits with EXIT_FAILURE when fewer than four arguments are given or when
+ * the data file cannot be opened; otherwise exits with EXIT_SUCCESS.
+ *
+ * This function itself writes two newline-terminated rows to stdout. Each
+ * row contains a tab-terminated memory usage figure (dynamic structure
+ * first, then the static structure built from it) alongside whatever the
+ * benchmark helpers it calls print.
+ */
+int main(int argc, char **argv)
+{
+    if (argc < 5) {
+        fprintf(stderr, "Usage: isam_irs_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+        exit(EXIT_FAILURE);
+    }
+
+    std::string filename = std::string(argv[1]);
+    size_t record_count = atol(argv[2]);
+    double delete_prop = atof(argv[3]);
+    std::string qfilename = std::string(argv[4]);
+
+    // Open the data file before any other setup so that a bad path is
+    // reported immediately instead of silently benchmarking an empty index.
+    std::fstream datafile;
+    datafile.open(filename, std::ios::in | std::ios::binary);
+    if (!datafile.is_open()) {
+        fprintf(stderr, "Unable to open data file: %s\n", filename.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    size_t buffer_cap = 12000;
+    size_t scale_factor = 6;
+    double max_delete_prop = delete_prop;
+    bool use_osm = false;
+
+    // Fraction of record_count inserted during warmup, before timing starts.
+    double insert_batch = 0.1;
+
+    init_bench_env(record_count, true, use_osm);
+    auto queries = read_range_queries<de::irs_query_parms<Rec>>(qfilename, .001);
+
+    // Every sampling query shares the global RNG and requests 1000 samples.
+    for (auto &q: queries) {
+        q.rng = g_rng;
+        q.sample_size = 1000;
+    }
+
+    auto de_irs = ExtendedISAM_IRS(buffer_cap, scale_factor, max_delete_prop);
+
+    std::vector<Rec> to_delete;
+
+    // Warm up the structure with the first warmup_cnt records of the data
+    // file, read in binary mode (final argument).
+    size_t warmup_cnt = insert_batch * record_count;
+    warmup<ExtendedISAM_IRS, Rec>(datafile, de_irs, warmup_cnt, delete_prop, to_delete, true, true);
+
+    // The remaining records are inserted under measurement.
+    size_t insert_cnt = record_count - warmup_cnt;
+
+    insert_tput_bench<ExtendedISAM_IRS, Rec>(de_irs, datafile, insert_cnt, delete_prop, to_delete, true);
+    fprintf(stdout, "%ld\t", de_irs.get_memory_usage());
+    query_latency_bench<ExtendedISAM_IRS, Rec, de::irs_query_parms<Rec>>(de_irs, queries);
+    fprintf(stdout, "\n");
+
+    // Repeat the memory and latency measurements on the static structure.
+    auto ts = de_irs.create_static_structure();
+
+    fprintf(stdout, "%ld\t", ts->get_memory_usage());
+    static_latency_bench<de::MemISAM<Rec>, Rec, de::irs_query_parms<Rec>, de::IRSQuery<Rec>>(
+        ts, queries, 1
+    );
+    fprintf(stdout, "\n");
+
+    delete ts;
+
+    delete_bench_env();
+    fflush(stdout);
+    fflush(stderr);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/isam_rq_bench.cpp b/benchmarks/isam_rq_bench.cpp
new file mode 100644
index 0000000..bb5626e
--- /dev/null
+++ b/benchmarks/isam_rq_bench.cpp
@@ -0,0 +1,59 @@
+#include "include/bench.h"
+
+/*
+ * Benchmark driver for ExtendedISAM_RQ, the DynamicExtension over
+ * de::MemISAM<Rec> queried through de::ISAMRangeQuery<Rec>.
+ *
+ * Arguments: <filename> <record_count> <delete_proportion> <query_file>.
+ *
+ * Exits with EXIT_FAILURE when fewer than four arguments are given or when
+ * the data file cannot be opened; otherwise exits with EXIT_SUCCESS.
+ *
+ * This function itself writes two newline-terminated rows to stdout. Each
+ * row contains a tab-terminated memory usage figure (dynamic structure
+ * first, then the static structure built from it) alongside whatever the
+ * benchmark helpers it calls print.
+ */
+int main(int argc, char **argv)
+{
+    if (argc < 5) {
+        fprintf(stderr, "Usage: isam_rq_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+        exit(EXIT_FAILURE);
+    }
+
+    std::string filename = std::string(argv[1]);
+    size_t record_count = atol(argv[2]);
+    double delete_prop = atof(argv[3]);
+    std::string qfilename = std::string(argv[4]);
+
+    // Open the data file before any other setup so that a bad path is
+    // reported immediately instead of silently benchmarking an empty index.
+    std::fstream datafile;
+    datafile.open(filename, std::ios::in | std::ios::binary);
+    if (!datafile.is_open()) {
+        fprintf(stderr, "Unable to open data file: %s\n", filename.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    size_t buffer_cap = 12000;
+    size_t scale_factor = 6;
+    double max_delete_prop = delete_prop;
+    bool use_osm = false;
+
+    // Fraction of record_count inserted during warmup, before timing starts.
+    double insert_batch = 0.1;
+
+    init_bench_env(record_count, true, use_osm);
+    auto queries = read_range_queries<de::ISAMRangeQueryParms<Rec>>(qfilename, .0001);
+
+    auto de_isam_rq = ExtendedISAM_RQ(buffer_cap, scale_factor, max_delete_prop);
+
+    std::vector<Rec> to_delete;
+
+    // Warm up the structure with the first warmup_cnt records of the data
+    // file, read in binary mode (final argument).
+    size_t warmup_cnt = insert_batch * record_count;
+    warmup<ExtendedISAM_RQ, Rec>(datafile, de_isam_rq, warmup_cnt, delete_prop, to_delete, true, true);
+
+    // The remaining records are inserted under measurement.
+    size_t insert_cnt = record_count - warmup_cnt;
+
+    insert_tput_bench<ExtendedISAM_RQ, Rec>(de_isam_rq, datafile, insert_cnt, delete_prop, to_delete, true);
+    fprintf(stdout, "%ld\t", de_isam_rq.get_memory_usage());
+    query_latency_bench<ExtendedISAM_RQ, Rec, de::ISAMRangeQueryParms<Rec>>(de_isam_rq, queries);
+    fprintf(stdout, "\n");
+
+    // Repeat the memory and latency measurements on the static structure.
+    auto ts = de_isam_rq.create_static_structure();
+
+    fprintf(stdout, "%ld\t", ts->get_memory_usage());
+    static_latency_bench<de::MemISAM<Rec>, Rec, de::ISAMRangeQueryParms<Rec>, de::ISAMRangeQuery<Rec>>(
+        ts, queries, 1
+    );
+    fprintf(stdout, "\n");
+
+    delete ts;
+
+    delete_bench_env();
+    fflush(stdout);
+    fflush(stderr);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/pgm_rq_bench.cpp b/benchmarks/pgm_rq_bench.cpp
index aac6e07..3acc34e 100644
--- a/benchmarks/pgm_rq_bench.cpp
+++ b/benchmarks/pgm_rq_bench.cpp
@@ -32,18 +32,18 @@ int main(int argc, char **argv)
auto queries = read_range_queries<de::pgm_range_query_parms<Rec>>(query_file, .0001);
std::fstream datafile;
- datafile.open(filename, std::ios::in);
+ datafile.open(filename, std::ios::in | std::ios::binary);
std::vector<Rec> to_delete;
// warm up the tree with initial_insertions number of initially inserted
// records
size_t warmup_cnt = insert_batch * record_count;
- warmup<ExtendedPGMRQ, Rec>(datafile, de, warmup_cnt, delete_prop, to_delete);
+ warmup<ExtendedPGMRQ, Rec>(datafile, de, warmup_cnt, delete_prop, to_delete, true, true);
size_t insert_cnt = record_count - warmup_cnt;
- insert_tput_bench<ExtendedPGMRQ, Rec>(de, datafile, insert_cnt, delete_prop, to_delete);
+ insert_tput_bench<ExtendedPGMRQ, Rec>(de, datafile, insert_cnt, delete_prop, to_delete, true);
fprintf(stdout, "%ld\t", de.get_memory_usage());
query_latency_bench<ExtendedPGMRQ, Rec, de::pgm_range_query_parms<Rec>>(de, queries, 1);