From f24fdf2fd310a5f868e15cd9682ca37d740c77af Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Tue, 30 Jan 2024 15:31:03 -0500 Subject: Benchmarking updates --- benchmarks/include/bench.h | 162 ------------- benchmarks/include/bench_utility.h | 181 --------------- benchmarks/include/btree-util.h | 27 +++ benchmarks/include/data-proc.h | 244 ++++++++++++++++++++ benchmarks/include/standalone_utility.h | 262 ---------------------- benchmarks/insert_tail_latency.cpp | 80 +++++++ benchmarks/old-bench/include/bench.h | 162 +++++++++++++ benchmarks/old-bench/include/bench_utility.h | 181 +++++++++++++++ benchmarks/old-bench/include/standalone_utility.h | 240 ++++++++++++++++++++ benchmarks/static_dynamic_comp.cpp | 117 ++++++++++ benchmarks/watermark_testing.cpp | 4 +- include/framework/scheduling/statistics.h | 29 ++- 12 files changed, 1077 insertions(+), 612 deletions(-) delete mode 100644 benchmarks/include/bench.h delete mode 100644 benchmarks/include/bench_utility.h create mode 100644 benchmarks/include/btree-util.h create mode 100644 benchmarks/include/data-proc.h delete mode 100644 benchmarks/include/standalone_utility.h create mode 100644 benchmarks/insert_tail_latency.cpp create mode 100644 benchmarks/old-bench/include/bench.h create mode 100644 benchmarks/old-bench/include/bench_utility.h create mode 100644 benchmarks/old-bench/include/standalone_utility.h create mode 100644 benchmarks/static_dynamic_comp.cpp diff --git a/benchmarks/include/bench.h b/benchmarks/include/bench.h deleted file mode 100644 index 586ff12..0000000 --- a/benchmarks/include/bench.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * benchmarks/include/bench.h - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "bench_utility.h" - -template -static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, - double delete_prop, std::vector &to_delete, bool binary=false) { - - size_t delete_cnt = insert_cnt * delete_prop; - - size_t applied_deletes = 0; - size_t applied_inserts = 0; - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(BATCH); - delete_vec.reserve(BATCH*delete_prop); - - size_t delete_idx = 0; - - bool continue_benchmark = true; - - size_t total_time = 0; - - while (applied_inserts < insert_cnt && continue_benchmark) { - continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); - if (applied_deletes < delete_cnt) { - build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); - delete_idx = 0; - } - - if (insert_vec.size() == 0) { - break; - } - - if constexpr (PROGRESS) { - progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); - } - - auto insert_start = std::chrono::high_resolution_clock::now(); - for (size_t i=0; i) { - de_index.erase_one(delete_vec[delete_idx++].key); - } else if constexpr (std::is_same_v) { - de_index.remove(delete_vec[delete_idx++]); - } else { - de_index.erase(delete_vec[delete_idx++]); - } - applied_deletes++; - } - - // insert the record; - if constexpr (std::is_same_v) { - de_index.add(insert_vec[i]); - } else { - de_index.insert(insert_vec[i]); - } - applied_inserts++; - } - auto insert_stop = std::chrono::high_resolution_clock::now(); - - total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); - } - - if constexpr (PROGRESS) { - progress_update(1.0, "inserting:"); - } - - size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); - - fprintf(stdout, "%ld\t", throughput); - reset_de_perf_metrics(); - - return continue_benchmark; -} - -template -static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} - - -template -static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { - char progbuf[25]; - if constexpr (PROGRESS) { - sprintf(progbuf, "querying:"); - } - - size_t total_time = 0; - size_t total_results = 0; - - for (size_t i=0; i states(1); - - auto start = std::chrono::high_resolution_clock::now(); - for (size_t j=0; j(stop - start).count(); - } - - progress_update(1.0, progbuf); - - size_t query_latency = total_time / (trial_cnt * queries.size()); - - fprintf(stdout, "%ld\t", query_latency); - fflush(stdout); - - return true; -} diff --git a/benchmarks/include/bench_utility.h b/benchmarks/include/bench_utility.h deleted file mode 100644 index e33b93d..0000000 --- a/benchmarks/include/bench_utility.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * benchmarks/include/bench_utility.h - * - * Copyright (C) 2023 Douglas Rumbaugh - * - * All rights reserved. Published under the Modified BSD License. - * - */ -#pragma once - -#include "framework/DynamicExtension.h" -#include "shard/WSS.h" -#include "shard/MemISAM.h" -#include "shard/PGM.h" -#include "shard/TrieSpline.h" -#include "shard/WIRS.h" -#include "ds/BTree.h" -#include "shard/VPTree.h" -#include "mtree.h" -#include "standalone_utility.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef uint64_t key_type; -typedef uint64_t value_type; -typedef uint64_t weight_type; - -typedef de::WeightedRecord WRec; -typedef de::Record Rec; - -const size_t W2V_SIZE = 300; -typedef de::EuclidPoint Word2VecRec; - -typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; -typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; -typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; -typedef de::DynamicExtension, de::PGMPointLookup> ExtendedPGM_PL; -typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; -typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; -typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; - -struct euclidean_distance { - double operator()(const Word2VecRec &first, const Word2VecRec &second) const { - double dist = 0; - for (size_t i=0; i TreeMap; -typedef mt::mtree MTree; - -template -static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, - double delete_prop, std::vector &to_delete, bool binary=false) { - vec.clear(); - for (size_t i=0; i) { - if (!next_vector_record(file, rec)) { - if (i == 0) { - return false; - } - - break; - } - } else { - if (!next_record(file, rec, binary)) { - if (i == 0) { - return false; - } - - break; - } - } - - vec.emplace_back(rec); - - if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { - to_delete.emplace_back(rec); - } - } - - return true; -} - - -template -static bool warmup(std::fstream &file, DE &extended_index, size_t count, - double delete_prop, std::vector to_delete, bool progress=true, bool binary=false) { - size_t batch = std::min(.1 * count, 25000.0); - - std::vector insert_vec; - std::vector delete_vec; - insert_vec.reserve(batch); - delete_vec.reserve(batch*delete_prop); - - size_t inserted = 0; - size_t delete_idx = 0; - - double last_percent = 0; - while (inserted < count) { - // Build vector of records to insert and potentially delete - auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); - if (inserted > batch) { - build_delete_vec(to_delete, delete_vec, batch*delete_prop); - delete_idx = 0; - } - - for (size_t i=0; i) { - extended_index.erase_one(delete_vec[delete_idx++].key); - } - else if constexpr (std::is_same_v) { - extended_index.remove(delete_vec[delete_idx++]); - } else { - extended_index.erase(delete_vec[delete_idx++]); - } - } - - // insert the record; - if constexpr (std::is_same_v) { - extended_index.add(insert_vec[i]); - } else { - extended_index.insert(insert_vec[i]); - } - inserted++; - - if (progress) { - progress_update((double) inserted / (double) count, "warming up:"); - } - } - } - - return true; -} - - -static void reset_de_perf_metrics() { - - /* - * rejection counters are zeroed automatically by the - * sampling function itself. - */ - - RESET_IO_CNT(); -} diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h new file mode 100644 index 0000000..571c073 --- /dev/null +++ b/benchmarks/include/btree-util.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include "psu-ds/BTree.h" + +struct btree_record { + int64_t key; + int64_t value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const int64_t &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree BenchBTree; + + diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h new file mode 100644 index 0000000..f758ed4 --- /dev/null +++ b/benchmarks/include/data-proc.h @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "psu-ds/BTree.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +static gsl_rng *g_rng; +static bool g_osm_data; + +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree BenchBTree; + +static key_type g_min_key = UINT64_MAX; +static key_type g_max_key = 0; + +static size_t g_max_record_cnt = 0; +static size_t g_reccnt = 0; + +static constexpr unsigned int DEFAULT_SEED = 0; + +static unsigned int get_random_seed() +{ + unsigned int seed = 0; + std::fstream urandom; + urandom.open("/dev/urandom", std::ios::in|std::ios::binary); + urandom.read((char *) &seed, sizeof(seed)); + urandom.close(); + + return seed; +} + +static key_type osm_to_key(const char *key_field) { + double tmp_key = (atof(key_field) + 180) * 10e6; + return (key_type) tmp_key; +} + +static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) +{ + g_rng = gsl_rng_alloc(type); + gsl_rng_set(g_rng, seed); +} + +static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) +{ + unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; + init_bench_rng(seed, gsl_rng_mt19937); + g_osm_data = osm_correction; + g_max_record_cnt = max_reccnt; + g_reccnt = 0; +} + +static void delete_bench_env() +{ + gsl_rng_free(g_rng); +} + + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +/* + * NOTE: The QP type must have lower_bound and upper_bound attributes, which + * this function will initialize. Any other query parameter attributes must + * be manually initialized after the call. + */ +template +static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + +template +static bool next_record(std::fstream &file, R &record, bool binary=false) +{ + static value_type value = 1; + if (g_reccnt >= g_max_record_cnt) return false; + + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + std::string key_field; + std::string value_field; + std::string weight_field; + + std::getline(line_stream, value_field, '\t'); + std::getline(line_stream, key_field, '\t'); + std::getline(line_stream, weight_field, '\t'); + + record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); + record.value = atol(value_field.c_str()); + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + g_reccnt++; + + return true; + } + + return false; +} + +template +static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { + vec.clear(); + + size_t cnt = 0; + while (cnt < n) { + if (to_delete.size() == 0) { + return false; + } + + auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); + vec.emplace_back(to_delete[i]); + to_delete.erase(to_delete.begin() + i); + } +td: + return true; +} diff --git a/benchmarks/include/standalone_utility.h b/benchmarks/include/standalone_utility.h deleted file mode 100644 index 9876e84..0000000 --- a/benchmarks/include/standalone_utility.h +++ /dev/null @@ -1,262 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef uint64_t key_type; -typedef uint64_t value_type; -typedef uint64_t weight_type; - -static gsl_rng *g_rng; -static bool g_osm_data; - -struct btree_record { - key_type key; - value_type value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const key_type &get(const btree_record &v) { - return v.key; - } -}; - -static key_type g_min_key = UINT64_MAX; -static key_type g_max_key = 0; - -static size_t g_max_record_cnt = 0; -static size_t g_reccnt = 0; - -static constexpr unsigned int DEFAULT_SEED = 0; - -static unsigned int get_random_seed() -{ - unsigned int seed = 0; - std::fstream urandom; - urandom.open("/dev/urandom", std::ios::in|std::ios::binary); - urandom.read((char *) &seed, sizeof(seed)); - urandom.close(); - - return seed; -} - -static key_type osm_to_key(const char *key_field) { - double tmp_key = (atof(key_field) + 180) * 10e6; - return (key_type) tmp_key; -} - -static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) -{ - g_rng = gsl_rng_alloc(type); - gsl_rng_set(g_rng, seed); -} - -static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) -{ - unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; - init_bench_rng(seed, gsl_rng_mt19937); - g_osm_data = osm_correction; - g_max_record_cnt = max_reccnt; - g_reccnt = 0; -} - -static void delete_bench_env() -{ - gsl_rng_free(g_rng); -} - - -template -static std::vector read_lookup_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.target_key = start; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_range_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.lower_bound = start; - q.upper_bound = stop; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_knn_queries(std::string fname, size_t k) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - char *line = NULL; - size_t len = 0; - - while (getline(&line, &len, qf) > 0) { - char *token; - QP query; - size_t idx = 0; - - token = strtok(line, " "); - do { - query.point.data[idx++] = atof(token); - } while ((token = strtok(NULL, " "))); - - query.k = k; - queries.emplace_back(query); - } - - free(line); - fclose(qf); - - return queries; -} - -/* - * NOTE: The QP type must have lower_bound and upper_bound attributes, which - * this function will initialize. Any other query parameter attributes must - * be manually initialized after the call. - */ -template -static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - for (size_t i=0; i<300; i++) { - std::string dimension; - - std::getline(line_stream, dimension, ' '); - record.data[i] = atof(dimension.c_str()); - } - - g_reccnt++; - - return true; - } - - return false; - -} - -template -static bool next_record(std::fstream &file, R &record, bool binary=false) -{ - static value_type value = 1; - if (g_reccnt >= g_max_record_cnt) return false; - - if (binary) { - if (file.good()) { - decltype(R::key) key; - - file.read((char*) &key, sizeof(key)); - record.key = key; - record.value = value; - value++; - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - return true; - } - - return false; - } - - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - std::string key_field; - std::string value_field; - std::string weight_field; - - std::getline(line_stream, value_field, '\t'); - std::getline(line_stream, key_field, '\t'); - std::getline(line_stream, weight_field, '\t'); - - record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record.value = atol(value_field.c_str()); - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - g_reccnt++; - - return true; - } - - return false; -} - -template -static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { - vec.clear(); - - size_t cnt = 0; - while (cnt < n) { - if (to_delete.size() == 0) { - return false; - } - - auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); - vec.emplace_back(to_delete[i]); - to_delete.erase(to_delete.begin() + i); - } -td: - return true; -} - -/* - * helper routines for displaying progress bars to stderr - */ -static const char *g_prog_bar = "======================================================================"; -static const size_t g_prog_width = 50; - -static void progress_update(double percentage, std::string prompt) { - int val = (int) (percentage * 100); - int lpad = (int) (percentage * g_prog_width); - int rpad = (int) (g_prog_width - lpad); - fprintf(stderr, "\r(%3d%%) %20s [%.*s%*s]", val, prompt.c_str(), lpad, g_prog_bar, rpad, ""); - fflush(stderr); - - if (percentage >= 1) fprintf(stderr, "\n"); -} diff --git a/benchmarks/insert_tail_latency.cpp b/benchmarks/insert_tail_latency.cpp new file mode 100644 index 0000000..5e32898 --- /dev/null +++ b/benchmarks/insert_tail_latency.cpp @@ -0,0 +1,80 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include +#include + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +std::atomic total_latency = 0; + +void insert_thread(Ext *extension, size_t n, size_t k, size_t rate) { + int64_t delay = (1.0 / (double) rate) * 10e6; // delay in us + TIMER_INIT(); + for (int64_t i=0; iinsert(r)) { + _mm_pause(); + } + + //usleep(delay); + /* + for (size_t i=0; i<10000; i++) { + __asm__ __volatile__ ("":::"memory"); + } + */ + } + TIMER_STOP(); + + auto insert_lat = TIMER_RESULT(); + + total_latency.fetch_add(insert_lat); + fprintf(stdout, "I\t%ld\t%ld\t%ld\n", i+k, insert_lat, k); + } +} + +int main(int argc, char **argv) { + + /* the closeout routine takes _forever_ ... so we'll just leak the memory */ + auto extension = new Ext(100, 1000000, 3); + size_t n = 100000000; + size_t per_trial = 1000; + double selectivity = .001; + size_t rate = 1000000; + + total_latency.store(0); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + std::thread i_thrd1(insert_thread, extension, n/2, per_trial, rate); + std::thread i_thrd2(insert_thread, extension, n/2, per_trial, rate); + + i_thrd1.join(); + i_thrd2.join(); + + auto avg_latency = total_latency.load() / n; + auto throughput = (int64_t) ((double) n / (double) total_latency * 1e9); + + fprintf(stdout, "AVG LAT: %ld\nThroughput: %ld\n", avg_latency, throughput); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/old-bench/include/bench.h b/benchmarks/old-bench/include/bench.h new file mode 100644 index 0000000..586ff12 --- /dev/null +++ b/benchmarks/old-bench/include/bench.h @@ -0,0 +1,162 @@ +/* + * benchmarks/include/bench.h + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include "bench_utility.h" + +template +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, std::vector &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + reset_de_perf_metrics(); + + return continue_benchmark; +} + +template +static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template +static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j(stop - start).count(); + } + + progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h new file mode 100644 index 0000000..e33b93d --- /dev/null +++ b/benchmarks/old-bench/include/bench_utility.h @@ -0,0 +1,181 @@ +/* + * benchmarks/include/bench_utility.h + * + * Copyright (C) 2023 Douglas Rumbaugh + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include "framework/DynamicExtension.h" +#include "shard/WSS.h" +#include "shard/MemISAM.h" +#include "shard/PGM.h" +#include "shard/TrieSpline.h" +#include "shard/WIRS.h" +#include "ds/BTree.h" +#include "shard/VPTree.h" +#include "mtree.h" +#include "standalone_utility.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +typedef de::WeightedRecord WRec; +typedef de::Record Rec; + +const size_t W2V_SIZE = 300; +typedef de::EuclidPoint Word2VecRec; + +typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; +typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; +typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; +typedef de::DynamicExtension, de::PGMPointLookup> ExtendedPGM_PL; +typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; +typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; +typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; + +struct euclidean_distance { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double dist = 0; + for (size_t i=0; i TreeMap; +typedef mt::mtree MTree; + +template +static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, + double delete_prop, std::vector &to_delete, bool binary=false) { + vec.clear(); + for (size_t i=0; i) { + if (!next_vector_record(file, rec)) { + if (i == 0) { + return false; + } + + break; + } + } else { + if (!next_record(file, rec, binary)) { + if (i == 0) { + return false; + } + + break; + } + } + + vec.emplace_back(rec); + + if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { + to_delete.emplace_back(rec); + } + } + + return true; +} + + +template +static bool warmup(std::fstream &file, DE &extended_index, size_t count, + double delete_prop, std::vector to_delete, bool progress=true, bool binary=false) { + size_t batch = std::min(.1 * count, 25000.0); + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(batch); + delete_vec.reserve(batch*delete_prop); + + size_t inserted = 0; + size_t delete_idx = 0; + + double last_percent = 0; + while (inserted < count) { + // Build vector of records to insert and potentially delete + auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); + if (inserted > batch) { + build_delete_vec(to_delete, delete_vec, batch*delete_prop); + delete_idx = 0; + } + + for (size_t i=0; i) { + extended_index.erase_one(delete_vec[delete_idx++].key); + } + else if constexpr (std::is_same_v) { + extended_index.remove(delete_vec[delete_idx++]); + } else { + extended_index.erase(delete_vec[delete_idx++]); + } + } + + // insert the record; + if constexpr (std::is_same_v) { + extended_index.add(insert_vec[i]); + } else { + extended_index.insert(insert_vec[i]); + } + inserted++; + + if (progress) { + progress_update((double) inserted / (double) count, "warming up:"); + } + } + } + + return true; +} + + +static void reset_de_perf_metrics() { + + /* + * rejection counters are zeroed automatically by the + * sampling function itself. + */ + + RESET_IO_CNT(); +} diff --git a/benchmarks/old-bench/include/standalone_utility.h b/benchmarks/old-bench/include/standalone_utility.h new file mode 100644 index 0000000..727daa5 --- /dev/null +++ b/benchmarks/old-bench/include/standalone_utility.h @@ -0,0 +1,240 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef uint64_t key_type; +typedef uint64_t value_type; +typedef uint64_t weight_type; + +static gsl_rng *g_rng; +static bool g_osm_data; + +struct btree_record { + key_type key; + value_type value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +struct btree_key_extract { + static const key_type &get(const btree_record &v) { + return v.key; + } +}; + +static key_type g_min_key = UINT64_MAX; +static key_type g_max_key = 0; + +static size_t g_max_record_cnt = 0; +static size_t g_reccnt = 0; + +static constexpr unsigned int DEFAULT_SEED = 0; + +static unsigned int get_random_seed() +{ + unsigned int seed = 0; + std::fstream urandom; + urandom.open("/dev/urandom", std::ios::in|std::ios::binary); + urandom.read((char *) &seed, sizeof(seed)); + urandom.close(); + + return seed; +} + +static key_type osm_to_key(const char *key_field) { + double tmp_key = (atof(key_field) + 180) * 10e6; + return (key_type) tmp_key; +} + +static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) +{ + g_rng = gsl_rng_alloc(type); + gsl_rng_set(g_rng, seed); +} + +static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) +{ + unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; + init_bench_rng(seed, gsl_rng_mt19937); + g_osm_data = osm_correction; + g_max_record_cnt = max_reccnt; + g_reccnt = 0; +} + +static void delete_bench_env() +{ + gsl_rng_free(g_rng); +} + + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +/* + * NOTE: The QP type must have lower_bound and upper_bound attributes, which + * this function will initialize. Any other query parameter attributes must + * be manually initialized after the call. + */ +template +static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + for (size_t i=0; i<300; i++) { + std::string dimension; + + std::getline(line_stream, dimension, ' '); + record.data[i] = atof(dimension.c_str()); + } + + g_reccnt++; + + return true; + } + + return false; + +} + +template +static bool next_record(std::fstream &file, R &record, bool binary=false) +{ + static value_type value = 1; + if (g_reccnt >= g_max_record_cnt) return false; + + if (binary) { + if (file.good()) { + decltype(R::key) key; + + file.read((char*) &key, sizeof(key)); + record.key = key; + record.value = value; + value++; + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + return true; + } + + return false; + } + + std::string line; + if (std::getline(file, line, '\n')) { + std::stringstream line_stream(line); + std::string key_field; + std::string value_field; + std::string weight_field; + + std::getline(line_stream, value_field, '\t'); + std::getline(line_stream, key_field, '\t'); + std::getline(line_stream, weight_field, '\t'); + + record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); + record.value = atol(value_field.c_str()); + + if (record.key < g_min_key) g_min_key = record.key; + if (record.key > g_max_key) g_max_key = record.key; + + g_reccnt++; + + return true; + } + + return false; +} + +template +static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { + vec.clear(); + + size_t cnt = 0; + while (cnt < n) { + if (to_delete.size() == 0) { + return false; + } + + auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); + vec.emplace_back(to_delete[i]); + to_delete.erase(to_delete.begin() + i); + } +td: + return true; +} diff --git a/benchmarks/static_dynamic_comp.cpp b/benchmarks/static_dynamic_comp.cpp new file mode 100644 index 0000000..2d8f041 --- /dev/null +++ b/benchmarks/static_dynamic_comp.cpp @@ -0,0 +1,117 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "query/rangecount.h" +#include "shard/TrieSpline.h" +#include "shard/ISAMTree.h" + + +#include "framework/interface/Record.h" +#include "framework/interface/Query.h" +#include "include/data-proc.h" + +#include "psu-util/timer.h" + + +typedef de::Record Rec; +typedef de::ISAMTree ISAM; +typedef de::TrieSpline TS; + +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; + +typedef de::MutableBuffer Buffer; + +typedef de::rc::Parms query; + +Buffer *file_to_mbuffer(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + auto buff = new Buffer(n, n+1); + + Rec rec; + while (next_record(file, rec) && buff->get_record_count() < n) { + buff->append(rec); + } + + return buff; +} + +BenchBTree *file_to_btree(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + auto btree = new BenchBTree(); + Rec rec; + while (next_record(file, rec) && btree->size() < n) { + btree->insert({rec.key, rec.value}); + } + + return btree; +} + +template +void benchmark_shard(S *shard, std::vector &queries) { + TIMER_INIT(); + + TIMER_START(); + for (auto & q : queries) { + auto state = de::rc::Query::get_query_state(shard, &q); + auto res = de::rc::Query::query(shard, state, &q); + } + TIMER_STOP(); + + auto latency = TIMER_RESULT() / queries.size(); + fprintf(stdout, "%ld %ld\n", latency, shard->get_memory_usage() - shard->get_record_count() * sizeof(de::Wrapped)); +} + +void benchmark_btree(BenchBTree *btree, std::vector &queries) { + TIMER_INIT(); + + TIMER_START(); + for (auto & q : queries) { + size_t c = 0; + auto ptr = btree->find(q.lower_bound); + while(ptr != btree->end() && ptr->key <= q.upper_bound) { + c++; + } + } + TIMER_STOP(); + + auto latency = TIMER_RESULT() / queries.size(); + auto mem = btree->get_stats().inner_nodes * psudb::btree_default_traits::inner_slots * (sizeof(key_type) + sizeof(void*)); + fprintf(stdout, "%ld %ld\n", latency, mem); +} + +int main(int argc, char **argv) { + if (argc < 4) { + fprintf(stderr, "Usage: static_dynamic_comp \n"); + exit(EXIT_FAILURE); + } + + std::string d_fname = std::string(argv[1]); + size_t reccnt = atol(argv[2]); + std::string q_fname = std::string(argv[3]); + + init_bench_env(reccnt, true, false); + auto queries = read_range_queries(q_fname, .001); + + auto buff = file_to_mbuffer(d_fname, reccnt); + + TS *ts = new TS(buff->get_buffer_view()); + benchmark_shard(ts, queries); + delete ts; + + ISAM *isam = new ISAM(buff->get_buffer_view()); + benchmark_shard(isam, queries); + delete isam; + + auto btree = file_to_btree(d_fname, reccnt); + +} + diff --git a/benchmarks/watermark_testing.cpp b/benchmarks/watermark_testing.cpp index 1abe7f5..e016aa4 100644 --- a/benchmarks/watermark_testing.cpp +++ b/benchmarks/watermark_testing.cpp @@ -21,7 +21,7 @@ typedef de::DynamicExtension Ext; int main(int argc, char **argv) { std::vector hwms = {5000l, 10000l, 20000l, 50000l}; - std::vector lwms = {.1, .2, .3, .4, .5}; + std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9}; size_t n = 1000000; @@ -46,6 +46,8 @@ int main(int argc, char **argv) { fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput); + extension->print_scheduler_statistics(); + delete extension; } } diff --git a/include/framework/scheduling/statistics.h b/include/framework/scheduling/statistics.h index 8466ffc..50ba196 100644 --- a/include/framework/scheduling/statistics.h +++ b/include/framework/scheduling/statistics.h @@ -67,19 +67,33 @@ public: if (type == 1) { m_type_1_cnt.fetch_add(1); m_type_1_total_time.fetch_add(length); + + if (length > m_type_1_largest_time) { + m_type_1_largest_time.store(length); + } } else { m_type_2_cnt.fetch_add(1); m_type_2_total_time.fetch_add(length); + + if (length > m_type_2_largest_time) { + m_type_2_largest_time.store(length); + } } } void print_statistics() { - fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\n", - m_type_1_cnt.load(), - m_type_1_total_time.load() / m_type_1_cnt.load()); - fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\n", - m_type_2_cnt.load(), - m_type_2_total_time.load() / m_type_2_cnt.load()); + if (m_type_1_cnt > 0) { + fprintf(stdout, "Query Count: %ld\tQuery Avg. Latency: %ld\tMax Query Latency: %ld\n", + m_type_1_cnt.load(), + m_type_1_total_time.load() / m_type_1_cnt.load(), + m_type_1_largest_time.load()); + } + if (m_type_2_cnt > 0) { + fprintf(stdout, "Reconstruction Count: %ld\tReconstruction Avg. Latency: %ld\tMax Recon. Latency:%ld\n", + m_type_2_cnt.load(), + m_type_2_total_time.load() / m_type_2_cnt.load(), + m_type_2_largest_time.load()); + } } private: @@ -92,5 +106,8 @@ private: std::atomic m_type_2_cnt; std::atomic m_type_2_total_time; + + std::atomic m_type_1_largest_time; + std::atomic m_type_2_largest_time; }; } -- cgit v1.2.3