From 69f36c9b4f5df19f09156689b333afe29a017eed Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Fri, 23 Feb 2024 15:40:49 -0500 Subject: Benchmark updates --- benchmarks/CMak | 0 benchmarks/include/benchmark_types.h | 50 ++++++ benchmarks/include/btree-util.h | 27 --- benchmarks/include/data-proc.h | 258 --------------------------- benchmarks/include/file_util.h | 130 ++++++++++++++ benchmarks/include/standard_benchmarks.h | 212 ++++++++++++++++++++++ benchmarks/irs_bench.cpp | 73 ++------ benchmarks/old-bench/include/bench_utility.h | 2 - benchmarks/pgm_bench.cpp | 64 ++----- benchmarks/ts_bench.cpp | 64 ++----- benchmarks/vptree_bench.cpp | 89 +++++++++ benchmarks/watermark_testing_knn.cpp | 61 +++++++ 12 files changed, 583 insertions(+), 447 deletions(-) create mode 100644 benchmarks/CMak create mode 100644 benchmarks/include/benchmark_types.h delete mode 100644 benchmarks/include/btree-util.h delete mode 100644 benchmarks/include/data-proc.h create mode 100644 benchmarks/include/file_util.h create mode 100644 benchmarks/include/standard_benchmarks.h create mode 100644 benchmarks/vptree_bench.cpp create mode 100644 benchmarks/watermark_testing_knn.cpp (limited to 'benchmarks') diff --git a/benchmarks/CMak b/benchmarks/CMak new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h new file mode 100644 index 0000000..85e9565 --- /dev/null +++ b/benchmarks/include/benchmark_types.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include "psu-ds/BTree.h" +#include "mtree.h" +#include "framework/interface/Record.h" + +/* TLX BTree definitions*/ +template +struct btree_record { + K key; + V value; + + inline bool operator<(const btree_record& other) const { + return key < other.key || (key == other.key && value < other.value); + } + + inline bool operator==(const btree_record& other) const { + return key == other.key && value == other.value; + } +}; + +template +struct btree_key_extract { + static const K &get(const btree_record &v) { + return v.key; + } +}; + +typedef psudb::BTree, btree_key_extract> BenchBTree; + + +/*MTree Definitions*/ + +const size_t W2V_SIZE = 300; +typedef de::EuclidPoint Word2VecRec; + +struct euclidean_distance { + double operator()(const Word2VecRec &first, const Word2VecRec &second) const { + double dist = 0; + for (size_t i=0; i MTree; + diff --git a/benchmarks/include/btree-util.h b/benchmarks/include/btree-util.h deleted file mode 100644 index 571c073..0000000 --- a/benchmarks/include/btree-util.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include "psu-ds/BTree.h" - -struct btree_record { - int64_t key; - int64_t value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const int64_t &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - - diff --git a/benchmarks/include/data-proc.h b/benchmarks/include/data-proc.h deleted file mode 100644 index 444cb94..0000000 --- a/benchmarks/include/data-proc.h +++ /dev/null @@ -1,258 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "psu-ds/BTree.h" - -#pragma once - -typedef int64_t key_type; -typedef int64_t value_type; -typedef uint64_t weight_type; - -static gsl_rng *g_rng; -static bool g_osm_data; - -struct btree_record { - key_type key; - value_type value; - - inline bool operator<(const btree_record& other) const { - return key < other.key || (key == other.key && value < other.value); - } - - inline bool operator==(const btree_record& other) const { - return key == other.key && value == other.value; - } -}; - -struct btree_key_extract { - static const key_type &get(const btree_record &v) { - return v.key; - } -}; - -typedef psudb::BTree BenchBTree; - -static key_type g_min_key = UINT64_MAX; -static key_type g_max_key = 0; - -static size_t g_max_record_cnt = 0; -static size_t g_reccnt = 0; - -static constexpr unsigned int DEFAULT_SEED = 0; - -static unsigned int get_random_seed() -{ - unsigned int seed = 0; - std::fstream urandom; - urandom.open("/dev/urandom", std::ios::in|std::ios::binary); - urandom.read((char *) &seed, sizeof(seed)); - urandom.close(); - - return seed; -} - -static key_type osm_to_key(const char *key_field) { - double tmp_key = (atof(key_field) + 180) * 10e6; - return (key_type) tmp_key; -} - -static void init_bench_rng(unsigned int seed, const gsl_rng_type *type) -{ - g_rng = gsl_rng_alloc(type); - gsl_rng_set(g_rng, seed); -} - -static void init_bench_env(size_t max_reccnt, bool random_seed, bool osm_correction=true) -{ - unsigned int seed = (random_seed) ? get_random_seed() : DEFAULT_SEED; - init_bench_rng(seed, gsl_rng_mt19937); - g_osm_data = osm_correction; - g_max_record_cnt = max_reccnt; - g_reccnt = 0; -} - -static void delete_bench_env() -{ - gsl_rng_free(g_rng); -} - - -template -static std::vector read_lookup_queries(std::string fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.target_key = start; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_range_queries(std::string &fname, double selectivity) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - size_t start, stop; - double sel; - while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { - if (start < stop && std::abs(sel - selectivity) < 0.1) { - QP q; - q.lower_bound = start; - q.upper_bound = stop; - queries.push_back(q); - } - } - fclose(qf); - - return queries; -} - -template -static std::vector read_knn_queries(std::string fname, size_t k) { - std::vector queries; - - FILE *qf = fopen(fname.c_str(), "r"); - char *line = NULL; - size_t len = 0; - - while (getline(&line, &len, qf) > 0) { - char *token; - QP query; - size_t idx = 0; - - token = strtok(line, " "); - do { - query.point.data[idx++] = atof(token); - } while ((token = strtok(NULL, " "))); - - query.k = k; - queries.emplace_back(query); - } - - free(line); - fclose(qf); - - return queries; -} - -/* - * NOTE: The QP type must have lower_bound and upper_bound attributes, which - * this function will initialize. Any other query parameter attributes must - * be manually initialized after the call. - */ -template -static bool next_vector_record(std::fstream &file, R &record, bool binary=false) { - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - for (size_t i=0; i<300; i++) { - std::string dimension; - - std::getline(line_stream, dimension, ' '); - record.data[i] = atof(dimension.c_str()); - } - - g_reccnt++; - - return true; - } - - return false; - -} - -template -static bool next_record(std::fstream &file, R &record, bool binary=false) -{ - static value_type value = 1; - if (g_reccnt >= g_max_record_cnt) return false; - - if (binary) { - if (file.good()) { - decltype(R::key) key; - - file.read((char*) &key, sizeof(key)); - record.key = key; - record.value = value; - value++; - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - return true; - } - - return false; - } - - std::string line; - if (std::getline(file, line, '\n')) { - std::stringstream line_stream(line); - std::string key_field; - std::string value_field; - std::string weight_field; - - std::getline(line_stream, value_field, '\t'); - std::getline(line_stream, key_field, '\t'); - std::getline(line_stream, weight_field, '\t'); - - record.key = (g_osm_data) ? osm_to_key(key_field.c_str()) : atol(key_field.c_str()); - record.value = atol(value_field.c_str()); - - if (record.key < g_min_key) g_min_key = record.key; - if (record.key > g_max_key) g_max_key = record.key; - - g_reccnt++; - - return true; - } - - return false; -} - -template -static bool build_delete_vec(std::vector &to_delete, std::vector &vec, size_t n) { - vec.clear(); - - size_t cnt = 0; - while (cnt < n) { - if (to_delete.size() == 0) { - return false; - } - - auto i = gsl_rng_uniform_int(g_rng, to_delete.size()); - vec.emplace_back(to_delete[i]); - to_delete.erase(to_delete.begin() + i); - } -td: - return true; -} - -static std::vector read_sosd_file(std::string &fname, size_t n) { - std::fstream file; - file.open(fname, std::ios::in | std::ios::binary); - - std::vector records(n); - for (size_t i=0; i +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/interface/Record.h" +#include "query/irs.h" + +#pragma once + +template +static std::vector read_lookup_queries(std::string fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.target_key = start; + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_range_queries(std::string &fname, double selectivity) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + size_t start, stop; + double sel; + while (fscanf(qf, "%zu%zu%lf\n", &start, &stop, &sel) != EOF) { + if (start < stop && std::abs(sel - selectivity) < 0.1) { + QP q; + q.lower_bound = start; + q.upper_bound = stop; + + queries.push_back(q); + } + } + fclose(qf); + + return queries; +} + +template +static std::vector read_knn_queries(std::string fname, size_t k) { + std::vector queries; + + FILE *qf = fopen(fname.c_str(), "r"); + char *line = NULL; + size_t len = 0; + + while (getline(&line, &len, qf) > 0) { + char *token; + QP query; + size_t idx = 0; + + token = strtok(line, " "); + do { + query.point.data[idx++] = atof(token); + } while ((token = strtok(NULL, " "))); + + query.k = k; + queries.emplace_back(query); + } + + free(line); + fclose(qf); + + return queries; +} + +template +static std::vector read_sosd_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in | std::ios::binary); + + std::vector records(n); + for (size_t i=0; i +static std::vector read_vector_file(std::string &fname, size_t n) { + std::fstream file; + file.open(fname, std::ios::in); + + std::vector records; + records.reserve(n); + + for (size_t i=0; i + * + * All rights reserved. Published under the Modified BSD License. + * + */ +#pragma once + +#include +#include +#include + +#include "framework/DynamicExtension.h" +#include "framework/interface/Query.h" +#include "psu-util/progress.h" +#include "benchmark_types.h" + +static size_t g_deleted_records = 0; +static double delete_proportion = 0.05; + +template +static void run_queries(DE *extension, std::vector &queries, gsl_rng *rng) { + size_t total; + for (size_t i=0; iquery(q); + auto r = res.get(); + total += r.size(); + } +} + + +template +static void insert_records(DE *extension, size_t start, size_t stop, + std::vector &records, std::vector &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; iinsert(records[i])) { + psudb::progress_update((double) i / (double)(stop - start), "Insert Progress"); + usleep(1); + } + + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { + + while (!extension->erase(records[to_delete[delete_idx]])) { + usleep(1); + } + + delete_idx++; + g_deleted_records++; + } + } + + psudb::progress_update(1, "Insert Progress"); +} + +template +static bool insert_tput_bench(DE &de_index, std::fstream &file, size_t insert_cnt, + double delete_prop, gsl_rng *rng, std::vector &to_delete, bool binary=false) { + + size_t delete_cnt = insert_cnt * delete_prop; + + size_t applied_deletes = 0; + size_t applied_inserts = 0; + + std::vector insert_vec; + std::vector delete_vec; + insert_vec.reserve(BATCH); + delete_vec.reserve(BATCH*delete_prop); + + size_t delete_idx = 0; + + bool continue_benchmark = true; + + size_t total_time = 0; + + while (applied_inserts < insert_cnt && continue_benchmark) { + continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary); + if (applied_deletes < delete_cnt) { + build_delete_vec(to_delete, delete_vec, BATCH*delete_prop); + delete_idx = 0; + } + + if (insert_vec.size() == 0) { + break; + } + + if constexpr (PROGRESS) { + psudb::progress_update((double) applied_inserts / (double) insert_cnt, "inserting:"); + } + + auto insert_start = std::chrono::high_resolution_clock::now(); + for (size_t i=0; i) { + de_index.erase_one(delete_vec[delete_idx++].key); + } else if constexpr (std::is_same_v) { + de_index.remove(delete_vec[delete_idx++]); + } else { + de_index.erase(delete_vec[delete_idx++]); + } + applied_deletes++; + } + + // insert the record; + if constexpr (std::is_same_v) { + de_index.add(insert_vec[i]); + } else { + de_index.insert(insert_vec[i]); + } + applied_inserts++; + } + auto insert_stop = std::chrono::high_resolution_clock::now(); + + total_time += std::chrono::duration_cast(insert_stop - insert_start).count(); + } + + if constexpr (PROGRESS) { + psudb::progress_update(1.0, "inserting:"); + } + + size_t throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9); + + fprintf(stdout, "%ld\t", throughput); + + return continue_benchmark; +} + +template +static bool query_latency_bench(DE &de_index, std::vector queries, size_t trial_cnt=1) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} + + +template Q, bool PROGRESS=true> +static bool static_latency_bench(Shard *shard, std::vector queries, size_t trial_cnt=100) { + char progbuf[25]; + if constexpr (PROGRESS) { + sprintf(progbuf, "querying:"); + } + + size_t total_time = 0; + size_t total_results = 0; + + for (size_t i=0; i states(1); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t j=0; j(stop - start).count(); + } + + psudb::progress_update(1.0, progbuf); + + size_t query_latency = total_time / (trial_cnt * queries.size()); + + fprintf(stdout, "%ld\t", query_latency); + fflush(stdout); + + return true; +} diff --git a/benchmarks/irs_bench.cpp b/benchmarks/irs_bench.cpp index 49b1630..9895295 100644 --- a/benchmarks/irs_bench.cpp +++ b/benchmarks/irs_bench.cpp @@ -4,76 +4,32 @@ #define ENABLE_TIMER -#include - #include "framework/DynamicExtension.h" #include "shard/ISAMTree.h" #include "query/irs.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" #include #include "psu-util/timer.h" +#include "include/standard_benchmarks.h" typedef de::Record Rec; -typedef de::ISAMTree ISAM; -typedef de::irs::Query Q; -typedef de::DynamicExtension Ext; +typedef de::ISAMTree Shard; +typedef de::irs::Query Q; +typedef de::DynamicExtension Ext; typedef de::irs::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; irng = rng; - q->sample_size = 1000; - - auto res = extension->query(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "irs_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -84,7 +40,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(q_fname, .001); + for (auto q : queries) { + q.sample_size = 1000; + q.rng = rng; + } /* warmup structure w/ 10% of records */ size_t warmup = .3 * n; size_t delete_idx = 0; - insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/old-bench/include/bench_utility.h b/benchmarks/old-bench/include/bench_utility.h index e33b93d..f495f18 100644 --- a/benchmarks/old-bench/include/bench_utility.h +++ b/benchmarks/old-bench/include/bench_utility.h @@ -79,8 +79,6 @@ struct cosine_similarity { } }; -typedef tlx::BTree TreeMap; -typedef mt::mtree MTree; template static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/pgm_bench.cpp index 72d3b52..3643abb 100644 --- a/benchmarks/pgm_bench.cpp +++ b/benchmarks/pgm_bench.cpp @@ -10,7 +10,8 @@ #include "shard/PGM.h" #include "query/rangecount.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" #include @@ -18,60 +19,19 @@ typedef de::Record Rec; -typedef de::PGM S; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::PGM Shard; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; iquery(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "pgm_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -82,7 +42,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/ts_bench.cpp b/benchmarks/ts_bench.cpp index 3df3371..3dc619e 100644 --- a/benchmarks/ts_bench.cpp +++ b/benchmarks/ts_bench.cpp @@ -10,7 +10,8 @@ #include "shard/TrieSpline.h" #include "query/rangecount.h" #include "framework/interface/Record.h" -#include "include/data-proc.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" #include @@ -18,60 +19,19 @@ typedef de::Record Rec; -typedef de::TrieSpline TS; -typedef de::rc::Query Q; -typedef de::DynamicExtension Ext; +typedef de::TrieSpline PGM; +typedef de::rc::Query Q; +typedef de::DynamicExtension Ext; typedef de::rc::Parms QP; -void run_queries(Ext *extension, std::vector &queries, gsl_rng *rng) { - size_t total; - for (size_t i=0; iquery(q); - auto r = res.get(); - total += r.size(); - } - - fprintf(stderr, "%ld\n", total); -} - -size_t g_deleted_records = 0; -double delete_proportion = 0.05; - -void insert_records(Ext *extension, size_t start, - size_t stop, - std::vector &records, - std::vector &to_delete, - size_t &delete_idx, - bool delete_records, - gsl_rng *rng) { - size_t reccnt = 0; - Rec r; - for (size_t i=start; iinsert(r)) { - usleep(1); - } - - if (delete_records && gsl_rng_uniform(rng) <= delete_proportion && to_delete[delete_idx] <= i) { - r.key = records[to_delete[delete_idx]]; - r.value = (int64_t) (to_delete[delete_idx]); - while (!extension->erase(r)) { - usleep(1); - } - delete_idx++; - g_deleted_records++; - } - } +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); } int main(int argc, char **argv) { if (argc < 4) { - fprintf(stderr, "ts_bench reccnt datafile queryfile\n"); + usage(argv[0]); exit(EXIT_FAILURE); } @@ -82,7 +42,7 @@ int main(int argc, char **argv) { auto extension = new Ext(12000, 12001, 8, 0, 64); gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); - auto data = read_sosd_file(d_fname, n); + auto data = read_sosd_file(d_fname, n); std::vector to_delete(n * delete_proportion); size_t j=0; for (size_t i=0; i(extension, 0, warmup, data, to_delete, delete_idx, false, rng); extension->await_next_epoch(); TIMER_INIT(); TIMER_START(); - insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); TIMER_STOP(); auto insert_latency = TIMER_RESULT(); size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); TIMER_START(); - run_queries(extension, queries, rng); + run_queries(extension, queries, rng); TIMER_STOP(); auto query_latency = TIMER_RESULT() / queries.size(); diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vptree_bench.cpp new file mode 100644 index 0000000..f4c7d0e --- /dev/null +++ b/benchmarks/vptree_bench.cpp @@ -0,0 +1,89 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "include/file_util.h" +#include "include/standard_benchmarks.h" + +#include + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; +typedef de::knn::Parms QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(100, 1000, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i(q_fname, 10); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries(extension, queries, rng); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "T\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, g_deleted_records); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/watermark_testing_knn.cpp b/benchmarks/watermark_testing_knn.cpp new file mode 100644 index 0000000..7cea594 --- /dev/null +++ b/benchmarks/watermark_testing_knn.cpp @@ -0,0 +1,61 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" + +#include "psu-util/timer.h" + +constexpr size_t D = 100; + +typedef de::EuclidPoint Rec; +typedef de::VPTree Shard; +typedef de::knn::Query Q; +typedef de::DynamicExtension Ext; + +int main(int argc, char **argv) { + std::vector hwms = {1000l, 2000l, 4000l, 10000l}; + std::vector lwms = {.1, .2, .3, .4, .5, .6, .7, .8, .9}; + + size_t n = 1000000; + + std::vector records(n); + for (size_t i=0; iinsert(records[i])) { + _mm_pause(); + } + } + TIMER_STOP(); + + auto insert_time = TIMER_RESULT(); + double insert_throughput = (double) n / (double) insert_time * 1e9; + + fprintf(stdout, "%ld\t%ld\t%lf\n", lwm, hwm, insert_throughput); + extension->print_scheduler_statistics(); + + fflush(stdout); + delete extension; + } + } +} + -- cgit v1.2.3