diff options
| author | Douglas Rumbaugh <dbr4@psu.edu> | 2024-04-22 15:09:07 -0400 |
|---|---|---|
| committer | Douglas Rumbaugh <dbr4@psu.edu> | 2024-04-22 15:09:07 -0400 |
| commit | 4a1dde3148e0e84b47c884bc0bb69c60678b4558 (patch) | |
| tree | 65ad80ce5850bc7b03227b0beb46b86595737d86 /benchmarks/vldb | |
| parent | 0bb5b46ec2b64be17f6269631915e62d02e315e4 (diff) | |
| download | dynamic-extension-4a1dde3148e0e84b47c884bc0bb69c60678b4558.tar.gz | |
Benchmark update+reorganization
The Alex benchmark isn't updated yet.
Diffstat (limited to 'benchmarks/vldb')
| -rw-r--r-- | benchmarks/vldb/alex_bench.cpp | 205 | ||||
| -rw-r--r-- | benchmarks/vldb/btree_bench.cpp | 90 | ||||
| -rw-r--r-- | benchmarks/vldb/dynamic_pgm_bench.cpp | 77 | ||||
| -rw-r--r-- | benchmarks/vldb/irs_bench.cpp | 97 | ||||
| -rw-r--r-- | benchmarks/vldb/mtree_bench.cpp | 80 | ||||
| -rw-r--r-- | benchmarks/vldb/pgm_bench.cpp | 94 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_bench.cpp | 94 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_bsm_bench.cpp | 70 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_mdsp_bench.cpp | 70 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bench.cpp | 100 |
10 files changed, 977 insertions, 0 deletions
diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp
new file mode 100644
index 0000000..f75afa6
--- /dev/null
+++ b/benchmarks/vldb/alex_bench.cpp
@@ -0,0 +1,205 @@
+#include "alex.h"
+#include "include/standalone_utility.h"
+
+typedef uint64_t key_type;
+typedef uint64_t value_type;
+
+typedef alex::Alex<key_type, value_type> Alex;
+
+struct record {  // one key/value pair read from the data file
+    key_type key;
+    value_type value;
+};
+
+struct query {  // inclusive key range scanned by alex_rq_bench
+    key_type lower_bound;
+    key_type upper_bound;
+};
+// Reads up to n records into vec, queueing each in to_delete with probability 1.1 * delete_prop; returns false only if the first read fails.
+template <typename R>
+static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
+    double delete_prop, std::vector<R> &to_delete, bool binary=false) {
+    vec.clear();
+    for (size_t i=0; i<n; i++) {
+        R rec;
+        if (!next_record(file, rec, binary)) {
+            if (i == 0) {
+                return false;
+            }
+
+            break;
+        }
+
+        vec.emplace_back(rec);
+
+        if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) {
+            to_delete.emplace_back(rec);
+        }
+    }
+
+    return true;
+}
+
+// Bulk-loads a new ALEX index with up to `count` records read from `file`; the caller deletes the returned index.
+static Alex *warmup(std::fstream &file, size_t count,
+    double delete_prop, std::vector<record> to_delete, bool progress=true, bool binary=false) {
+    size_t batch = std::min(.1 * count, 25000.0);
+
+    std::pair<key_type, value_type> *insert_vec = new std::pair<key_type, value_type>[count];
+    Alex *alex = new Alex();
+
+    size_t cnt = 0;
+    record rec;
+    while (cnt < count && next_record(file, rec, binary)) {  // honour the binary flag, as build_insert_vec does
+        insert_vec[cnt] = {rec.key, rec.value};
+        cnt++;
+    }
+
+    std::sort(insert_vec, insert_vec + cnt);  // only the cnt slots actually read, not the zero-filled tail
+
+    alex->bulk_load(insert_vec, cnt);
+    delete[] insert_vec;
+
+    return alex;
+}
+
+// Inserts up to insert_cnt records in batches of 1000, interleaving random deletes, and prints operations/second to stdout.
+static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector<record> &to_delete, bool binary=false) {
+    size_t delete_cnt = insert_cnt * delete_prop;
+
+    size_t applied_deletes = 0;
+    size_t applied_inserts = 0;
+
+    size_t BATCH=1000;
+
+    std::vector<record> insert_vec;
+    std::vector<record> delete_vec;
+    insert_vec.reserve(BATCH);
+    delete_vec.reserve(BATCH*delete_prop);
+
+    size_t delete_idx = 0;
+
+    bool continue_benchmark = true;
+
+    size_t total_time = 0;  // nanoseconds spent inside the timed insert/delete loops
+
+    while (applied_inserts < insert_cnt && continue_benchmark) {
+        continue_benchmark = build_insert_vec(file, insert_vec, BATCH, delete_prop, to_delete, binary);
+        progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+        if (applied_deletes < delete_cnt) {
+            build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+            delete_idx = 0;
+        }
+
+        if (insert_vec.size() == 0) {
+            break;
+        }
+
+        auto insert_start = std::chrono::high_resolution_clock::now();
+        for (size_t i=0; i<insert_vec.size(); i++) {
+            // process a delete if necessary
+            if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+                alex.erase_one(delete_vec[delete_idx++].key);
+                applied_deletes++;
+            }
+
+            // insert the record;
+            alex.insert(insert_vec[i].key, insert_vec[i].value);
+            applied_inserts++;
+        }
+        auto insert_stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+    }
+
+    progress_update(1.0, "inserting:");
+
+    size_t throughput = (total_time == 0) ? 0 : (size_t) (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);  // avoid dividing by zero when nothing was timed
+
+    fprintf(stdout, "%zu\t", throughput);
+}
+
+// Runs every range query trial_cnt times, scanning from find(lower_bound) while key <= upper_bound,
+// and prints the mean per-query latency in nanoseconds to stdout.
+static void alex_rq_bench(Alex &alex, std::vector<query> queries, size_t trial_cnt=1)
+{
+    char progbuf[25];
+    sprintf(progbuf, "sampling:");
+
+    size_t batch_size = 100;
+    size_t batches = trial_cnt / batch_size;
+    size_t total_time = 0;
+
+    std::vector<record> result_set;
+
+    for (size_t i=0; i<trial_cnt; i++) {
+        auto start = std::chrono::high_resolution_clock::now();
+        for (size_t j=0; j<queries.size(); j++) {
+            auto ptr = alex.find(queries[j].lower_bound);  // NOTE(review): find() looks like an exact-match lookup -- confirm lower_bound() is not intended
+            while (ptr != alex.end() && ptr.key() <= queries[j].upper_bound) {
+                result_set.push_back({ptr.key(), ptr.payload()});
+                ptr++;
+            }
+            result_set.clear();
+        }
+        auto stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+    }
+
+    size_t latency = (trial_cnt == 0 || queries.empty()) ? 0 : total_time / (trial_cnt * queries.size());  // no queries run -> report 0 instead of dividing by zero
+
+    fprintf(stdout, "%zu\t", latency);
+}
+// Entry point: alex_rq_bench <filename> <record_count> <delete_proportion> <query_file>
+int main(int argc, char **argv)
+{
+    if (argc < 5) {
+        fprintf(stderr, "Usage: alex_rq_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+        exit(EXIT_FAILURE);
+    }
+
+    std::string filename = std::string(argv[1]);
+    size_t record_count = atol(argv[2]);
+    double delete_prop = atof(argv[3]);
+    std::string qfilename = std::string(argv[4]);
+
+    size_t buffer_cap = 12000;
+    size_t scale_factor = 6;
+    double max_delete_prop = delete_prop;
+    bool use_osm = false;
+
+    double insert_batch = 0.8;  // fraction of record_count bulk-loaded during warmup
+
+    init_bench_env(record_count, true, use_osm);
+    auto queries = read_range_queries<query>(qfilename, .0001);
+
+    std::fstream datafile;
+    datafile.open(filename, std::ios::in | std::ios::binary);
+
+    std::vector<record> to_delete;
+
+    // warm up the index by bulk-loading the first insert_batch fraction of the
+    // records
+    size_t warmup_cnt = insert_batch * record_count;
+    auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true);
+
+    fprintf(stderr, "Size: %ld\n", alex->size());
+    size_t insert_cnt = record_count - warmup_cnt;
+
+    alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true);
+    size_t memory_usage = alex->model_size() + alex->data_size();
+
+    fprintf(stderr, "Size: %ld\n", alex->size());
+    fprintf(stdout, "%zu\t", memory_usage);
+
+    alex_rq_bench(*alex, queries);
+    fprintf(stdout, "\n");
+
+    delete_bench_env();
+    delete alex;
+    fflush(stdout);
+    fflush(stderr);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp
new file mode 100644
index 0000000..12107c6
--- /dev/null
+++ b/benchmarks/vldb/btree_bench.cpp
@@ -0,0 +1,90 @@
+/*
+ * Insertion-throughput, sampling-query latency, and size benchmark for the BenchBTree baseline.
+ */
+
+#define ENABLE_TIMER
+
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+#include "psu-ds/BTree.h"
+
+typedef btree_record<int64_t, int64_t> Rec;
+
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::irs::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto btree = BenchBTree();
+
+    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    /* read in the range queries and add sample size and rng for sampling */
+    auto queries = read_range_queries<QP>(q_fname, .001);
+    for (auto &q : queries) {
+        q.sample_size = 1000;
+        q.rng = rng;
+    }
+
+    /* warmup structure w/ 30% of records */
+    size_t warmup = .3 * n;
+    size_t delete_idx = 0;
+    insert_records<BenchBTree, Rec>(&btree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<BenchBTree, Rec>(&btree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_btree_queries<Rec>(&btree, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto btree_size = btree.get_stats().inner_nodes * psudb::btree_default_traits<int64_t, Rec>::inner_slots * (sizeof(int64_t) + sizeof(void*));
+
+    /* account for memory wasted on gaps in the structure */
+    btree_size += btree.get_stats().leaves * psudb::btree_default_traits<int64_t, Rec>::leaf_slots * sizeof(Rec);
+    btree_size -= btree.size() * sizeof(Rec);
+
+    fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, btree_size);
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp
new file mode 100644
index 0000000..249bc92
--- /dev/null
+++ b/benchmarks/vldb/dynamic_pgm_bench.cpp
@@ -0,0 +1,77 @@
+/*
+ * Insertion-throughput, range-count query latency, and size benchmark for the PGM baseline (PGM type).
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    std::vector<std::pair<uint64_t, uint64_t>> tmp_data;
+    PGM pgm(tmp_data.begin(), tmp_data.end());
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<PGM, Rec>(&pgm, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<PGM, Rec>(&pgm, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<PGM, QP>(&pgm, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = pgm.size_in_bytes();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size);
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp
new file mode 100644
index 0000000..ca1e555
--- /dev/null
+++ b/benchmarks/vldb/irs_bench.cpp
@@ -0,0 +1,97 @@
+/*
+ * Insertion-throughput, sampling-query latency, and size benchmark for DynamicExtension over ISAMTree shards.
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::irs::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    /* read in the range queries and add sample size and rng for sampling */
+    auto queries = read_range_queries<QP>(q_fname, .001);
+    for (auto &q : queries) {
+        q.sample_size = 1000;
+        q.rng = rng;
+    }
+
+    /* warmup structure w/ 30% of records */
+    size_t warmup = .3 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();  // NOTE(review): shard is never freed here -- confirm ownership
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp
new file mode 100644
index 0000000..35f56be
--- /dev/null
+++ b/benchmarks/vldb/mtree_bench.cpp
@@ -0,0 +1,80 @@
+/*
+ * Insertion-throughput and k-NN query latency benchmark for the MTree baseline.
+ */
+
+#define ENABLE_TIMER
+
+#include "query/knn.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto mtree = new MTree();
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    fprintf(stderr, "[I] Reading data file...\n");
+    auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+    fprintf(stderr, "[I] Generating delete vector\n");
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    fprintf(stderr, "[I] Reading Queries\n");
+    auto queries = read_knn_queries<QP>(q_fname, 10);
+
+    fprintf(stderr, "[I] Warming up structure...\n");
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<MTree, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+
+    fprintf(stderr, "[I] Running Insertion Benchmark\n");
+    TIMER_START();
+    insert_records<MTree, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    fprintf(stderr, "[I] Running Query Benchmark\n");
+    TIMER_START();
+    run_queries<MTree, QP>(mtree, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+    gsl_rng_free(rng);
+    delete mtree;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp
new file mode 100644
index 0000000..f63ec8e
--- /dev/null
+++ b/benchmarks/vldb/pgm_bench.cpp
@@ -0,0 +1,94 @@
+/*
+ * Insertion-throughput, range-count query latency, and size benchmark for DynamicExtension over PGM shards.
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/PGM.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::PGM<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();  // NOTE(review): shard is never freed here -- confirm ownership
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp
new file mode 100644
index 0000000..a84635f
--- /dev/null
+++ b/benchmarks/vldb/ts_bench.cpp
@@ -0,0 +1,94 @@
+/*
+ * Insertion-throughput, range-count query latency, and size benchmark for DynamicExtension over TrieSpline shards.
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "framework/DynamicExtension.h"
+#include "shard/TrieSpline.h"
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::TrieSpline<Rec> Shard;
+typedef de::rc::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext;
+typedef de::rc::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new Ext(12000, 12001, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();  // NOTE(review): shard is never freed here -- confirm ownership
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp
new file mode 100644
index 0000000..706433d
--- /dev/null
+++ b/benchmarks/vldb/ts_bsm_bench.cpp
@@ -0,0 +1,70 @@
+/*
+ * Insertion-throughput and range-count query latency benchmark for BentleySaxe over BSMTrieSpline shards.
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "triespline_bsm.h"
+#include "psu-util/bentley-saxe.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "query/rangecount.h"
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+
+typedef std::pair<uint64_t, uint64_t> Rec;
+typedef de::Record<uint64_t, uint64_t> FRec;
+
+typedef BSMTrieSpline<uint64_t, uint64_t> Shard;
+typedef de::rc::Parms<FRec> QP;
+typedef psudb::bsm::BentleySaxe<Rec, Shard> Ext;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new psudb::bsm::BentleySaxe<Rec, Shard>();
+    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n);
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    insert_records<Shard, Rec>(extension, 0, warmup, data);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Shard, Rec>(extension, warmup, data.size(), data);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP, true>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp
new file mode 100644
index 0000000..4c5bf1e
--- /dev/null
+++ b/benchmarks/vldb/ts_mdsp_bench.cpp
@@ -0,0 +1,70 @@
+/*
+ * Insertion-throughput and range-count query latency benchmark for BentleySaxe<Rec, Shard, true> over BSMTrieSpline shards.
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "triespline_bsm.h"
+#include "psu-util/bentley-saxe.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "query/rangecount.h"
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+
+typedef std::pair<uint64_t, uint64_t> Rec;
+typedef de::Record<uint64_t, uint64_t> FRec;
+
+typedef BSMTrieSpline<uint64_t, uint64_t> Shard;
+typedef de::rc::Parms<FRec> QP;
+typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>();
+    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n);
+    auto queries = read_range_queries<QP>(q_fname, .001);
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    insert_records<Shard, Rec, true>(extension, 0, warmup, data);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<Shard, Rec, true>(extension, warmup, data.size(), data);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<Ext, QP, true>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp
new file mode 100644
index 0000000..613c556
--- /dev/null
+++ b/benchmarks/vldb/vptree_bench.cpp
@@ -0,0 +1,100 @@
+/*
+ * Insertion-throughput, k-NN query latency, and size benchmark for DynamicExtension over VPTree shards.
+ */
+
+#define ENABLE_TIMER
+
+#include "framework/DynamicExtension.h"
+#include "shard/VPTree.h"
+#include "query/knn.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec;
+
+typedef de::VPTree<Rec, 100, true> Shard;
+typedef de::knn::Query<Rec, Shard> Q;
+typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext;
+typedef de::knn::Parms<Rec> QP;
+
+void usage(char *progname) {
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto extension = new Ext(100, 1000, 8, 0, 64);
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    fprintf(stderr, "[I] Reading data file...\n");
+    auto data = read_vector_file<Rec, 300>(d_fname, n);
+
+    fprintf(stderr, "[I] Generating delete vector\n");
+    std::vector<size_t> to_delete(n * delete_proportion);  // slots the loop below does not fill stay 0
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    fprintf(stderr, "[I] Reading Queries\n");
+    auto queries = read_knn_queries<QP>(q_fname, 10);
+
+    fprintf(stderr, "[I] Warming up structure...\n");
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    extension->await_next_epoch();
+
+    TIMER_INIT();
+
+    fprintf(stderr, "[I] Running Insertion Benchmark\n");
+    TIMER_START();
+    insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    fprintf(stderr, "[I] Running Query Benchmark\n");
+    TIMER_START();
+    run_queries<Ext, QP>(extension, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto shard = extension->create_static_structure();  // NOTE(review): shard is never freed here -- confirm ownership
+
+    TIMER_START();
+    run_static_queries<Shard, QP, Q>(shard, queries);
+    TIMER_STOP();
+
+    auto static_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage();
+    auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size);
+
+    gsl_rng_free(rng);
+    delete extension;
+    fflush(stderr);
+}
+
|