diff options
| author | Douglas B. Rumbaugh <dbr4@psu.edu> | 2024-05-14 16:31:05 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-14 16:31:05 -0400 |
| commit | 47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc (patch) | |
| tree | ee5613ce182b2c9caa228d3abeb65dc27fef2db3 /benchmarks/vldb | |
| parent | 4a834497d5f82c817d634925250158d85ca825c2 (diff) | |
| parent | 8643fe194dec05b4e3f3ea31e162ac0b2b00e162 (diff) | |
| download | dynamic-extension-47916da2ba5ed5bee2dda3cbcc58d39e1e931bfc.tar.gz | |
Merge pull request #4 from dbrumbaugh/master
Updates for VLDB revision
Diffstat (limited to 'benchmarks/vldb')
| -rw-r--r-- | benchmarks/vldb/alex_bench.cpp | 144 | ||||
| -rw-r--r-- | benchmarks/vldb/btree_bench.cpp | 90 | ||||
| -rw-r--r-- | benchmarks/vldb/btree_thread_scaling_bench.cpp | 120 | ||||
| -rw-r--r-- | benchmarks/vldb/dynamic_pgm_bench.cpp | 77 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bench.cpp | 100 | ||||
| -rw-r--r-- | benchmarks/vldb/fst_bsm_bench.cpp | 100 | ||||
| -rw-r--r-- | benchmarks/vldb/irs_bench.cpp | 97 | ||||
| -rw-r--r-- | benchmarks/vldb/mtree_bench.cpp | 82 | ||||
| -rw-r--r-- | benchmarks/vldb/mtree_bench_alt.cpp | 82 | ||||
| -rw-r--r-- | benchmarks/vldb/pgm_bench.cpp | 94 | ||||
| -rw-r--r-- | benchmarks/vldb/thread_scaling_bench.cpp | 128 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_bench.cpp | 95 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_bsm_bench.cpp | 95 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_mdsp_bench.cpp | 70 | ||||
| -rw-r--r-- | benchmarks/vldb/ts_parmsweep.cpp | 124 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bench.cpp | 102 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bench_alt.cpp | 102 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bsm_bench.cpp | 102 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_bsm_bench_alt.cpp | 92 | ||||
| -rw-r--r-- | benchmarks/vldb/vptree_parmsweep.cpp | 129 |
20 files changed, 2025 insertions, 0 deletions
diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp new file mode 100644 index 0000000..ba687f3 --- /dev/null +++ b/benchmarks/vldb/alex_bench.cpp @@ -0,0 +1,144 @@ +#define ENABLE_TIMER + +#include "alex.h" + +#include "file_util.h" +#include "psu-util/progress.h" +#include "psu-util/timer.h" + +typedef uint64_t key_type; +typedef uint64_t value_type; + +typedef alex::Alex<key_type, value_type> Alex; + +struct record { + key_type key; + value_type value; +}; + +struct query { + key_type lower_bound; + key_type upper_bound; +}; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +static size_t g_deleted_records = 0; +static double delete_proportion = 0.05; + +static void insert_records(Alex *structure, size_t start, size_t stop, + std::vector<record> &records, std::vector<size_t> &to_delete, + size_t &delete_idx, bool delete_records, gsl_rng *rng) { + + psudb::progress_update(0, "Insert Progress"); + size_t reccnt = 0; + for (size_t i=start; i<stop; i++) { + structure->insert(records[i].key, records[i].value); + + if (delete_records && gsl_rng_uniform(rng) <= + delete_proportion && to_delete[delete_idx] <= i) { + + structure->erase_one(records[i].key); + delete_idx++; + g_deleted_records++; + } + } + + psudb::progress_update(1, "Insert Progress"); +} + +size_t g_global_cnt = 0; + +static void run_queries(Alex *alex, std::vector<query> &queries) { + for (size_t i=0; i<queries.size(); i++) { + size_t cnt=0; + auto ptr = alex->find(queries[i].lower_bound); + while (ptr != alex->end() && ptr.key() <= queries[i].upper_bound) { + cnt++; + ptr++; + } + + g_global_cnt += cnt; + } +} + +Alex *warmup_alex(std::vector<record> records, size_t cnt) { + if (cnt >= records.size()) { + fprintf(stderr, "[E] Requesting warmup with more records than are available.\n"); + exit(EXIT_FAILURE); + } + + auto alex = new Alex(); + std::pair<key_type, value_type> *insert_vec = new std::pair<key_type, value_type>[cnt]; + + for (size_t i=0; i<cnt; i++) { + insert_vec[i] = {records[i].key, records[i].value}; + } + + std::sort(insert_vec, insert_vec + cnt); + alex->bulk_load(insert_vec, cnt); + delete[] insert_vec; + + return alex; +} + +int main(int argc, char **argv) +{ + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + + auto data = read_sosd_file<record>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + + auto queries = read_range_queries<query>(q_fname, .0001); + + + size_t warmup = .1 * n; + size_t delete_idx = 0; + + auto alex = warmup_alex(data, warmup); + + TIMER_INIT(); + + TIMER_START(); + insert_records(alex, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries(alex, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = alex->model_size() + alex->data_size() - (alex->size() * sizeof(record)); + + fprintf(stdout, "%ld\t%ld\t%lld\t%ld\n", insert_throughput, query_latency, ext_size, g_global_cnt); + fflush(stdout); + + gsl_rng_free(rng); + fflush(stderr); + + delete alex; + + exit(EXIT_SUCCESS); +} diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp new file mode 100644 index 0000000..fa72831 --- /dev/null +++ b/benchmarks/vldb/btree_bench.cpp @@ -0,0 +1,90 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "benchmark_types.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" +#include "standard_benchmarks.h" +#include "psu-ds/BTree.h" + +typedef btree_record<int64_t, int64_t> Rec; + +typedef de::ISAMTree<Rec> Shard; +typedef de::irs::Query<Rec, Shard> Q; +typedef de::irs::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto btree = BenchBTree(); + + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + /* read in the range queries and add sample size and rng for sampling */ + auto queries = read_range_queries<QP>(q_fname, .0001); + for (auto &q : queries) { + q.sample_size = 1000; + q.rng = rng; + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<BenchBTree, Rec>(&btree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + TIMER_START(); + insert_records<BenchBTree, Rec>(&btree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_btree_queries<Rec>(&btree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto btree_size = btree.get_stats().inner_nodes * psudb::btree_default_traits<int64_t, Rec>::inner_slots * (sizeof(int64_t) + sizeof(void*)); + + /* account for memory wasted on gaps in the structure */ + btree_size += btree.get_stats().leaves * psudb::btree_default_traits<int64_t, Rec>::leaf_slots * sizeof(Rec); + btree_size -= btree.size() * sizeof(Rec); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, btree_size); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/vldb/btree_thread_scaling_bench.cpp b/benchmarks/vldb/btree_thread_scaling_bench.cpp new file mode 100644 index 0000000..557e966 --- /dev/null +++ b/benchmarks/vldb/btree_thread_scaling_bench.cpp @@ -0,0 +1,120 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "query/irs.h" +#include "benchmark_types.h" +#include "file_util.h" +#include <mutex> + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef btree_record<int64_t, int64_t> Rec; +typedef de::irs::Parms<Rec> QP; + +std::atomic<bool> inserts_done = false; + +std::mutex g_btree_lock; + +void query_thread(BenchBTree *tree, std::vector<QP> *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + + std::vector<int64_t> result; + g_btree_lock.lock(); + tree->range_sample(q.lower_bound, q.upper_bound, 1000, result, rng); + g_btree_lock.unlock(); + + total += result.size(); + usleep(1); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(BenchBTree *tree, size_t start, std::vector<Rec> *records) { + size_t reccnt = 0; + for (size_t i=start; i<records->size(); i++) { + btree_record<int64_t, int64_t> r; + r.key = (*records)[i].key; + r.value = i; + + g_btree_lock.lock(); + tree->insert(r); + g_btree_lock.unlock(); + + if (i % 100000 == 0) { + fprintf(stderr, "Inserted %ld records\n", i); + } + } + + inserts_done.store(true); +} + +int main(int argc, char **argv) { + + if (argc < 5) { + fprintf(stderr, "btree_insert_query_tput reccnt query_threads datafile queryfile\n"); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t qthread_cnt = atol(argv[2]); + std::string d_fname = std::string(argv[3]); + std::string q_fname = std::string(argv[4]); + + auto tree = new BenchBTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + auto queries = read_range_queries<QP>(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; i<warmup; i++) { + btree_record<int64_t, int64_t> r; + r.key = data[i].key; + r.value = data[i].value; + + tree->insert(r); + } + + TIMER_INIT(); + + std::vector<std::thread> qthreads(qthread_cnt); + + TIMER_START(); + std::thread i_thrd(insert_thread, tree, warmup, &data); + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i] = std::thread(query_thread, tree, &queries); + } + i_thrd.join(); + TIMER_STOP(); + + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i].join(); + } + + auto total_latency = TIMER_RESULT(); + size_t throughput = (size_t) ((double) (n - warmup) / (double) total_latency * 1e9); + fprintf(stdout, "T\t%ld\t%ld\n", total_latency, throughput); + + gsl_rng_free(rng); + delete tree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp new file mode 100644 index 0000000..15b130f --- /dev/null +++ b/benchmarks/vldb/dynamic_pgm_bench.cpp @@ -0,0 +1,77 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::rc::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + std::vector<std::pair<uint64_t, uint64_t>> tmp_data; + PGM pgm(tmp_data.begin(), tmp_data.end()); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_range_queries<QP>(q_fname, .0001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<PGM, Rec>(&pgm, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + TIMER_START(); + insert_records<PGM, Rec>(&pgm, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<PGM, QP>(&pgm, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = pgm.index_size_in_bytes(); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size); + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/vldb/fst_bench.cpp b/benchmarks/vldb/fst_bench.cpp new file mode 100644 index 0000000..276a922 --- /dev/null +++ b/benchmarks/vldb/fst_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<const char *, uint64_t> Rec; +typedef de::FSTrie<Rec> Shard; +typedef de::pl::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::pl::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng); + + std::vector<Rec> data; + for (size_t i=0; i<strings.size(); i++) { + data.push_back({strings[i].get(), i, strlen(strings[i].get())}); + } + + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/fst_bsm_bench.cpp b/benchmarks/vldb/fst_bsm_bench.cpp new file mode 100644 index 0000000..15a441a --- /dev/null +++ b/benchmarks/vldb/fst_bsm_bench.cpp @@ -0,0 +1,100 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/FSTrie.h" +#include "query/pointlookup.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<const char *, uint64_t> Rec; +typedef de::FSTrie<Rec> Shard; +typedef de::pl::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::pl::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 3) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto strings = read_string_file(d_fname, n); + auto queries = generate_string_lookup_queries<QP>(strings, 1000, rng); + + std::vector<Rec> data; + for (size_t i=0; i<strings.size(); i++) { + data.push_back({strings[i].get(), i, strlen(strings[i].get())}); + } + + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<strings.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp new file mode 100644 index 0000000..e062e80 --- /dev/null +++ b/benchmarks/vldb/irs_bench.cpp @@ -0,0 +1,97 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" +#include "standard_benchmarks.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::ISAMTree<Rec> Shard; +typedef de::irs::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::irs::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + /* read in the range queries and add sample size and rng for sampling */ + auto queries = read_range_queries<QP>(q_fname, .0001); + for (auto &q : queries) { + q.sample_size = 1000; + q.rng = rng; + } + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage();// + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp new file mode 100644 index 0000000..cc2f41f --- /dev/null +++ b/benchmarks/vldb/mtree_bench.cpp @@ -0,0 +1,82 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "query/knn.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto mtree = new MTree(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file<Rec, 300>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_knn_queries<QP>(q_fname, 1000); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<MTree, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<MTree, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<MTree, QP>(mtree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size()); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size); + + gsl_rng_free(rng); + delete mtree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/mtree_bench_alt.cpp b/benchmarks/vldb/mtree_bench_alt.cpp new file mode 100644 index 0000000..50c6117 --- /dev/null +++ b/benchmarks/vldb/mtree_bench_alt.cpp @@ -0,0 +1,82 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "query/knn.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto mtree = new MTree_alt(); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<MTree_alt, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<MTree_alt, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<MTree_alt, QP>(mtree, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto size = mtree->size() - sizeof(Rec)*(data.size() - to_delete.size()); + + fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, size); + + gsl_rng_free(rng); + delete mtree; + fflush(stderr); +} + diff --git a/benchmarks/vldb/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp new file mode 100644 index 0000000..cec95df --- /dev/null +++ b/benchmarks/vldb/pgm_bench.cpp @@ -0,0 +1,94 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/PGM.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::PGM<Rec> Shard; +typedef de::rc::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::rc::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(12000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_range_queries<QP>(q_fname, .0001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/thread_scaling_bench.cpp b/benchmarks/vldb/thread_scaling_bench.cpp new file mode 100644 index 0000000..b679e92 --- /dev/null +++ b/benchmarks/vldb/thread_scaling_bench.cpp @@ -0,0 +1,128 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/ISAMTree.h" +#include "query/irs.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include <ctime> + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<int64_t, int64_t> Rec; +typedef de::ISAMTree<Rec> ISAM; +typedef de::irs::Query<Rec, ISAM> Q; +typedef de::DynamicExtension<Rec, ISAM, Q> Ext; +typedef de::irs::Parms<Rec> QP; + +std::atomic<bool> inserts_done = false; + +struct timespec delay = {0, 500}; + +void query_thread(Ext *extension, std::vector<QP> *queries) { + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + size_t total = 0; + + while (!inserts_done.load()) { + auto q_idx = gsl_rng_uniform_int(rng, queries->size()); + + auto q = (*queries)[q_idx]; + q.rng = rng; + q.sample_size = 1000; + + auto res = extension->query(&q); + auto r = res.get(); + total += r.size(); + nanosleep(&delay, nullptr); + } + + fprintf(stderr, "%ld\n", total); + + gsl_rng_free(rng); +} + +void insert_thread(Ext *extension, size_t start, size_t stop, std::vector<Rec> *records) { + fprintf(stderr, "%ld\t%ld\n", start, stop); + for (size_t i=start; i<stop; i++) { + while (!extension->insert((*records)[i])) { + nanosleep(&delay, nullptr); + } + } +} + +int main(int argc, char **argv) { + + if (argc < 6) { + fprintf(stderr, "Usage:\n"); + fprintf(stderr, "%s reccnt insert_threads query_threads datafile queryfile\n", argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + size_t ithread_cnt = atol(argv[2]); + size_t qthread_cnt = atol(argv[3]); + std::string d_fname = std::string(argv[4]); + std::string q_fname = std::string(argv[5]); + + auto extension = new Ext(1000, 12000, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + auto queries = read_range_queries<QP>(q_fname, .001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + for (size_t i=0; i<warmup; i++) { + while (!extension->insert(data[i])) { + usleep(1); + } + } + + extension->await_next_epoch(); + + TIMER_INIT(); + + std::vector<std::thread> ithreads(ithread_cnt); + std::vector<std::thread> qthreads(qthread_cnt); + + TIMER_START(); + size_t start = warmup; + size_t per_thread = (n - warmup) / ithread_cnt; + for (size_t i=0; i<ithread_cnt; i++) { + ithreads[i] = std::thread(insert_thread, extension, start, start + per_thread, &data); + start += per_thread; + } + + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i] = std::thread(query_thread, extension, &queries); + } + + for (size_t i=0; i<ithread_cnt; i++) { + ithreads[i].join(); + } + + inserts_done.store(true); + TIMER_STOP(); + + for (size_t i=0; i<qthread_cnt; i++) { + qthreads[i].join(); + } + + auto total_latency = TIMER_RESULT(); + size_t throughput = (size_t) ((double) (n - warmup) / (double) total_latency * 1e9); + fprintf(stdout, "T\t%ld\t%ld\n", total_latency, throughput); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp new file mode 100644 index 0000000..81a430a --- /dev/null +++ b/benchmarks/vldb/ts_bench.cpp @@ -0,0 +1,95 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::TrieSpline<Rec> Shard; +typedef de::rc::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::rc::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(8000, 12001, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_range_queries<QP>(q_fname, .0001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp new file mode 100644 index 0000000..4511350 --- /dev/null +++ b/benchmarks/vldb/ts_bsm_bench.cpp @@ -0,0 +1,95 @@ +/* + * + */ + +#define ENABLE_TIMER +#define TS_TEST + +#include <thread> + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::TrieSpline<Rec> Shard; +typedef de::rc::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::rc::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1, 12001, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_range_queries<QP>(q_fname, .0001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); //+ shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp new file mode 100644 index 0000000..cc0cd99 --- /dev/null +++ b/benchmarks/vldb/ts_mdsp_bench.cpp @@ -0,0 +1,70 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include <thread> + +#include "triespline_bsm.h" +#include "psu-util/bentley-saxe.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "query/rangecount.h" +#include "psu-util/timer.h" +#include "standard_benchmarks.h" + +typedef std::pair<uint64_t, uint64_t> Rec; +typedef de::Record<uint64_t, uint64_t> FRec; + +typedef BSMTrieSpline<uint64_t, uint64_t> Shard; +typedef de::rc::Parms<FRec> QP; +typedef psudb::bsm::BentleySaxe<Rec, Shard, true> Ext; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new psudb::bsm::BentleySaxe<Rec, Shard, true>(); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file_pair<uint64_t, uint64_t>(d_fname, n); + auto queries = read_range_queries<QP>(q_fname, .0001); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + insert_records<Shard, Rec, true>(extension, 0, warmup, data); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Shard, Rec, true>(extension, warmup, data.size(), data); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP, true>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + fprintf(stdout, "%ld\t%ld\n", insert_throughput, query_latency); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); +} + diff --git a/benchmarks/vldb/ts_parmsweep.cpp b/benchmarks/vldb/ts_parmsweep.cpp new file mode 100644 index 0000000..2c9412a --- /dev/null +++ b/benchmarks/vldb/ts_parmsweep.cpp @@ -0,0 +1,124 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/TrieSpline.h" +#include "query/rangecount.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef de::Record<uint64_t, uint64_t> Rec; +typedef de::TrieSpline<Rec> Shard; +typedef de::rc::Query<Rec, Shard, true> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TOMBSTONE, de::SerialScheduler> Ext2; +typedef de::rc::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_sosd_file<Rec>(d_fname, n); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_range_queries<QP>(q_fname, .001); + + const std::vector<de::LayoutPolicy> policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING}; + const std::vector<size_t> buffer_sizes = {1000, 4000, 8000, 12000, 15000, 20000}; + const std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12}; + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext(bs, bs, sf, 0, 64); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext2(bs, bs, sf, 0, 64); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext2, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext2, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext2, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + gsl_rng_free(rng); + fflush(stderr); +} + diff --git a/benchmarks/vldb/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp new file mode 100644 index 0000000..0b98a52 --- /dev/null +++ b/benchmarks/vldb/vptree_bench.cpp @@ -0,0 +1,102 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1400, 1400, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file<Rec, 300>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_knn_queries<QP>(q_fname, 1000); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_bench_alt.cpp b/benchmarks/vldb/vptree_bench_alt.cpp new file mode 100644 index 0000000..b09ee7d --- /dev/null +++ b/benchmarks/vldb/vptree_bench_alt.cpp @@ -0,0 +1,102 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1400, 1400, 8, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_bsm_bench.cpp b/benchmarks/vldb/vptree_bsm_bench.cpp new file mode 100644 index 0000000..4a7fcb6 --- /dev/null +++ b/benchmarks/vldb/vptree_bsm_bench.cpp @@ -0,0 +1,102 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1, 1400, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_vector_file<Rec, 300>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_knn_queries<QP>(q_fname, 1000); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto shard = extension->create_static_structure(); + + fprintf(stderr, "Running Static query tests\n\n"); + TIMER_START(); + run_static_queries<Shard, QP, Q>(shard, queries); + TIMER_STOP(); + + auto static_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + auto static_size = shard->get_memory_usage(); // + shard->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t\t%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size, static_latency, static_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_bsm_bench_alt.cpp b/benchmarks/vldb/vptree_bsm_bench_alt.cpp new file mode 100644 index 0000000..63baf8b --- /dev/null +++ b/benchmarks/vldb/vptree_bsm_bench_alt.cpp @@ -0,0 +1,92 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef ANNRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::BSM, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + auto extension = new Ext(1, 1400, 2, 0, 64); + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + fprintf(stderr, "[I] Reading data file...\n"); + auto data = read_binary_vector_file<Rec>(d_fname, n); + + fprintf(stderr, "[I] Generating delete vector\n"); + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + fprintf(stderr, "[I] Reading Queries\n"); + auto queries = read_binary_knn_queries<QP>(q_fname, 1000, 100); + + fprintf(stderr, "[I] Warming up structure...\n"); + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + fprintf(stderr, "[I] Running Insertion Benchmark\n"); + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + fprintf(stderr, "[I] Running Query Benchmark\n"); + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "%ld\t%ld\t\t%ld\n", insert_throughput, query_latency, ext_size); + + gsl_rng_free(rng); + delete extension; + fflush(stderr); + fflush(stdout); +} + diff --git a/benchmarks/vldb/vptree_parmsweep.cpp b/benchmarks/vldb/vptree_parmsweep.cpp new file mode 100644 index 0000000..2cbd521 --- /dev/null +++ b/benchmarks/vldb/vptree_parmsweep.cpp @@ -0,0 +1,129 @@ +/* + * + */ + +#define ENABLE_TIMER + +#include "framework/DynamicExtension.h" +#include "shard/VPTree.h" +#include "query/knn.h" +#include "framework/interface/Record.h" +#include "file_util.h" +#include "standard_benchmarks.h" + +#include <gsl/gsl_rng.h> + +#include "psu-util/timer.h" + + +typedef Word2VecRec Rec; + +typedef de::VPTree<Rec, 100, true> Shard; +typedef de::knn::Query<Rec, Shard> Q; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::TEIRING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext; +typedef de::DynamicExtension<Rec, Shard, Q, de::LayoutPolicy::LEVELING, de::DeletePolicy::TAGGING, de::SerialScheduler> Ext2; +typedef de::knn::Parms<Rec> QP; + +void usage(char *progname) { + fprintf(stderr, "%s reccnt datafile queryfile\n", progname); +} + +int main(int argc, char **argv) { + + if (argc < 4) { + usage(argv[0]); + exit(EXIT_FAILURE); + } + + size_t n = atol(argv[1]); + std::string d_fname = std::string(argv[2]); + std::string q_fname = std::string(argv[3]); + + gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937); + + auto data = read_vector_file<Rec, 300>(d_fname, n); + + std::vector<size_t> to_delete(n * delete_proportion); + size_t j=0; + for (size_t i=0; i<data.size() && j<to_delete.size(); i++) { + if (gsl_rng_uniform(rng) <= delete_proportion) { + to_delete[j++] = i; + } + } + auto queries = read_knn_queries<QP>(q_fname, 10); + + + const std::vector<de::LayoutPolicy> policies = {de::LayoutPolicy::LEVELING, de::LayoutPolicy::TEIRING}; + const std::vector<size_t> buffer_sizes = {100, 400, 800, 1200, 1500, 2000}; + const std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12}; + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext(bs, bs, sf, 0, 64); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "TIERING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + for (const auto &bs : buffer_sizes) { + for (const auto &sf : scale_factors) { + auto extension = new Ext2(bs, bs, sf, 0, 64); + + /* warmup structure w/ 10% of records */ + size_t warmup = .1 * n; + size_t delete_idx = 0; + insert_records<Ext2, Rec>(extension, 0, warmup, data, to_delete, delete_idx, false, rng); + + extension->await_next_epoch(); + + TIMER_INIT(); + + TIMER_START(); + insert_records<Ext2, Rec>(extension, warmup, data.size(), data, to_delete, delete_idx, true, rng); + TIMER_STOP(); + + auto insert_latency = TIMER_RESULT(); + size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9); + + TIMER_START(); + run_queries<Ext2, QP>(extension, queries); + TIMER_STOP(); + + auto query_latency = TIMER_RESULT() / queries.size(); + + auto ext_size = extension->get_memory_usage() + extension->get_aux_memory_usage(); + + fprintf(stdout, "LEVELING\t%ld\t%ld\t%ld\t%ld\t%ld\n", bs, sf, insert_throughput, query_latency, ext_size); + delete extension; + } + } + + gsl_rng_free(rng); + fflush(stderr); +} + |