summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt111
-rw-r--r--benchmarks/include/benchmark_types.h5
-rw-r--r--benchmarks/include/standard_benchmarks.h69
-rw-r--r--benchmarks/string_insertion_tput.cpp32
-rw-r--r--benchmarks/vldb/alex_bench.cpp205
-rw-r--r--benchmarks/vldb/btree_bench.cpp90
-rw-r--r--benchmarks/vldb/dynamic_pgm_bench.cpp77
-rw-r--r--benchmarks/vldb/irs_bench.cpp (renamed from benchmarks/irs_bench.cpp)4
-rw-r--r--benchmarks/vldb/mtree_bench.cpp80
-rw-r--r--benchmarks/vldb/pgm_bench.cpp (renamed from benchmarks/pgm_bench.cpp)4
-rw-r--r--benchmarks/vldb/ts_bench.cpp (renamed from benchmarks/ts_bench.cpp)4
-rw-r--r--benchmarks/vldb/ts_bsm_bench.cpp (renamed from benchmarks/ts_bsm_bench.cpp)6
-rw-r--r--benchmarks/vldb/ts_mdsp_bench.cpp (renamed from benchmarks/ts_mdsp_bench.cpp)6
-rw-r--r--benchmarks/vldb/vptree_bench.cpp (renamed from benchmarks/vptree_bench.cpp)4
14 files changed, 612 insertions, 85 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c62085f..8e6cdfc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,8 @@ project("Practical Dynamic Extension" VERSION 0.1.0)
set(debug false)
set(tests True)
-set(bench true)
+set(bench false)
+set(vldb_bench true)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -147,96 +148,136 @@ if (tests)
target_include_directories(louds_tests PRIVATE include external/psudb-common/cpp/include external/PLEX/include external/fast_succinct_trie/include external/louds-patricia)
endif()
+if (vldb_bench)
+    # Benchmarks living under benchmarks/vldb/. Every target is built from
+    # benchmarks/vldb/<target>.cpp and shares the same include paths, base
+    # libraries, and link options, so they are declared once and applied in
+    # a loop instead of being repeated per target.
+    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/benchmarks")
+
+    set(vldb_bench_include_dirs
+        include
+        external
+        external/m-tree/cpp
+        external/PGM-index/include
+        external/PLEX/include
+        benchmarks/include
+        external/psudb-common/cpp/include)
+
+    # Targets that are additionally compiled with -fopenmp and linked
+    # against gomp.
+    set(vldb_bench_openmp_targets
+        pgm_bench
+        dynamic_pgm_bench
+        btree_bench
+        mtree_bench)
+
+    set(vldb_bench_targets
+        irs_bench
+        vptree_bench
+        ts_bench
+        ts_bsm_bench
+        ts_mdsp_bench
+        ${vldb_bench_openmp_targets})
+
+    # NOTE: the loop variable is deliberately not called `bench`, which is
+    # the toggle variable for the general benchmark block.
+    foreach(vldb_target ${vldb_bench_targets})
+        add_executable(${vldb_target} ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/${vldb_target}.cpp)
+        target_link_libraries(${vldb_target} PUBLIC gsl pthread atomic)
+        target_include_directories(${vldb_target} PRIVATE ${vldb_bench_include_dirs})
+        target_link_options(${vldb_target} PUBLIC -mcx16)
+    endforeach()
+
+    # Appended after the base libraries, so the final link order remains
+    # "gsl pthread atomic gomp".
+    foreach(vldb_target ${vldb_bench_openmp_targets})
+        target_link_libraries(${vldb_target} PUBLIC gomp)
+        target_compile_options(${vldb_target} PUBLIC -fopenmp)
+    endforeach()
+
+    # alex_bench is currently disabled.
+    #add_executable(alex_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vldb/alex_bench.cpp)
+    #target_link_libraries(alex_bench PUBLIC gsl pthread atomic gomp)
+    #target_include_directories(alex_bench PRIVATE include external/benchmarks/include external/psudb-common/cpp/include external/alex/src/core/)
+    #target_link_options(alex_bench PUBLIC -mcx16)
+    #target_compile_options(alex_bench PUBLIC -fopenmp -std=c++17)
+endif()
+
+
+
if (bench)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin/benchmarks")
add_executable(reconstruction_interference ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/reconstruction_interference.cpp)
target_link_libraries(reconstruction_interference PUBLIC gsl pthread atomic)
target_link_options(reconstruction_interference PUBLIC -mcx16)
- target_include_directories(reconstruction_interference PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(reconstruction_interference PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
add_executable(insertion_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insertion_tput.cpp)
target_link_libraries(insertion_tput PUBLIC gsl pthread atomic)
- target_include_directories(insertion_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(insertion_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(insertion_tput PUBLIC -mcx16)
add_executable(string_insertion_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/string_insertion_tput.cpp)
target_link_libraries(string_insertion_tput PUBLIC gsl pthread atomic)
- target_include_directories(string_insertion_tput PRIVATE include external external/fast_succinct_trie/include external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(string_insertion_tput PRIVATE include external external/fast_succinct_trie/include external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(string_insertion_tput PUBLIC -mcx16)
add_executable(louds_insertion_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/louds_insertion_tput.cpp)
target_link_libraries(louds_insertion_tput PUBLIC gsl pthread atomic)
- target_include_directories(louds_insertion_tput PRIVATE include external external/fast_succinct_trie/include external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include external/louds-patricia)
+ target_include_directories(louds_insertion_tput PRIVATE include external external/fast_succinct_trie/include external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include external/louds-patricia)
target_link_options(louds_insertion_tput PUBLIC -mcx16)
add_executable(query_workload_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/query_workload_bench.cpp)
target_link_libraries(query_workload_bench PUBLIC gsl pthread atomic)
- target_include_directories(query_workload_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(query_workload_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(query_workload_bench PUBLIC -mcx16)
add_executable(insert_query_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insert_query_tput.cpp)
target_link_libraries(insert_query_tput PUBLIC gsl pthread atomic)
- target_include_directories(insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(insert_query_tput PUBLIC -mcx16)
add_executable(poplar_trie ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/poplar_trie.cpp)
target_link_libraries(poplar_trie PUBLIC gsl pthread atomic)
- target_include_directories(poplar_trie PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include external/poplar-trie/include)
+ target_include_directories(poplar_trie PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include external/poplar-trie/include)
target_link_options(poplar_trie PUBLIC -mcx16)
add_executable(hat_trie ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/hat_trie.cpp)
target_link_libraries(hat_trie PUBLIC gsl pthread atomic)
target_include_directories(hat_trie PRIVATE include external
- external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include external/hat-trie/include/tsl)
+ external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include external/hat-trie/include/tsl)
target_link_options(hat_trie PUBLIC -mcx16)
add_executable(cedar_trie ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/cedar_trie.cpp)
target_link_libraries(cedar_trie PUBLIC gsl pthread atomic)
- target_include_directories(cedar_trie PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include external/hat-trie/include/tsl)
+ target_include_directories(cedar_trie PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include external/hat-trie/include/tsl)
target_link_options(cedar_trie PUBLIC -mcx16)
#add_executable(btree_insert_query_tput ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/btree_insert_query_tput.cpp)
#target_link_libraries(btree_insert_query_tput PUBLIC gsl cblas pthread atomic)
- #target_include_directories(btree_insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ #target_include_directories(btree_insert_query_tput PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
#target_link_options(btree_insert_query_tput PUBLIC -mcx16)
add_executable(watermark_testing ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/watermark_testing.cpp)
target_link_libraries(watermark_testing PUBLIC gsl pthread atomic)
- target_include_directories(watermark_testing PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(watermark_testing PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(watermark_testing PUBLIC -mcx16)
- add_executable(irs_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/irs_bench.cpp)
- target_link_libraries(irs_bench PUBLIC gsl pthread atomic)
- target_include_directories(irs_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
- target_link_options(irs_bench PUBLIC -mcx16)
-
- add_executable(vptree_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/vptree_bench.cpp)
- target_link_libraries(vptree_bench PUBLIC gsl pthread atomic)
- target_include_directories(vptree_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
- target_link_options(vptree_bench PUBLIC -mcx16)
-
- add_executable(ts_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/ts_bench.cpp)
- target_link_libraries(ts_bench PUBLIC gsl pthread atomic)
- target_include_directories(ts_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
- target_link_options(ts_bench PUBLIC -mcx16)
-
- add_executable(pgm_bench ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/pgm_bench.cpp)
- target_link_libraries(pgm_bench PUBLIC gsl pthread atomic gomp)
- target_include_directories(pgm_bench PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
- target_link_options(pgm_bench PUBLIC -mcx16)
- target_compile_options(pgm_bench PUBLIC -fopenmp)
-
#add_executable(static_dynamic_comp ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/static_dynamic_comp.cpp)
#target_link_libraries(static_dynamic_comp PUBLIC gsl cblas pthread atomic)
- #target_include_directories(static_dynamic_comp PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ #target_include_directories(static_dynamic_comp PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
#target_link_options(static_dynamic_comp PUBLIC -mcx16)
add_executable(insert_tail_latency ${CMAKE_CURRENT_SOURCE_DIR}/benchmarks/insert_tail_latency.cpp)
target_link_libraries(insert_tail_latency PUBLIC gsl pthread atomic)
- target_include_directories(insert_tail_latency PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include bench/include external/psudb-common/cpp/include)
+ target_include_directories(insert_tail_latency PRIVATE include external external/m-tree/cpp external/PGM-index/include external/PLEX/include benchmarks/include external/psudb-common/cpp/include)
target_link_options(insert_tail_latency PUBLIC -mcx16)
endif()
diff --git a/benchmarks/include/benchmark_types.h b/benchmarks/include/benchmark_types.h
index fcdeac6..13964e8 100644
--- a/benchmarks/include/benchmark_types.h
+++ b/benchmarks/include/benchmark_types.h
@@ -3,8 +3,9 @@
#include <cstdlib>
#include "psu-ds/BTree.h"
#include "framework/interface/Record.h"
+#include "pgm/pgm_index_dynamic.hpp"
-/* TLX BTree definitions*/
+/* BTree definitions*/
template <typename K, typename V>
struct btree_record {
K key;
@@ -50,3 +51,5 @@ struct euclidean_distance {
typedef mt::mtree<Word2VecRec, euclidean_distance> MTree;
#endif
+typedef pgm::DynamicPGMIndex<uint64_t, uint64_t, pgm::PGMIndex<uint64_t, 64>> PGM;
+
diff --git a/benchmarks/include/standard_benchmarks.h b/benchmarks/include/standard_benchmarks.h
index 74bf93f..aaef679 100644
--- a/benchmarks/include/standard_benchmarks.h
+++ b/benchmarks/include/standard_benchmarks.h
@@ -14,6 +14,7 @@
#include "framework/DynamicExtension.h"
#include "framework/interface/Query.h"
+#include "query/irs.h"
#include "psu-util/progress.h"
#include "benchmark_types.h"
#include "psu-util/bentley-saxe.h"
@@ -24,15 +25,41 @@ static double delete_proportion = 0.05;
template<typename DE, typename QP, bool BSM=false>
static void run_queries(DE *extension, std::vector<QP> &queries) {
for (size_t i=0; i<queries.size(); i++) {
- auto q = &queries[i];
-
- auto res = extension->query(q);
- if constexpr (!BSM) {
- auto r = res.get();
+ if constexpr (std::is_same_v<MTree, DE>) {
+ std::vector<Word2VecRec> result;
+ auto res = extension->get_nearest_by_limit(queries[i].point, queries[i].k);
+
+ auto itr = res.begin();
+ while (itr != res.end()) {
+ result.emplace_back(itr->data);
+ itr++;
+ }
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ size_t tot = 0;
+ auto ptr = extension->find(queries[i].lower_bound);
+ while (ptr != extension->end() && ptr->first <= queries[i].upper_bound) {
+ tot++;
+ ++ptr;
+ }
+ } else {
+ auto res = extension->query(&queries[i]);
+ if constexpr (!BSM) {
+ auto r = res.get();
+ }
}
}
}
+/*
+ * Run every IRS query in `queries` against `btree` using its range_sample()
+ * routine. Samples are written into a scratch vector that is reused across
+ * queries; the sampled records themselves are discarded.
+ *
+ * An empty query list is a no-op (previously queries[0] was read
+ * unconditionally, which is undefined behaviour on an empty vector).
+ */
+template <typename R>
+static void run_btree_queries(BenchBTree *btree, std::vector<de::irs::Parms<R>> &queries) {
+    if (queries.empty()) {
+        return;
+    }
+
+    std::vector<int64_t> sample_set;
+    /* sized from the first query only; assumes all queries share a sample size -- TODO confirm */
+    sample_set.reserve(queries[0].sample_size);
+
+    for (auto &q : queries) {
+        btree->range_sample(q.lower_bound, q.upper_bound, q.sample_size, sample_set, q.rng);
+    }
+}
+
template<typename S, typename QP, typename Q>
static void run_static_queries(S *shard, std::vector<QP> &queries) {
@@ -68,26 +95,42 @@ static void insert_records(psudb::bsm::BentleySaxe<R, DS, MDSP> *extension,
}
-template<typename DE, de::RecordInterface R>
-static void insert_records(DE *extension, size_t start, size_t stop,
+template<typename DE, typename R>
+static void insert_records(DE *structure, size_t start, size_t stop,
std::vector<R> &records, std::vector<size_t> &to_delete,
size_t &delete_idx, bool delete_records, gsl_rng *rng) {
psudb::progress_update(0, "Insert Progress");
size_t reccnt = 0;
for (size_t i=start; i<stop; i++) {
- while (!extension->insert(records[i])) {
- psudb::progress_update((double) i / (double)(stop - start), "Insert Progress");
- usleep(1);
+
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->insert(records[i]);
+ } else if constexpr (std::is_same_v<MTree, DE>) {
+ structure->add(records[i]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->insert_or_assign(records[i].key, records[i].value);
+ } else {
+ while (!structure->insert(records[i])) {
+ psudb::progress_update((double) i / (double)(stop - start), "Insert Progress");
+ usleep(1);
+ }
}
if (delete_records && gsl_rng_uniform(rng) <=
delete_proportion && to_delete[delete_idx] <= i) {
- while (!extension->erase(records[to_delete[delete_idx]])) {
- usleep(1);
+ if constexpr (std::is_same_v<BenchBTree, DE>) {
+ structure->erase_one(records[to_delete[delete_idx]].key);
+ } else if constexpr (std::is_same_v<MTree, DE>) {
+ structure->remove(records[to_delete[delete_idx]]);
+ } else if constexpr (std::is_same_v<PGM, DE>) {
+ structure->erase(records[to_delete[delete_idx]].key);
+ } else {
+ while (!structure->erase(records[to_delete[delete_idx]])) {
+ usleep(1);
+ }
}
-
delete_idx++;
g_deleted_records++;
}
diff --git a/benchmarks/string_insertion_tput.cpp b/benchmarks/string_insertion_tput.cpp
index f4a519a..8fa7f44 100644
--- a/benchmarks/string_insertion_tput.cpp
+++ b/benchmarks/string_insertion_tput.cpp
@@ -69,7 +69,13 @@ int main(int argc, char **argv) {
fprintf(stderr, "Finished reading from file.\n");
}
- auto extension = new Ext(1000, 12000, 8);
+ std::vector<size_t> scale_factors = {2, 4, 6, 8, 10, 12};
+ std::vector<size_t> buffer_sizes = {1000, 2000, 5000, 10000, 12000, 15000};
+
+ for (auto &sf : scale_factors) {
+ for (auto &bf_sz : buffer_sizes) {
+
+ auto extension = new Ext(bf_sz, bf_sz, sf);
TIMER_INIT();
TIMER_START();
@@ -97,33 +103,15 @@ int main(int argc, char **argv) {
auto query_time = TIMER_RESULT();
-
- auto shard = extension->create_static_structure();
- TIMER_START();
- for (size_t i=0; i<m; i++) {
- size_t j = rand() % strings.size();
- de::pl::Parms<Rec> parms = {strings[j].get()};
-
- auto res = Q::query(shard, nullptr, &parms);
-
- if (res[0].rec.value != j) {
- fprintf(stderr, "static:\t%ld %ld %s\n", res[0].rec.value, j, strings[j].get());
- }
- }
- TIMER_STOP();
-
- auto shard_query_time = TIMER_RESULT();
-
double i_tput = (double) n / (double) total_time * 1e9;
size_t q_lat = query_time / m;
- size_t s_q_lat = shard_query_time / m;
- fprintf(stdout, "%ld\t\t%lf\t%ld\t%ld\t%ld\t%ld\n", extension->get_record_count(),
- i_tput, q_lat, s_q_lat, extension->get_memory_usage(), shard->get_memory_usage());
+ fprintf(stdout, "%ld\t%ld\t%ld\t%lf\t%ld\t%ld\n", extension->get_record_count(),
+ bf_sz, sf, i_tput, q_lat, extension->get_memory_usage());
delete extension;
- delete shard;
+ }}
fflush(stderr);
}
diff --git a/benchmarks/vldb/alex_bench.cpp b/benchmarks/vldb/alex_bench.cpp
new file mode 100644
index 0000000..f75afa6
--- /dev/null
+++ b/benchmarks/vldb/alex_bench.cpp
@@ -0,0 +1,205 @@
+#include "alex.h"
+#include "include/standalone_utility.h"
+
+typedef uint64_t key_type;
+typedef uint64_t value_type;
+
+typedef alex::Alex<key_type, value_type> Alex;
+
+/* A single key/value pair; filled in by next_record() and inserted into ALEX. */
+struct record {
+ key_type key;
+ value_type value;
+};
+
+/*
+ * A range query over keys. alex_rq_bench() scans from lower_bound while
+ * key <= upper_bound, i.e. the upper bound is treated as inclusive.
+ */
+struct query {
+ key_type lower_bound;
+ key_type upper_bound;
+};
+
+/*
+ * Refill `vec` with up to `n` records pulled from `file` via next_record().
+ * Each record read is also appended to `to_delete` with probability
+ * delete_prop * 1.1, drawn from the global g_rng.
+ *
+ * Returns false only when the very first read fails (nothing was loaded);
+ * a short batch, or a request for zero records, still returns true.
+ */
+template <typename R>
+static bool build_insert_vec(std::fstream &file, std::vector<R> &vec, size_t n,
+        double delete_prop, std::vector<R> &to_delete, bool binary=false) {
+    vec.clear();
+
+    const double delete_threshold = delete_prop + (delete_prop * .1);
+
+    size_t loaded = 0;
+    while (loaded < n) {
+        R rec;
+        if (!next_record(file, rec, binary)) {
+            /* input exhausted: report failure only if nothing was read */
+            return loaded != 0;
+        }
+
+        vec.emplace_back(rec);
+
+        if (gsl_rng_uniform(g_rng) < delete_threshold) {
+            to_delete.emplace_back(rec);
+        }
+
+        loaded++;
+    }
+
+    return true;
+}
+
+
+/*
+ * Create a new ALEX index and bulk load it with up to `count` records read
+ * from `file`. The caller owns the returned index and must delete it.
+ *
+ * Only the records that were actually read are sorted and loaded; the
+ * previous version always sorted/loaded `count` entries, which touched
+ * uninitialised pairs whenever the file held fewer than `count` records.
+ *
+ * `binary` is forwarded to next_record(), matching build_insert_vec().
+ * NOTE(review): the previous version called next_record(file, rec) and so
+ * used that helper's default mode even when binary=true was requested --
+ * confirm against standalone_utility.h.
+ *
+ * `delete_prop`, `to_delete` and `progress` are currently unused; they are
+ * kept so existing call sites continue to compile.
+ */
+static Alex *warmup(std::fstream &file, size_t count,
+        double delete_prop, std::vector<record> to_delete, bool progress=true, bool binary=false) {
+    (void) delete_prop;
+    (void) to_delete;
+    (void) progress;
+
+    std::vector<std::pair<key_type, value_type>> insert_vec;
+    insert_vec.reserve(count);
+
+    record rec;
+    while (insert_vec.size() < count && next_record(file, rec, binary)) {
+        insert_vec.emplace_back(rec.key, rec.value);
+    }
+
+    /* the records are sorted by key before being handed to bulk_load */
+    std::sort(insert_vec.begin(), insert_vec.end());
+
+    Alex *alex = new Alex();
+    alex->bulk_load(insert_vec.data(), insert_vec.size());
+
+    return alex;
+}
+
+
+/*
+ * Insert up to `insert_cnt` records from `file` into `alex`, interleaving
+ * deletes with probability `delete_prop` per insert (at most
+ * insert_cnt * delete_prop deletes in total). Records are processed in
+ * batches; only the insert/delete loop of each batch is timed.
+ *
+ * Prints the combined insert+delete throughput (operations per second)
+ * followed by a tab to stdout.
+ *
+ * Fixes relative to the previous version:
+ *   - the last batch is clamped so no more than `insert_cnt` records are
+ *     read and inserted (previously it could overshoot by up to BATCH-1);
+ *   - throughput is reported as 0 when nothing was timed, instead of
+ *     converting a NaN/inf double to size_t.
+ */
+static void alex_rq_insert(Alex &alex, std::fstream &file, size_t insert_cnt, double delete_prop, std::vector<record> &to_delete, bool binary=false) {
+    size_t delete_cnt = insert_cnt * delete_prop;
+
+    size_t applied_deletes = 0;
+    size_t applied_inserts = 0;
+
+    size_t BATCH=1000;
+
+    std::vector<record> insert_vec;
+    std::vector<record> delete_vec;
+    insert_vec.reserve(BATCH);
+    delete_vec.reserve(BATCH*delete_prop);
+
+    size_t delete_idx = 0;
+
+    bool continue_benchmark = true;
+
+    size_t total_time = 0;
+
+    while (applied_inserts < insert_cnt && continue_benchmark) {
+        /* never request more records than are still owed */
+        size_t batch_cnt = std::min(BATCH, insert_cnt - applied_inserts);
+
+        continue_benchmark = build_insert_vec(file, insert_vec, batch_cnt, delete_prop, to_delete, binary);
+        progress_update((double) applied_inserts / (double) insert_cnt, "inserting:");
+        if (applied_deletes < delete_cnt) {
+            build_delete_vec(to_delete, delete_vec, BATCH*delete_prop);
+            delete_idx = 0;
+        }
+
+        if (insert_vec.size() == 0) {
+            break;
+        }
+
+        auto insert_start = std::chrono::high_resolution_clock::now();
+        for (size_t i=0; i<insert_vec.size(); i++) {
+            // process a delete if necessary
+            if (applied_deletes < delete_cnt && delete_idx < delete_vec.size() && gsl_rng_uniform(g_rng) < delete_prop) {
+                alex.erase_one(delete_vec[delete_idx++].key);
+                applied_deletes++;
+            }
+
+            // insert the record;
+            alex.insert(insert_vec[i].key, insert_vec[i].value);
+            applied_inserts++;
+        }
+        auto insert_stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(insert_stop - insert_start).count();
+    }
+
+    progress_update(1.0, "inserting:");
+
+    /* total_time is in nanoseconds; guard against an empty/instant run */
+    size_t throughput = 0;
+    if (total_time > 0) {
+        throughput = (((double) (applied_inserts + applied_deletes) / (double) total_time) * 1e9);
+    }
+
+    fprintf(stdout, "%ld\t", throughput);
+}
+
+
+
+/*
+ * Run every range query in `queries` against `alex`, `trial_cnt` times, and
+ * print the average per-query latency in nanoseconds followed by a tab to
+ * stdout. Matching records are materialised into a scratch vector that is
+ * cleared after each query.
+ *
+ * Prints 0 when there is nothing to run (empty `queries` or trial_cnt == 0)
+ * instead of dividing by zero.
+ *
+ * NOTE(review): each scan starts at alex.find(lower_bound). If find() only
+ * matches keys that are present, queries whose lower bound is not a stored
+ * key will return no results; lower_bound() may be what is intended --
+ * confirm against the ALEX API.
+ */
+static void alex_rq_bench(Alex &alex, std::vector<query> queries, size_t trial_cnt=1)
+{
+    size_t total_time = 0;
+
+    std::vector<record> result_set;
+
+    for (size_t i=0; i<trial_cnt; i++) {
+        auto start = std::chrono::high_resolution_clock::now();
+        for (size_t j=0; j<queries.size(); j++) {
+            auto ptr = alex.find(queries[j].lower_bound);
+            while (ptr != alex.end() && ptr.key() <= queries[j].upper_bound) {
+                result_set.push_back({ptr.key(), ptr.payload()});
+                ptr++;
+            }
+            result_set.clear();
+        }
+        auto stop = std::chrono::high_resolution_clock::now();
+
+        total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
+    }
+
+    size_t executed = trial_cnt * queries.size();
+    size_t latency = (executed > 0) ? total_time / executed : 0;
+
+    fprintf(stdout, "%ld\t", latency);
+}
+
+/*
+ * ALEX range-query benchmark driver.
+ *
+ * Arguments: <filename> <record_count> <delete_proportion> <query_file>
+ *
+ * Bulk loads the first 80% of the records, inserts the remainder (with
+ * interleaved deletes), then runs the range queries. Writes
+ * "<throughput>\t<memory_usage>\t<latency>\t\n" to stdout; index sizes go
+ * to stderr.
+ */
+int main(int argc, char **argv)
+{
+    if (argc < 5) {
+        fprintf(stderr, "Usage: alex_rq_bench <filename> <record_count> <delete_proportion> <query_file>\n");
+        exit(EXIT_FAILURE);
+    }
+
+    std::string filename = std::string(argv[1]);
+    size_t record_count = atol(argv[2]);
+    double delete_prop = atof(argv[3]);
+    std::string qfilename = std::string(argv[4]);
+
+    bool use_osm = false;
+
+    /* fraction of the records that is bulk loaded before timing starts */
+    double insert_batch = 0.8;
+
+    init_bench_env(record_count, true, use_osm);
+    auto queries = read_range_queries<query>(qfilename, .0001);
+
+    std::fstream datafile;
+    datafile.open(filename, std::ios::in | std::ios::binary);
+    if (!datafile.is_open()) {
+        /* fail loudly rather than benchmarking an empty index */
+        fprintf(stderr, "Unable to open data file: %s\n", filename.c_str());
+        delete_bench_env();
+        exit(EXIT_FAILURE);
+    }
+
+    std::vector<record> to_delete;
+
+    // warm up the tree with initial_insertions number of initially inserted
+    // records
+    size_t warmup_cnt = insert_batch * record_count;
+    auto alex = warmup(datafile, warmup_cnt, delete_prop, to_delete, true, true);
+
+    fprintf(stderr, "Size: %ld\n", alex->size());
+    size_t insert_cnt = record_count - warmup_cnt;
+
+    alex_rq_insert(*alex, datafile, insert_cnt, delete_prop, to_delete, true);
+    size_t memory_usage = alex->model_size() + alex->data_size();
+
+    fprintf(stderr, "Size: %ld\n", alex->size());
+    fprintf(stdout, "%ld\t", memory_usage);
+
+    alex_rq_bench(*alex, queries);
+    fprintf(stdout, "\n");
+
+    delete_bench_env();
+    delete alex;
+    fflush(stdout);
+    fflush(stderr);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/benchmarks/vldb/btree_bench.cpp b/benchmarks/vldb/btree_bench.cpp
new file mode 100644
index 0000000..12107c6
--- /dev/null
+++ b/benchmarks/vldb/btree_bench.cpp
@@ -0,0 +1,90 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include "shard/ISAMTree.h"
+#include "query/irs.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "benchmark_types.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+#include "standard_benchmarks.h"
+#include "psu-ds/BTree.h"
+
+typedef btree_record<int64_t, int64_t> Rec;
+
+typedef de::ISAMTree<Rec> Shard;
+typedef de::irs::Query<Rec, Shard> Q;
+typedef de::irs::Parms<Rec> QP;
+
+/* Print the expected command-line arguments to stderr. */
+void usage(char *progname) {
+    /* newline-terminated, consistent with mtree_bench's usage message */
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+/*
+ * B-tree IRS benchmark driver.
+ *
+ * Arguments: reccnt datafile queryfile
+ *
+ * Warms the tree with the first 30% of the records, times the insertion of
+ * the remainder (with interleaved deletes), then times the sampling
+ * queries. Writes "<insert_throughput>\t<query_latency>\t<btree_size>\n" to
+ * stdout.
+ */
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto btree = BenchBTree();
+
+    gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    /*
+     * If the file held fewer than n records, shrink n so that the warmup
+     * range and the throughput calculation stay within the data read.
+     */
+    if (data.size() < n) {
+        n = data.size();
+    }
+
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    /* read in the range queries and add sample size and rng for sampling */
+    auto queries = read_range_queries<QP>(q_fname, .001);
+    if (queries.empty()) {
+        /* the per-query latency below divides by queries.size() */
+        fprintf(stderr, "[E] No queries read from %s\n", q_fname.c_str());
+        gsl_rng_free(rng);
+        exit(EXIT_FAILURE);
+    }
+    for (auto &q : queries) {
+        q.sample_size = 1000;
+        q.rng = rng;
+    }
+
+    /* warmup structure w/ 30% of records */
+    size_t warmup = .3 * n;
+    size_t delete_idx = 0;
+    insert_records<BenchBTree, Rec>(&btree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<BenchBTree, Rec>(&btree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_btree_queries<Rec>(&btree, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto btree_size = btree.get_stats().inner_nodes * psudb::btree_default_traits<int64_t, Rec>::inner_slots * (sizeof(int64_t) + sizeof(void*));
+
+    /* account for memory wasted on gaps in the structure */
+    btree_size += btree.get_stats().leaves * psudb::btree_default_traits<int64_t, Rec>::leaf_slots * sizeof(Rec);
+    btree_size -= btree.size() * sizeof(Rec);
+
+    fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, btree_size);
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+}
+
diff --git a/benchmarks/vldb/dynamic_pgm_bench.cpp b/benchmarks/vldb/dynamic_pgm_bench.cpp
new file mode 100644
index 0000000..249bc92
--- /dev/null
+++ b/benchmarks/vldb/dynamic_pgm_bench.cpp
@@ -0,0 +1,77 @@
+/*
+ *
+ */
+
+#define ENABLE_TIMER
+
+#include <thread>
+
+#include "query/rangecount.h"
+#include "framework/interface/Record.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef de::Record<uint64_t, uint64_t> Rec;
+typedef de::rc::Parms<Rec> QP;
+
+/* Print the expected command-line arguments to stderr. */
+void usage(char *progname) {
+    /* newline-terminated, consistent with mtree_bench's usage message */
+    fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+/*
+ * Dynamic PGM benchmark driver.
+ *
+ * Arguments: reccnt datafile queryfile
+ *
+ * Warms the index with the first 10% of the records, times the insertion
+ * of the remainder (with interleaved deletes), then times the range
+ * queries. Writes "<insert_throughput>\t<query_latency>\t<size_in_bytes>\n"
+ * to stdout.
+ */
+int main(int argc, char **argv) {
+
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    /* the index is constructed from an empty range and filled by insert_records */
+    std::vector<std::pair<uint64_t, uint64_t>> tmp_data;
+    PGM pgm(tmp_data.begin(), tmp_data.end());
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    auto data = read_sosd_file<Rec>(d_fname, n);
+    /*
+     * If the file held fewer than n records, shrink n so that the warmup
+     * range and the throughput calculation stay within the data read.
+     */
+    if (data.size() < n) {
+        n = data.size();
+    }
+
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {
+            to_delete[j++] = i;
+        }
+    }
+    auto queries = read_range_queries<QP>(q_fname, .001);
+    if (queries.empty()) {
+        /* the per-query latency below divides by queries.size() */
+        fprintf(stderr, "[E] No queries read from %s\n", q_fname.c_str());
+        gsl_rng_free(rng);
+        exit(EXIT_FAILURE);
+    }
+
+    /* warmup structure w/ 10% of records */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<PGM, Rec>(&pgm, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+
+    TIMER_START();
+    insert_records<PGM, Rec>(&pgm, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    TIMER_START();
+    run_queries<PGM, QP>(&pgm, queries);
+    TIMER_STOP();
+
+    auto query_latency = TIMER_RESULT() / queries.size();
+
+    auto ext_size = pgm.size_in_bytes();
+
+    fprintf(stdout, "%ld\t%ld\t%ld\n", insert_throughput, query_latency, ext_size);
+
+    gsl_rng_free(rng);
+    fflush(stderr);
+}
+
diff --git a/benchmarks/irs_bench.cpp b/benchmarks/vldb/irs_bench.cpp
index 36d88f6..ca1e555 100644
--- a/benchmarks/irs_bench.cpp
+++ b/benchmarks/vldb/irs_bench.cpp
@@ -8,12 +8,12 @@
#include "shard/ISAMTree.h"
#include "query/irs.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
+#include "file_util.h"
#include <gsl/gsl_rng.h>
#include "psu-util/timer.h"
-#include "include/standard_benchmarks.h"
+#include "standard_benchmarks.h"
typedef de::Record<uint64_t, uint64_t> Rec;
diff --git a/benchmarks/vldb/mtree_bench.cpp b/benchmarks/vldb/mtree_bench.cpp
new file mode 100644
index 0000000..35f56be
--- /dev/null
+++ b/benchmarks/vldb/mtree_bench.cpp
@@ -0,0 +1,80 @@
+/*
+ * benchmarks/vldb/mtree_bench.cpp: insertion and k-NN query benchmark driver for MTree
+ */
+
+#define ENABLE_TIMER
+
+#include "query/knn.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
+
+#include <gsl/gsl_rng.h>
+
+#include "psu-util/timer.h"
+
+
+typedef Word2VecRec Rec; /* record type read from the data file and inserted into the MTree */
+typedef de::knn::Parms<Rec> QP; /* query parameter type read from the query file and passed to run_queries() */
+
+void usage(char *progname) { /* print the expected command line to stderr; main() calls this when fewer than three arguments are given */
+ fprintf(stderr, "%s reccnt datafile queryfile\n", progname);
+}
+
+int main(int argc, char **argv) {
+    /* Benchmark driver: <progname> reccnt datafile queryfile. Progress goes to stderr; "insert_throughput\tquery_latency" goes to stdout. */
+    if (argc < 4) {
+        usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    size_t n = atol(argv[1]);
+    std::string d_fname = std::string(argv[2]);
+    std::string q_fname = std::string(argv[3]);
+
+    auto mtree = new MTree();    /* released by the delete at the end of main */
+    gsl_rng * rng = gsl_rng_alloc(gsl_rng_mt19937);
+
+    fprintf(stderr, "[I] Reading data file...\n");
+    auto data = read_vector_file<Rec, 300>(d_fname, n);    /* 300: presumably the vector dimension -- confirm against read_vector_file */
+
+    fprintf(stderr, "[I] Generating delete vector\n");
+    std::vector<size_t> to_delete(n * delete_proportion);
+    size_t j=0;
+    for (size_t i=0; i<data.size() && j<to_delete.size(); i++) {
+        if (gsl_rng_uniform(rng) <= delete_proportion) {    /* keep index i with probability delete_proportion */
+            to_delete[j++] = i;
+        }
+    }
+    fprintf(stderr, "[I] Reading Queries\n");
+    auto queries = read_knn_queries<QP>(q_fname, 10);    /* 10: presumably k -- confirm against read_knn_queries */
+
+    fprintf(stderr, "[I] Warming up structure...\n");
+    /* warm up the structure with the first 10% of the records; this call happens before the timer starts */
+    size_t warmup = .1 * n;
+    size_t delete_idx = 0;
+    insert_records<MTree, Rec>(mtree, 0, warmup, data, to_delete, delete_idx, false, rng);
+
+    TIMER_INIT();
+    /* timed phase 1: insert every record after the warmup prefix */
+    fprintf(stderr, "[I] Running Insertion Benchmark\n");
+    TIMER_START();
+    insert_records<MTree, Rec>(mtree, warmup, data.size(), data, to_delete, delete_idx, true, rng);
+    TIMER_STOP();
+    /* records per 1e9 timer units -- presumably nanoseconds, i.e. records/second; confirm in psu-util/timer.h */
+    auto insert_latency = TIMER_RESULT();
+    size_t insert_throughput = (size_t) ((double) (n - warmup) / (double) insert_latency * 1e9);
+
+    fprintf(stderr, "[I] Running Query Benchmark\n");
+    TIMER_START();
+    run_queries<MTree, QP>(mtree, queries);
+    TIMER_STOP();
+    /* mean timer units per query; reported as 0 when no queries were loaded (avoids a division by zero) */
+    auto query_latency = queries.size() > 0 ? TIMER_RESULT() / queries.size() : 0;
+    /* %zu is the printf conversion for size_t; the cast keeps the auto-typed value in step with it */
+    fprintf(stdout, "%zu\t%zu\n", insert_throughput, (size_t) query_latency);
+
+    gsl_rng_free(rng);
+    delete mtree;
+    fflush(stderr);
+}
+
diff --git a/benchmarks/pgm_bench.cpp b/benchmarks/vldb/pgm_bench.cpp
index e0baab4..f63ec8e 100644
--- a/benchmarks/pgm_bench.cpp
+++ b/benchmarks/vldb/pgm_bench.cpp
@@ -10,8 +10,8 @@
#include "shard/PGM.h"
#include "query/rangecount.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
-#include "include/standard_benchmarks.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
#include <gsl/gsl_rng.h>
diff --git a/benchmarks/ts_bench.cpp b/benchmarks/vldb/ts_bench.cpp
index 3d44ac5..a84635f 100644
--- a/benchmarks/ts_bench.cpp
+++ b/benchmarks/vldb/ts_bench.cpp
@@ -10,8 +10,8 @@
#include "shard/TrieSpline.h"
#include "query/rangecount.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
-#include "include/standard_benchmarks.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
#include <gsl/gsl_rng.h>
diff --git a/benchmarks/ts_bsm_bench.cpp b/benchmarks/vldb/ts_bsm_bench.cpp
index 366abce..706433d 100644
--- a/benchmarks/ts_bsm_bench.cpp
+++ b/benchmarks/vldb/ts_bsm_bench.cpp
@@ -6,13 +6,13 @@
#include <thread>
-#include "include/triespline_bsm.h"
+#include "triespline_bsm.h"
#include "psu-util/bentley-saxe.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
+#include "file_util.h"
#include "query/rangecount.h"
#include "psu-util/timer.h"
-#include "include/standard_benchmarks.h"
+#include "standard_benchmarks.h"
typedef std::pair<uint64_t, uint64_t> Rec;
typedef de::Record<uint64_t, uint64_t> FRec;
diff --git a/benchmarks/ts_mdsp_bench.cpp b/benchmarks/vldb/ts_mdsp_bench.cpp
index 5e5001d..4c5bf1e 100644
--- a/benchmarks/ts_mdsp_bench.cpp
+++ b/benchmarks/vldb/ts_mdsp_bench.cpp
@@ -6,13 +6,13 @@
#include <thread>
-#include "include/triespline_bsm.h"
+#include "triespline_bsm.h"
#include "psu-util/bentley-saxe.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
+#include "file_util.h"
#include "query/rangecount.h"
#include "psu-util/timer.h"
-#include "include/standard_benchmarks.h"
+#include "standard_benchmarks.h"
typedef std::pair<uint64_t, uint64_t> Rec;
typedef de::Record<uint64_t, uint64_t> FRec;
diff --git a/benchmarks/vptree_bench.cpp b/benchmarks/vldb/vptree_bench.cpp
index 1219076..613c556 100644
--- a/benchmarks/vptree_bench.cpp
+++ b/benchmarks/vldb/vptree_bench.cpp
@@ -8,8 +8,8 @@
#include "shard/VPTree.h"
#include "query/knn.h"
#include "framework/interface/Record.h"
-#include "include/file_util.h"
-#include "include/standard_benchmarks.h"
+#include "file_util.h"
+#include "standard_benchmarks.h"
#include <gsl/gsl_rng.h>