/* * benchmarks/include/bench_utility.h * * Copyright (C) 2023 Douglas Rumbaugh * * All rights reserved. Published under the Modified BSD License. * */ #pragma once #include "framework/DynamicExtension.h" #include "shard/WSS.h" #include "shard/MemISAM.h" #include "shard/PGM.h" #include "shard/TrieSpline.h" #include "shard/WIRS.h" #include "ds/BTree.h" #include "shard/VPTree.h" #include "mtree.h" #include "standalone_utility.h" #include #include #include #include #include #include #include #include #include #include #include #include #include /* NOTE(review): the system #include targets above, and every template parameter/argument list below, appear to have had their angle-bracketed text stripped, and the file has been collapsed onto two physical lines; restore both from the upstream copy before building. */ /* Key, value and weight fields are all 64-bit unsigned integers. */ typedef uint64_t key_type; typedef uint64_t value_type; typedef uint64_t weight_type; /* Record aliases; NOTE(review): their template arguments are missing here. */ typedef de::WeightedRecord WRec; typedef de::Record Rec; /* NOTE(review): presumably the dimensionality of a Word2VecRec vector -- confirm. */ const size_t W2V_SIZE = 300; typedef de::EuclidPoint Word2VecRec; /* One DynamicExtension alias per shard/query pairing exercised by the benchmarks. */ typedef de::DynamicExtension, de::WSSQuery> ExtendedWSS; typedef de::DynamicExtension, de::TrieSplineRangeQuery> ExtendedTSRQ; typedef de::DynamicExtension, de::PGMRangeQuery> ExtendedPGMRQ; typedef de::DynamicExtension, de::PGMPointLookup> ExtendedPGM_PL; typedef de::DynamicExtension, de::IRSQuery> ExtendedISAM_IRS; typedef de::DynamicExtension, de::ISAMRangeQuery> ExtendedISAM_RQ; typedef de::DynamicExtension, de::KNNQuery> ExtendedVPTree_KNN; /* Functor taking two Word2VecRec values and returning a double accumulated from dist = 0. NOTE(review): the loop body, the return statement and whatever was declared between this struct and the TreeMap typedef are missing from this copy. */ struct euclidean_distance { double operator()(const Word2VecRec &first, const Word2VecRec &second) const { double dist = 0; for (size_t i=0; i TreeMap; typedef mt::mtree MTree; /* build_insert_vec: clears vec, then appends records read from file with next_vector_record() or next_record() (the condition choosing between them is missing here). After each append the record is also pushed onto to_delete when a gsl_rng_uniform(g_rng) draw is below delete_prop + delete_prop * .1. Returns false only if the very first read fails; a failed read on a later iteration ends the batch early and the function returns true. NOTE(review): the loop bound is missing -- presumably i runs up to n; confirm. */ template static bool build_insert_vec(std::fstream &file, std::vector &vec, size_t n, double delete_prop, std::vector &to_delete, bool binary=false) { vec.clear(); for (size_t i=0; i) { if (!next_vector_record(file, rec)) { if (i == 0) { return false; } break; } } else { if (!next_record(file, rec, binary)) { if (i == 0) { return false; } break; } } vec.emplace_back(rec); if (gsl_rng_uniform(g_rng) < delete_prop + (delete_prop * .1)) { to_delete.emplace_back(rec); } } return true; } /* warmup: loops while inserted is below count, reading a batch with build_insert_vec() and then inserting its records into extended_index one by one (add() or insert(), chosen by a compile-time type check whose operands are missing here), with deletes issued from delete_vec via erase_one(), remove() or erase(). progress_update() is called after every insert when progress is true. The only return visible is `return true`. to_delete is taken by value, so records appended to it during warmup are not seen by the caller. */ template static bool warmup(std::fstream &file, DE &extended_index, size_t count, double delete_prop, std::vector to_delete, 
bool progress=true, bool binary=false) { /* Batch size: a tenth of count, capped at 25000 records. */ size_t batch = std::min(.1 * count, 25000.0); std::vector insert_vec; std::vector delete_vec; insert_vec.reserve(batch); delete_vec.reserve(batch*delete_prop); size_t inserted = 0; size_t delete_idx = 0; double last_percent = 0; /* delete_vec is only refilled (and delete_idx rewound) once inserted exceeds batch. NOTE(review): continue_warmup is assigned but not read in the visible text; if the input runs out before count records are inserted this loop may not terminate -- confirm against the upstream file. */ while (inserted < count) { // Build vector of records to insert and potentially delete auto continue_warmup = build_insert_vec(file, insert_vec, batch, delete_prop, to_delete, binary); if (inserted > batch) { build_delete_vec(to_delete, delete_vec, batch*delete_prop); delete_idx = 0; } for (size_t i=0; i) { extended_index.erase_one(delete_vec[delete_idx++].key); } else if constexpr (std::is_same_v) { extended_index.remove(delete_vec[delete_idx++]); } else { extended_index.erase(delete_vec[delete_idx++]); } } // insert the record; if constexpr (std::is_same_v) { extended_index.add(insert_vec[i]); } else { extended_index.insert(insert_vec[i]); } inserted++; if (progress) { progress_update((double) inserted / (double) count, "warming up:"); } } } return true; } /* reset_de_perf_metrics: invokes RESET_IO_CNT() and does nothing else. */ static void reset_de_perf_metrics() { /* * rejection counters are zeroed automatically by the * sampling function itself. */ RESET_IO_CNT(); }