diff options
Diffstat (limited to 'benchmarks/include/file_util.h')
| -rw-r--r-- | benchmarks/include/file_util.h | 45 |
1 files changed, 44 insertions, 1 deletions
diff --git a/benchmarks/include/file_util.h b/benchmarks/include/file_util.h
index ebcf17e..586b44f 100644
--- a/benchmarks/include/file_util.h
+++ b/benchmarks/include/file_util.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <cstdlib>
 #include <cstdio>
 #include <iostream>
@@ -7,8 +9,11 @@
 #include <gsl/gsl_rng.h>
 #include <cstring>
 #include <vector>
+#include <memory>
+#include <utility>
+
+#include "psu-util/progress.h"
 
-#pragma once
 
 template <typename QP>
 static std::vector<QP> read_lookup_queries(std::string fname, double selectivity) {
@@ -36,6 +41,38 @@ static std::vector<QP> read_lookup_queries(std::string fname, double selectivity
 }
 
+/*
+ * Builds cnt lookup queries, each targeting a string drawn uniformly at
+ * random (with replacement) from strings using rng.
+ *
+ * Each query's search_key is set to the raw pointer held by the chosen
+ * element of strings; no copy of the text is made, so strings must stay
+ * alive and unmodified for as long as the returned queries are in use.
+ *
+ * Returns an empty vector when strings is empty, since there is nothing
+ * to sample from.
+ */
 template <typename QP>
+static std::vector<QP> generate_string_lookup_queries(std::vector<std::unique_ptr<char[]>> &strings, size_t cnt, gsl_rng *rng) {
+    std::vector<QP> queries;
+
+    /* gsl_rng_uniform_int() requires a non-zero upper bound */
+    if (strings.empty()) {
+        return queries;
+    }
+
+    queries.reserve(cnt);
+
+    for (size_t i=0; i<cnt; i++) {
+        auto idx = gsl_rng_uniform_int(rng, strings.size());
+        QP q;
+        q.search_key = strings[idx].get();
+        queries.push_back(q);
+    }
+
+    return queries;
+}
+
+template <typename QP>
 static std::vector<QP> read_range_queries(std::string &fname, double selectivity) {
     std::vector<QP> queries;
 
@@ -173,3 +210,45 @@ static std::vector<R> read_vector_file(std::string &fname, size_t n) {
 
     return records;
 }
+
+
+/*
+ * Reads up to n lines from the text file fname and returns them as
+ * individually heap-allocated, NUL-terminated C strings, in file order.
+ * Fewer than n strings are returned if the file has fewer lines.
+ *
+ * Prints an error to stderr and terminates the process with EXIT_FAILURE
+ * if the file cannot be opened. Progress is reported through
+ * psudb::progress_update() after every line.
+ */
+static std::vector<std::unique_ptr<char[]>>read_string_file(std::string fname, size_t n=10000000) {
+
+    std::fstream file;
+    file.open(fname, std::ios::in);
+
+    if (!file.is_open()) {
+        fprintf(stderr, "ERROR: Failed to open file %s\n", fname.c_str());
+        exit(EXIT_FAILURE);
+    }
+
+    std::vector<std::unique_ptr<char[]>> strings;
+    strings.reserve(n);
+
+    size_t i=0;
+    std::string line;
+    while (i < n && std::getline(file, line, '\n')) {
+        /*
+         * The buffer must come from new[] because unique_ptr<char[]>
+         * releases it with delete[]; handing it strdup()'s malloc'd
+         * memory would be undefined behavior.
+         */
+        const size_t len = line.size() + 1; /* include the terminating NUL */
+        std::unique_ptr<char[]> buf(new char[len]);
+        std::memcpy(buf.get(), line.c_str(), len);
+        strings.emplace_back(std::move(buf));
+        i++;
+        psudb::progress_update((double) i / (double) n, "Reading file:");
+    }
+
+    return strings;
+}