1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
#include "include/bench.h"
#include "mtree.h"
static void mtree_knn_bench(MTree &tree, std::vector<de::KNNQueryParms<Word2VecRec>> queries, size_t trial_cnt=1)
{
char progbuf[25];
sprintf(progbuf, "sampling:");
size_t batch_size = 100;
size_t batches = trial_cnt / batch_size;
size_t total_time = 0;
std::vector<Word2VecRec> result_set;
for (int i=0; i<trial_cnt; i++) {
progress_update((double) (i * batch_size) / (double) trial_cnt, progbuf);
std::vector<Word2VecRec> results;
auto start = std::chrono::high_resolution_clock::now();
for (size_t j=0; j<queries.size(); j++) {
results.clear();
auto query_output = tree.get_nearest_by_limit(queries[j].point, queries[j].k);
auto itr = query_output.begin();
while (itr != query_output.end()) {
results.emplace_back(itr->data);
itr++;
}
}
auto stop = std::chrono::high_resolution_clock::now();
total_time += std::chrono::duration_cast<std::chrono::nanoseconds>(stop - start).count();
}
progress_update(1.0, progbuf);
size_t latency = total_time / (trial_cnt * queries.size());
fprintf(stdout, "%ld\t", latency);
}
int main(int argc, char **argv)
{
if (argc < 5) {
fprintf(stderr, "Usage: mtree_knn_bench <filename> <record_count> <delete_proportion> <query_file>\n");
exit(EXIT_FAILURE);
}
std::string filename = std::string(argv[1]);
size_t record_count = atol(argv[2]);
double delete_prop = atof(argv[3]);
std::string qfilename = std::string(argv[4]);
size_t buffer_cap = 12000;
size_t scale_factor = 6;
double max_delete_prop = delete_prop;
double insert_batch = 0.1;
init_bench_env(record_count, true);
auto queries = read_knn_queries<de::KNNQueryParms<Word2VecRec>>(qfilename, 10);
auto mtree = MTree();
std::fstream datafile;
datafile.open(filename, std::ios::in | std::ios::binary);
std::vector<Word2VecRec> to_delete;
// warm up the tree with initial_insertions number of initially inserted
// records
size_t warmup_cnt = insert_batch * record_count;
warmup<MTree, Word2VecRec>(datafile, mtree, warmup_cnt, delete_prop, to_delete, true, true);
size_t insert_cnt = record_count - warmup_cnt;
insert_tput_bench<MTree, Word2VecRec>(mtree, datafile, insert_cnt, delete_prop, to_delete, true);
// fprintf(stdout, "%ld\t", mtree.get_memory_usage());
mtree_knn_bench(mtree, queries);
fprintf(stdout, "\n");
delete_bench_env();
fflush(stdout);
fflush(stderr);
exit(EXIT_SUCCESS);
}
|