summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorDouglas Rumbaugh <dbr4@psu.edu>2023-05-08 12:59:46 -0400
committerDouglas Rumbaugh <dbr4@psu.edu>2023-05-08 12:59:46 -0400
commitfdf5ef27fec1368a33801a98bbc5ed3556476979 (patch)
tree7ecd9477fed6495d05a44343e19bfb6480785c7a /include
parent2380dd4d73cd6a179567e06e1aa4871ad44ccce3 (diff)
downloaddynamic-extension-fdf5ef27fec1368a33801a98bbc5ed3556476979.tar.gz
Began porting source files over from other repository
Diffstat (limited to 'include')
-rw-r--r--include/ds/Alias.h72
-rw-r--r--include/ds/BitArray.h67
-rw-r--r--include/ds/BloomFilter.h74
-rw-r--r--include/ds/PriorityQueue.h135
-rw-r--r--include/util/Cursor.h89
-rw-r--r--include/util/base.h73
-rw-r--r--include/util/bf_config.h27
-rw-r--r--include/util/hash.h61
-rw-r--r--include/util/internal_record.h96
-rw-r--r--include/util/record.h65
-rw-r--r--include/util/timer.h37
-rw-r--r--include/util/types.h71
12 files changed, 867 insertions, 0 deletions
diff --git a/include/ds/Alias.h b/include/ds/Alias.h
new file mode 100644
index 0000000..855dc75
--- /dev/null
+++ b/include/ds/Alias.h
@@ -0,0 +1,72 @@
+/*
+ * include/ds/Alias.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <gsl/gsl_rng.h>
+#include <vector>
+
+namespace de {
+
+/*
+ * An implementation of Walker's Alias Structure for Weighted Set Sampling. Requires
+ * that the input weight vector is already normalized.
+ */
+class Alias {
+public:
+ Alias(const std::vector<double>& weights)
+ : m_alias(weights.size()), m_cutoff(weights.size()) {
+ size_t n = weights.size();
+ auto overfull = std::vector<size_t>();
+ auto underfull = std::vector<size_t>();
+ overfull.reserve(n);
+ underfull.reserve(n);
+
+ // initialize the probability_table with n*p(i) as well as the overfull and
+ // underfull lists.
+ for (size_t i = 0; i < n; i++) {
+ m_cutoff[i] = (double) n * weights[i];
+ if (m_cutoff[i] > 1) {
+ overfull.emplace_back(i);
+ } else if (m_cutoff[i] < 1) {
+ underfull.emplace_back(i);
+ } else {
+ m_alias[i] = i;
+ }
+ }
+
+ while (overfull.size() > 0 && underfull.size() > 0) {
+ auto i = overfull.back(); overfull.pop_back();
+ auto j = underfull.back(); underfull.pop_back();
+
+ m_alias[j] = i;
+ m_cutoff[i] = m_cutoff[i] + m_cutoff[j] - 1.0;
+
+ if (m_cutoff[i] > 1.0) {
+ overfull.emplace_back(i);
+ } else if (m_cutoff[i] < 1.0) {
+ underfull.emplace_back(i);
+ }
+ }
+ }
+
+ size_t get(const gsl_rng* rng) {
+ double coin1 = gsl_rng_uniform(rng);
+ double coin2 = gsl_rng_uniform(rng);
+
+ size_t k = ((double) m_alias.size()) * coin1;
+ return coin2 < m_cutoff[k] ? k : m_alias[k];
+ }
+
+private:
+ std::vector<size_t> m_alias;
+ std::vector<double> m_cutoff;
+};
+
+}
diff --git a/include/ds/BitArray.h b/include/ds/BitArray.h
new file mode 100644
index 0000000..1d33d5b
--- /dev/null
+++ b/include/ds/BitArray.h
@@ -0,0 +1,67 @@
+/*
+ * include/ds/BloomFilter.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <memory>
+#include <cstring>
+
+#include "util/base.h"
+
+namespace lsm {
+
+class BitArray {
+public:
+ BitArray(size_t bits): m_bits(bits), m_data(nullptr) {
+ if (m_bits > 0) {
+ size_t n_bytes = (m_bits >> 3) << 3;
+ m_data = (char*) std::aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(n_bytes));
+ memset(m_data, 0, n_bytes);
+ }
+ }
+
+ ~BitArray() {
+ if (m_data) free(m_data);
+ }
+
+ bool is_set(size_t bit) {
+ if (bit >= m_bits) return false;
+ return m_data[bit >> 3] & (1 << (bit & 7));
+ }
+
+ int set(size_t bit) {
+ if (bit >= m_bits) return 0;
+ m_data[bit >> 3] |= ((char) 1 << (bit & 7));
+ return 1;
+ }
+
+ int unset(size_t bit) {
+ if (bit >= m_bits) return 0;
+ m_data[bit >> 3] &= ~((char) 1 << (bit & 7));
+ return 1;
+ }
+
+ void clear() {
+ memset(m_data, 0, (m_bits >> 3) << 3);
+ }
+
+ size_t mem_size() {
+ return m_bits >> 3;
+ }
+
+ size_t size() {
+ return m_bits;
+ }
+
+private:
+ size_t m_bits;
+ char* m_data;
+};
+
+}
diff --git a/include/ds/BloomFilter.h b/include/ds/BloomFilter.h
new file mode 100644
index 0000000..dbaab2f
--- /dev/null
+++ b/include/ds/BloomFilter.h
@@ -0,0 +1,74 @@
+/*
+ * include/ds/BloomFilter.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cmath>
+#include <gsl/gsl_rng.h>
+
+#include "ds/BitArray.h"
+#include "util/hash.h"
+#include "util/base.h"
+#include "util/record.h"
+
+namespace lsm {
+
+class BloomFilter {
+public:
+ BloomFilter(size_t n_bits, size_t k, const gsl_rng* rng)
+ : m_n_bits(n_bits), m_n_salts(k), m_bitarray(n_bits) {
+ salt = (uint16_t*) aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(k * sizeof(uint16_t)));
+ for (size_t i = 0; i < k; ++i) {
+ salt[i] = (uint16_t) gsl_rng_uniform_int(rng, 1 << 16);
+ }
+
+ }
+
+ BloomFilter(double max_fpr, size_t n, size_t k, const gsl_rng* rng)
+ : BloomFilter((size_t)(-(double) (k * n) / std::log(1.0 - std::pow(max_fpr, 1.0 / k))), k, rng) {}
+
+ ~BloomFilter() {
+ if (salt) free(salt);
+ }
+
+ int insert(const key_t& key, size_t sz = sizeof(key_t)) {
+ if (m_bitarray.size() == 0) return 0;
+
+ for (size_t i = 0; i < m_n_salts; ++i) {
+ m_bitarray.set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits);
+ }
+
+ return 1;
+ }
+
+ bool lookup(const key_t& key, size_t sz = sizeof(key_t)) {
+ if (m_bitarray.size() == 0) return false;
+ for (size_t i = 0; i < m_n_salts; ++i) {
+ if (!m_bitarray.is_set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits))
+ return false;
+ }
+
+ return true;
+ }
+
+ void clear() {
+ m_bitarray.clear();
+ }
+
+ size_t get_memory_utilization() {
+ return this->m_bitarray.mem_size();
+ }
+private:
+ size_t m_n_salts;
+ size_t m_n_bits;
+ uint16_t* salt;
+
+ BitArray m_bitarray;
+};
+
+}
diff --git a/include/ds/PriorityQueue.h b/include/ds/PriorityQueue.h
new file mode 100644
index 0000000..49f3e41
--- /dev/null
+++ b/include/ds/PriorityQueue.h
@@ -0,0 +1,135 @@
+/*
+ * include/ds/PriorityQueue.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <vector>
+#include <cassert>
+
+#include "util/record.h"
+
+namespace lsm {
+
+struct queue_record {
+ const char *data;
+ size_t version;
+};
+
+
+class PriorityQueue {
+public:
+ PriorityQueue(size_t size) : data(size), tail(0) {}
+ ~PriorityQueue() = default;
+
+ size_t size() const {
+ return tail;
+ }
+
+ void pop() {
+ assert(this->tail != 0);
+
+ // If there is only one element, just decrement the
+ // tail.
+ if (this->size() == 1) {
+ this->tail--;
+ return;
+ }
+
+ swap(0, --this->tail);
+
+ ssize_t idx = 0;
+ ssize_t child_idx;
+
+ while ((child_idx = min_child(idx)) != -1 && heap_cmp(child_idx, idx)) {
+ swap(idx, child_idx);
+ idx = child_idx;
+ }
+ }
+
+ void push(const char* record, size_t version=0) {
+ assert(tail != this->data.size());
+
+ size_t new_idx = this->tail++;
+ this->data[new_idx] = {record, version};
+
+ while (new_idx != 0 && !heap_cmp(parent(new_idx), new_idx)) {
+ swap(parent(new_idx), new_idx);
+ new_idx = parent(new_idx);
+ }
+ }
+
+
+ queue_record peek(size_t depth=0) {
+ ssize_t idx = 0;
+ size_t cur_depth = 0;
+
+ while (cur_depth != depth) {
+ idx = min_child(idx);
+ assert(idx != -1);
+ cur_depth++;
+ }
+
+ return this->data[idx];
+ }
+
+private:
+ std::vector<queue_record> data;
+ size_t tail;
+
+ /*
+ * Swap the elements at position a and position
+ * b within the heap
+ */
+ inline void swap(size_t a, size_t b) {
+ if (a == b) return;
+
+ queue_record temp = this->data[a];
+ this->data[a] = this->data[b];
+ this->data[b] = temp;
+ }
+
+ inline size_t left_child(size_t idx) {
+ return 2 * idx + 1;
+ }
+
+ inline size_t right_child(size_t idx) {
+ return 2 * idx + 2;
+ }
+
+ inline size_t parent(size_t idx) {
+ return (idx - 1) / 2;
+ }
+
+ inline ssize_t min_child(size_t idx) {
+ ssize_t left = left_child(idx) < tail ? left_child(idx) : -1;
+ ssize_t right = right_child(idx) < tail ? right_child(idx) : -1;
+
+ if (left == -1 && right == -1) {
+ return -1;
+ } else if (left == -1) {
+ return right;
+ } else if (right == -1) {
+ return left;
+ }
+
+ return (heap_cmp(left, right)) ? left : right;
+ }
+
+ inline bool heap_cmp(size_t a, size_t b) {
+ auto cmp = record_cmp(this->data[a].data, this->data[b].data);
+ if (cmp == 0) {
+ if (this->data[a].version != this->data[b].version)
+ return this->data[a].version < this->data[b].version;
+ else return is_tombstone(this->data[a].data) && is_tombstone(this->data[b].data);
+ }
+ return cmp == -1;
+ }
+
+};
+}
diff --git a/include/util/Cursor.h b/include/util/Cursor.h
new file mode 100644
index 0000000..3f5f20f
--- /dev/null
+++ b/include/util/Cursor.h
@@ -0,0 +1,89 @@
+/*
+ * include/util/Cursor.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "util/base.h"
+#include "util/record.h"
+#include "io/PagedFile.h"
+
+namespace de {
+struct Cursor {
+ record_t *ptr;
+ const record_t *end;
+ size_t cur_rec_idx;
+ size_t rec_cnt;
+
+ friend bool operator==(const Cursor &a, const Cursor &b) {
+ return a.ptr == b.ptr && a.end == b.end;
+ }
+};
+
+static Cursor g_empty_cursor = {0};
+
+/*
+ * Advance the cursor to the next record. If the cursor is backed by an
+ * iterator, will attempt to advance the iterator once the cursor reaches its
+ * end and reset the cursor to the beginning of the read page.
+ *
+ * If the advance succeeds, ptr will be updated to point to the new record
+ * and true will be returned. If the advance reaches the end, then ptr will
+ * be updated to be equal to end, and false will be returned. Iterators will
+ * not be closed.
+ */
+inline static bool advance_cursor(Cursor *cur, PagedFileIterator *iter = nullptr) {
+ cur->ptr++;
+ cur->cur_rec_idx++;
+
+ if (cur->cur_rec_idx >= cur->rec_cnt) return false;
+
+ if (cur->ptr >= cur->end) {
+ if (iter && iter->next()) {
+ cur->ptr = (record_t*)iter->get_item();
+ cur->end = cur->ptr + (PAGE_SIZE / sizeof(record_t));
+ return true;
+ }
+
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Process the list of cursors to return the cursor containing the next
+ * largest element. Does not advance any of the cursors. If current is
+ * specified, then skip the current head of that cursor during checking.
+ * This allows for "peaking" at the next largest element after the current
+ * largest is processed.
+ */
+inline static Cursor *get_next(std::vector<Cursor> &cursors, Cursor *current=&g_empty_cursor) {
+ const record_t *min_rec = nullptr;
+ Cursor *result = &g_empty_cursor;
+ for (size_t i=0; i< cursors.size(); i++) {
+ if (cursors[i] == g_empty_cursor) continue;
+
+ const record_t *rec = (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr;
+ if (rec >= cursors[i].end) continue;
+
+ if (min_rec == nullptr) {
+ result = &cursors[i];
+ min_rec = rec;
+ continue;
+ }
+
+ if (*rec < *min_rec) {
+ result = &cursors[i];
+ min_rec = rec;
+ }
+ }
+
+ return result;
+}
+
+}
diff --git a/include/util/base.h b/include/util/base.h
new file mode 100644
index 0000000..1729687
--- /dev/null
+++ b/include/util/base.h
@@ -0,0 +1,73 @@
+/*
+ * include/util/base.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+#include <cstddef>
+#include <memory>
+
+namespace de {
+
+// The correct quantity for use in alignment of buffers to be
+// compatible with O_DIRECT
+const size_t SECTOR_SIZE = 512;
+
+// The standard sized block of data (in bytes) for use in IO
+// operations.
+const size_t PAGE_SIZE = 4096;
+
+// The size of a cacheline, for alignment purposes.
+const size_t CACHELINE_SIZE = 64;
+
+// The largest representable PageNum. A given file cannot
+// have more pages than this.
+const size_t MAX_PAGE_COUNT = UINT32_MAX;
+
+// The largest representable FileId. The file manager cannot
+// manage more files than this.
+const size_t MAX_FILE_COUNT = UINT32_MAX;
+
+// The largest representable FrameId. No buffer can be defined with
+// more frames than this.
+const size_t MAX_FRAME_COUNT = UINT32_MAX;
+
+// The number of bytes of zeroes available in ZEROBUF. Will be
+// a multiple of the parm::PAGE_SIZE.
+constexpr size_t ZEROBUF_SIZE = 8 * PAGE_SIZE;
+
+// A large, preallocated, buffer of zeroes used for pre-allocation
+// of pages in a file.
+alignas(SECTOR_SIZE) const char ZEROBUF[ZEROBUF_SIZE] = {0};
+
+
+// alignment code taken from TacoDB (file: tdb_base.h)
+template<class T>
+constexpr T
+TYPEALIGN(uint64_t ALIGNVAL, T LEN) {
+ return (((uint64_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uint64_t) ((ALIGNVAL) - 1)));
+}
+
+#define SHORTALIGN(LEN) TYPEALIGN(2, (LEN))
+#define INTALIGN(LEN) TYPEALIGN(4, (LEN))
+#define LONGALIGN(LEN) TYPEALIGN(8, (LEN))
+#define DOUBLEALIGN(LEN) TYPEALIGN(8, (LEN))
+#define MAXALIGN(LEN) TYPEALIGN(8, (LEN))
+#define CACHELINEALIGN(LEN) TYPEALIGN(CACHELINE_SIZE, (LEN))
+#define MAXALIGN_OF 8
+
+// Returns a pointer to the idx'th page contained within a multi-page
+// buffer. buffer must be page aligned, and idx must be less than the
+// number of pages within the buffer, or the result is undefined.
+static inline char *get_page(char *buffer, size_t idx) {
+ return buffer + (idx * PAGE_SIZE);
+}
+
+}
diff --git a/include/util/bf_config.h b/include/util/bf_config.h
new file mode 100644
index 0000000..b4d68bc
--- /dev/null
+++ b/include/util/bf_config.h
@@ -0,0 +1,27 @@
+/*
+ * include/util/bf_config.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "util/base.h"
+
+namespace de {
+
+static double BF_FPR = .01;
+static size_t BF_HASH_FUNCS = 7;
+
+static void BF_SET_FPR(double fpr) {
+ BF_FPR = fpr;
+}
+
+static void BF_SET_HASHFUNC(size_t func_cnt) {
+ BF_HASH_FUNCS = func_cnt;
+}
+
+}
diff --git a/include/util/hash.h b/include/util/hash.h
new file mode 100644
index 0000000..04871dc
--- /dev/null
+++ b/include/util/hash.h
@@ -0,0 +1,61 @@
+/*
+ * include/util/hash.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace de {
+
+// 40343 is a "magic constant" that works well,
+// 38299 is another good value.
+// Both are primes and have a good distribution of bits.
+const uint64_t kHashMagicNum = 40343;
+
+inline uint64_t rotr64(uint64_t x, size_t n) {
+ return (((x) >> n) | ((x) << (64 - n)));
+}
+
+inline uint64_t hash(uint64_t input) {
+ uint64_t local_rand = input;
+ uint64_t local_rand_hash = 8;
+ local_rand_hash = 40343 * local_rand_hash + ((local_rand) & 0xFFFF);
+ local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 16) & 0xFFFF);
+ local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 32) & 0xFFFF);
+ local_rand_hash = 40343 * local_rand_hash + (local_rand >> 48);
+ local_rand_hash = 40343 * local_rand_hash;
+ return rotr64(local_rand_hash, 43);
+}
+
+inline uint64_t hash_bytes(const char* str, size_t len) {
+ uint64_t hashState = len;
+
+ for(size_t idx = 0; idx < len; ++idx) {
+ hashState = kHashMagicNum * hashState + str[idx];
+ }
+
+ // The final scrambling helps with short keys that vary only on the high order bits.
+ // Low order bits are not always well distributed so shift them to the high end, where they'll
+ // form part of the 14-bit tag.
+ return rotr64(kHashMagicNum * hashState, 6);
+}
+
+inline uint64_t hash_bytes_with_salt(const char* str, size_t len, uint16_t salt) {
+ uint64_t hashState = len;
+
+ for(size_t idx = 0; idx < len; ++idx) {
+ hashState = kHashMagicNum * hashState + str[idx];
+ }
+
+ hashState = kHashMagicNum * hashState + salt;
+
+ return rotr64(kHashMagicNum * hashState, 6);
+}
+
+}
diff --git a/include/util/internal_record.h b/include/util/internal_record.h
new file mode 100644
index 0000000..d898b8b
--- /dev/null
+++ b/include/util/internal_record.h
@@ -0,0 +1,96 @@
+/*
+ * include/util/internal_record.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+#pragma once
+
+#include "util/record.h"
+#include "util/types.h"
+
+/*
+ * Utility functions for use in handling internal nodes within an ISAM
+ * tree.
+ */
+
+namespace de {
+
+struct ISAMTreeInternalNodeHeader {
+ PageNum next_sibling; // INVALID_PNUM this is the last node on a level
+ PageNum prev_sibling; // INVALID_PNUM if this is the first node on a level
+ size_t leaf_rec_cnt; // number of records in leaf nodes under this node
+ size_t internal_rec_cnt; // number of internal records in this node
+};
+
+/*
+ * The total (aligned) size of an ISAMTreeInternalNodeHeader object.
+ */
+static constexpr PageOffset ISAMTreeInternalNodeHeaderSize = MAXALIGN(sizeof(ISAMTreeInternalNodeHeader));
+
+/*
+ * The number of bytes occupied by an ISAM tree internal record, including alignment bytes.
+ */
+static constexpr size_t internal_record_size = sizeof(key_t) + MAXALIGN(sizeof(PageNum));
+
+/*
+ * The maximum number of internal records that can be stored in an internal
+ * node, accounting for alignment and the node header.
+ */
+static constexpr size_t internal_records_per_page = (PAGE_SIZE - ISAMTreeInternalNodeHeaderSize) / internal_record_size;
+
+/*
+ * Format an internal record, starting in the first byte of buffer, with the
+ * specified key and target page number. If either buffer or key refer to
+ * memory regions of insufficient size, the results are undefined.
+ */
+static inline void build_internal_record(char *buffer, const char *key, PageNum target_page) {
+ memcpy(buffer, key, sizeof(key_t));
+ memcpy(buffer + sizeof(key_t), &target_page, sizeof(PageNum));
+}
+
+/*
+ * Return the idx'th internal record within an internal node.
+ * internal_page_buffer must refer to the first byte of a page containing a
+ * full internal node, including the header. The returned value is undefined if
+ * idx is greater than or equal to the number of internal records within the
+ * node.
+ */
+static inline char *get_internal_record(char *internal_page_buffer, size_t idx) {
+ return internal_page_buffer + ISAMTreeInternalNodeHeaderSize + internal_record_size * idx;
+}
+
+/*
+ * Return a pointer to the key contained within an internal record, pointed
+ * to by buffer. Buffer must point to the beginning of a valid internal record,
+ * or the result of this function is undefined.
+ */
+static inline const char *get_internal_key(const char *buffer) {
+ return buffer;
+}
+
+/*
+ * Return the Page Number (value) of an internal record, pointed to by buffer.
+ * Buffer must point to the beginning of a valid internal record, or the result
+ * of this function is undefined.
+ */
+static inline PageNum get_internal_value(const char *buffer) {
+ return *((PageNum *) (buffer + sizeof(key_t)));
+}
+
+/*
+ * Return a pointer to the header of an ISAM Tree internal node, referred to by
+ * buffer. If buffer contains multiple nodes, a specific node can be requested
+ * using the idx parameter, otherwise the header of the first node will be returned.
+ * Buffer must point to the beginning of a valid ISAM Tree internal node, and idx
+ * must be less than the number of internal nodes within the buffer, or the
+ * returned value is undefined.
+ */
+static inline ISAMTreeInternalNodeHeader *get_header(char *buffer, size_t idx=0) {
+ return (ISAMTreeInternalNodeHeader *) get_page(buffer, idx);
+}
+
+}
diff --git a/include/util/record.h b/include/util/record.h
new file mode 100644
index 0000000..0ba0f97
--- /dev/null
+++ b/include/util/record.h
@@ -0,0 +1,65 @@
+/*
+ * include/util/record.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ * Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstring>
+
+#include "util/base.h"
+
+namespace de {
+
+typedef uint32_t hdr_t;
+typedef uint64_t key_t;
+typedef uint32_t value_t;
+typedef uint64_t weight_t;
+
+struct record_t {
+ key_t key;
+ value_t value;
+ hdr_t header;
+ weight_t weight;
+
+ inline bool match(key_t k, value_t v, bool is_tombstone) const {
+ return (key == k) && (value == v) && ((header & 1) == is_tombstone);
+ }
+
+ inline void set_delete_status() {
+ header |= 2;
+ }
+
+ inline bool get_delete_status() const {
+ return header & 2;
+ }
+
+ inline bool is_tombstone() const {
+ return header & 1;
+ }
+
+ inline int match(const record_t* other) const {
+ return key == other->key && value == other->value;
+ }
+
+ inline bool operator<(const record_t& other) const {
+ return key < other.key || (key == other.key && value < other.value);
+ }
+
+ inline bool lt(const key_t& k, const value_t& v) const {
+ return key < k || (key == k && value < v);
+ }
+};
+
+static_assert(sizeof(record_t) == 24, "Record is not 24 bytes long.");
+
+static bool memtable_record_cmp(const record_t& a, const record_t& b) {
+ return (a.key < b.key) || (a.key == b.key && a.value < b.value)
+ || (a.key == b.key && a.value == b.value && a.header < b.header);
+}
+
+}
diff --git a/include/util/timer.h b/include/util/timer.h
new file mode 100644
index 0000000..a9784a8
--- /dev/null
+++ b/include/util/timer.h
@@ -0,0 +1,37 @@
+/*
+ * include/util/timer.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <chrono>
+
+#ifdef ENABLE_TIMER
+#define TIMER_INIT() \
+ auto timer_start = std::chrono::high_resolution_clock::now(); \
+ auto timer_stop = std::chrono::high_resolution_clock::now();
+
+#define TIMER_START() \
+ timer_start = std::chrono::high_resolution_clock::now()
+
+#define TIMER_STOP() \
+ timer_stop = std::chrono::high_resolution_clock::now()
+
+#define TIMER_RESULT() \
+ std::chrono::duration_cast<std::chrono::nanoseconds>(timer_stop - timer_start).count()
+
+#else
+ #define TIMER_INIT() \
+ do {} while(0)
+ #define TIMER_START() \
+ do {} while(0)
+ #define TIMER_STOP() \
+ do {} while(0)
+ #define TIMER_RESULT() \
+ 0l
+#endif
+
diff --git a/include/util/types.h b/include/util/types.h
new file mode 100644
index 0000000..63da3fa
--- /dev/null
+++ b/include/util/types.h
@@ -0,0 +1,71 @@
+/*
+ * include/util/types.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ * A centralized header file for various datatypes used throughout the
+ * code base. There are a few very specific types, such as header formats,
+ * that are defined within the header files that make direct use of them,
+ * but all generally usable, simple types are defined here.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+#include <cstddef>
+#include <string>
+
+#include "util/base.h"
+
+namespace de {
+
+using std::byte;
+
+// Represents a page offset within a specific file (physical or virtual)
+typedef uint32_t PageNum;
+
+// Byte offset within a page. Also used for lengths of records, etc.,
+// within the codebase. size_t isn't necessary, as the maximum offset
+// is only parm::PAGE_SIZE
+typedef uint16_t PageOffset;
+
+// A unique identifier for a frame within a buffer or cache.
+typedef int32_t FrameId;
+
+// A unique timestamp for use in MVCC concurrency control. Currently stored in
+// record headers, but not used by anything.
+typedef uint32_t Timestamp;
+const Timestamp TIMESTAMP_MIN = 0;
+const Timestamp TIMESTAMP_MAX = UINT32_MAX;
+
+// Invalid values for various IDs. Used throughout the code base to indicate
+// uninitialized values and error conditions.
+const PageNum INVALID_PNUM = 0;
+const FrameId INVALID_Fshid = -1;
+
+// An ID for a given shard within the index. The level_idx is the index
+// in the memory_levels and disk_levels vectors corresponding to the
+// shard, and the shard_idx is the index with the level (always 0 in the
+// case of leveling). Note that the two vectors of levels are treated
+// as a contiguous index space.
+struct ShardID {
+ ssize_t level_idx;
+ ssize_t shard_idx;
+
+ friend bool operator==(const ShardID &shid1, const ShardID &shid2) {
+ return shid1.level_idx == shid2.level_idx && shid1.shard_idx == shid2.shard_idx;
+ }
+};
+
+const ShardID INVALID_SHID = {-1, -1};
+
+struct SampleRange {
+ ShardID run_id;
+ size_t low;
+ size_t high;
+};
+
+}