Began porting source files over from other repository

author: Douglas Rumbaugh <dbr4@psu.edu> 2023-05-08 12:59:46 -0400
committer: Douglas Rumbaugh <dbr4@psu.edu> 2023-05-08 12:59:46 -0400
commit: fdf5ef27fec1368a33801a98bbc5ed3556476979 (patch)
tree: 7ecd9477fed6495d05a44343e19bfb6480785c7a /include
parent: 2380dd4d73cd6a179567e06e1aa4871ad44ccce3 (diff)
download: dynamic-extension-fdf5ef27fec1368a33801a98bbc5ed3556476979.tar.gz
12 files changed, 867 insertions, 0 deletions
diff --git a/include/ds/Alias.h b/include/ds/Alias.h
new file mode 100644
index 0000000..855dc75
--- /dev/null
+++ b/include/ds/Alias.h
@@ -0,0 +1,72 @@
+/*
+ * include/ds/Alias.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> 
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <gsl/gsl_rng.h>
+#include <vector>
+
+namespace de {
+
+/*
+ * An implementation of Walker's Alias Structure for Weighted Set Sampling. Requires
+ * that the input weight vector is already normalized.
+ */
+class Alias {
+public:
+    Alias(const std::vector<double>& weights)
+    : m_alias(weights.size()), m_cutoff(weights.size()) {
+        size_t n = weights.size();
+        auto overfull = std::vector<size_t>();
+        auto underfull = std::vector<size_t>();
+        overfull.reserve(n);
+        underfull.reserve(n);
+
+        // initialize the probability_table with n*p(i) as well as the overfull and
+        // underfull lists.
+        for (size_t i = 0; i < n; i++) {
+            m_cutoff[i] = (double) n * weights[i];
+            if (m_cutoff[i] > 1) {
+                overfull.emplace_back(i);
+            } else if (m_cutoff[i] < 1) {
+                underfull.emplace_back(i);
+            } else {
+                m_alias[i] = i;
+            }
+        }
+
+        while (overfull.size() > 0 && underfull.size() > 0) {
+            auto i = overfull.back(); overfull.pop_back();
+            auto j = underfull.back(); underfull.pop_back();
+
+            m_alias[j] = i;
+            m_cutoff[i] = m_cutoff[i] + m_cutoff[j] - 1.0;
+
+            if (m_cutoff[i] > 1.0) {
+                overfull.emplace_back(i);
+            } else if (m_cutoff[i] < 1.0) {
+                underfull.emplace_back(i);
+            }
+        }
+    }
+
+    size_t get(const gsl_rng* rng) {
+        double coin1 = gsl_rng_uniform(rng);
+        double coin2 = gsl_rng_uniform(rng);
+
+        size_t k = ((double) m_alias.size()) * coin1;
+        return coin2 < m_cutoff[k] ? k : m_alias[k];
+    }
+
+private:
+    std::vector<size_t> m_alias;
+    std::vector<double> m_cutoff;
+};
+
+}
diff --git a/include/ds/BitArray.h b/include/ds/BitArray.h
new file mode 100644
index 0000000..1d33d5b
--- /dev/null
+++ b/include/ds/BitArray.h
@@ -0,0 +1,67 @@
+/*
+ * include/ds/BloomFilter.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <memory>
+#include <cstring>
+
+#include "util/base.h"
+
+namespace lsm {
+
+class BitArray {
+public:
+    BitArray(size_t bits): m_bits(bits), m_data(nullptr) {
+        if (m_bits > 0) {
+            size_t n_bytes = (m_bits >> 3) << 3;
+            m_data = (char*) std::aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(n_bytes));
+            memset(m_data, 0, n_bytes);
+        }
+    }
+
+    ~BitArray() {
+        if (m_data) free(m_data);
+    }
+
+    bool is_set(size_t bit) {
+        if (bit >= m_bits) return false;
+        return m_data[bit >> 3] & (1 << (bit & 7));
+    }
+
+    int set(size_t bit) {
+        if (bit >= m_bits) return 0;
+        m_data[bit >> 3] |= ((char) 1 << (bit & 7));
+        return 1;
+    }
+
+    int unset(size_t bit) {
+        if (bit >= m_bits) return 0;
+        m_data[bit >> 3] &= ~((char) 1 << (bit & 7));
+        return 1;
+    }
+
+    void clear() {
+        memset(m_data, 0, (m_bits >> 3) << 3);
+    }
+
+    size_t mem_size() {
+        return m_bits >> 3;
+    }
+
+    size_t size() {
+        return m_bits;
+    }
+    
+private:
+    size_t m_bits;
+    char* m_data;
+};
+
+}
diff --git a/include/ds/BloomFilter.h b/include/ds/BloomFilter.h
new file mode 100644
index 0000000..dbaab2f
--- /dev/null
+++ b/include/ds/BloomFilter.h
@@ -0,0 +1,74 @@
+/*
+ * include/ds/BloomFilter.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cmath>
+#include <gsl/gsl_rng.h>
+
+#include "ds/BitArray.h"
+#include "util/hash.h"
+#include "util/base.h"
+#include "util/record.h"
+
+namespace lsm {
+
+class BloomFilter {
+public:
+    BloomFilter(size_t n_bits, size_t k, const gsl_rng* rng)
+    : m_n_bits(n_bits), m_n_salts(k), m_bitarray(n_bits) {
+        salt = (uint16_t*) aligned_alloc(CACHELINE_SIZE, CACHELINEALIGN(k * sizeof(uint16_t)));
+        for (size_t i = 0;  i < k; ++i) {
+            salt[i] = (uint16_t) gsl_rng_uniform_int(rng, 1 << 16);
+        }
+        
+    }
+
+    BloomFilter(double max_fpr, size_t n, size_t k, const gsl_rng* rng)
+    : BloomFilter((size_t)(-(double) (k * n) / std::log(1.0 - std::pow(max_fpr, 1.0 / k))), k, rng) {}
+
+    ~BloomFilter() {
+        if (salt) free(salt);
+    }
+
+    int insert(const key_t& key, size_t sz = sizeof(key_t)) {
+        if (m_bitarray.size() == 0) return 0;
+
+        for (size_t i = 0; i < m_n_salts; ++i) {
+            m_bitarray.set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits);
+        }
+
+        return 1;
+    }
+
+    bool lookup(const key_t& key, size_t sz = sizeof(key_t)) {
+        if (m_bitarray.size() == 0) return false;
+        for (size_t i = 0; i < m_n_salts; ++i) {
+            if (!m_bitarray.is_set(hash_bytes_with_salt((const char*)&key, sz, salt[i]) % m_n_bits))
+                return false;
+        }
+
+        return true;
+    }
+
+    void clear() {
+        m_bitarray.clear();
+    }
+
+    size_t get_memory_utilization() {
+        return this->m_bitarray.mem_size();
+    }
+private: 
+    size_t m_n_salts;
+    size_t m_n_bits;
+    uint16_t* salt;
+
+    BitArray m_bitarray;
+};
+
+}
diff --git a/include/ds/PriorityQueue.h b/include/ds/PriorityQueue.h
new file mode 100644
index 0000000..49f3e41
--- /dev/null
+++ b/include/ds/PriorityQueue.h
@@ -0,0 +1,135 @@
+/*
+ * include/ds/PriorityQueue.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> 
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <vector>
+#include <cassert>
+
+#include "util/record.h"
+
+namespace lsm {
+
+struct queue_record {
+    const char *data;
+    size_t version;
+};
+
+
+class PriorityQueue {
+public:
+    PriorityQueue(size_t size) : data(size), tail(0) {}
+    ~PriorityQueue() = default;
+
+    size_t size() const {
+        return tail;
+    }
+
+    void pop() {
+        assert(this->tail != 0);
+
+        // If there is only one element, just decrement the
+        // tail.
+        if (this->size() == 1) {
+            this->tail--;
+            return;
+        }
+
+        swap(0, --this->tail);
+
+        ssize_t idx = 0;
+        ssize_t child_idx;
+
+        while ((child_idx = min_child(idx)) != -1 && heap_cmp(child_idx, idx)) {
+            swap(idx, child_idx);
+            idx = child_idx;
+        }
+    }
+
+    void push(const char* record, size_t version=0) {
+        assert(tail != this->data.size());
+
+        size_t new_idx = this->tail++;
+        this->data[new_idx] = {record, version};
+
+        while (new_idx != 0 && !heap_cmp(parent(new_idx), new_idx)) {
+            swap(parent(new_idx), new_idx);
+            new_idx = parent(new_idx);
+        }
+    }
+
+
+    queue_record peek(size_t depth=0) {
+        ssize_t idx = 0;
+        size_t cur_depth = 0;
+
+        while (cur_depth != depth) {
+            idx = min_child(idx);
+            assert(idx != -1);
+            cur_depth++;
+        }
+
+        return this->data[idx];
+    }
+
+private:
+    std::vector<queue_record> data;
+    size_t tail;
+
+    /*
+     * Swap the elements at position a and position
+     * b within the heap
+     */
+    inline void swap(size_t a, size_t b) {
+        if (a == b) return;
+
+        queue_record temp = this->data[a];
+        this->data[a] = this->data[b];
+        this->data[b] = temp;
+    }
+
+    inline size_t left_child(size_t idx) {
+        return 2 * idx + 1;
+    }
+
+    inline size_t right_child(size_t idx) {
+        return 2 * idx + 2;
+    }
+
+    inline size_t parent(size_t idx) {
+        return (idx - 1) / 2;
+    }
+
+    inline ssize_t min_child(size_t idx) {
+        ssize_t left = left_child(idx) < tail ? left_child(idx) : -1;
+        ssize_t right = right_child(idx) < tail ? right_child(idx) : -1;
+
+        if (left == -1 && right == -1) {
+            return -1;
+        } else if (left == -1) {
+            return right;
+        } else  if (right == -1) {
+            return left;
+        }
+
+        return (heap_cmp(left, right)) ? left : right;
+    }
+
+    inline bool heap_cmp(size_t a, size_t b) {
+        auto cmp = record_cmp(this->data[a].data, this->data[b].data);
+        if (cmp == 0) {
+            if (this->data[a].version != this->data[b].version)
+                return this->data[a].version < this->data[b].version;
+            else return is_tombstone(this->data[a].data) && is_tombstone(this->data[b].data);
+        }
+        return cmp == -1;
+    }
+
+};
+}
diff --git a/include/util/Cursor.h b/include/util/Cursor.h
new file mode 100644
index 0000000..3f5f20f
--- /dev/null
+++ b/include/util/Cursor.h
@@ -0,0 +1,89 @@
+/*
+ * include/util/Cursor.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> 
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "util/base.h"
+#include "util/record.h"
+#include "io/PagedFile.h"
+
+namespace de {
+struct Cursor {
+    record_t *ptr;
+    const record_t *end;
+    size_t cur_rec_idx;
+    size_t rec_cnt;
+
+    friend bool operator==(const Cursor &a, const Cursor &b) {
+        return a.ptr == b.ptr && a.end == b.end;
+    }
+};
+
+static Cursor g_empty_cursor = {0};
+
+/*
+ * Advance the cursor to the next record. If the cursor is backed by an
+ * iterator, will attempt to advance the iterator once the cursor reaches its
+ * end and reset the cursor to the beginning of the read page.
+ *
+ * If the advance succeeds, ptr will be updated to point to the new record
+ * and true will be returned. If the advance reaches the end, then ptr will
+ * be updated to be equal to end, and false will be returned. Iterators will
+ * not be closed.
+ */
+inline static bool advance_cursor(Cursor *cur, PagedFileIterator *iter = nullptr) {
+    cur->ptr++;
+    cur->cur_rec_idx++;
+
+    if (cur->cur_rec_idx >= cur->rec_cnt) return false;
+
+    if (cur->ptr >= cur->end) {
+        if (iter && iter->next()) {
+            cur->ptr = (record_t*)iter->get_item();
+            cur->end = cur->ptr + (PAGE_SIZE / sizeof(record_t));
+            return true;
+        }
+
+        return false;
+    }
+    return true;
+}
+
+/*
+ *   Process the list of cursors to return the cursor containing the next
+ *   largest element. Does not advance any of the cursors. If current is
+ *   specified, then skip the current head of that cursor during checking. 
+ *   This allows for "peaking" at the next largest element after the current 
+ *   largest is processed.
+ */
+inline static Cursor *get_next(std::vector<Cursor> &cursors, Cursor *current=&g_empty_cursor) {
+    const record_t *min_rec = nullptr;
+    Cursor *result = &g_empty_cursor;
+    for (size_t i=0; i< cursors.size(); i++) {
+        if (cursors[i] == g_empty_cursor) continue;
+
+        const record_t *rec = (&cursors[i] == current) ? cursors[i].ptr + 1 : cursors[i].ptr;
+        if (rec >= cursors[i].end) continue;
+
+        if (min_rec == nullptr) {
+            result = &cursors[i];
+            min_rec = rec;
+            continue;
+        }
+
+        if (*rec < *min_rec) {
+            result = &cursors[i];
+            min_rec = rec;
+        }
+    }
+
+    return result;
+} 
+
+}
diff --git a/include/util/base.h b/include/util/base.h
new file mode 100644
index 0000000..1729687
--- /dev/null
+++ b/include/util/base.h
@@ -0,0 +1,73 @@
+/*
+ * include/util/base.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> 
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+#include <cstddef>
+#include <memory>
+
+namespace de {
+
+// The correct quantity for use in alignment of buffers to be
+// compatible with O_DIRECT
+const size_t SECTOR_SIZE = 512;
+
+// The standard sized block of data (in bytes) for use in IO 
+// operations.
+const size_t PAGE_SIZE = 4096;
+
+// The size of a cacheline, for alignment purposes.
+const size_t CACHELINE_SIZE = 64;
+
+// The largest representable PageNum. A given file cannot
+// have more pages than this.
+const size_t MAX_PAGE_COUNT = UINT32_MAX;
+
+// The largest representable FileId. The file manager cannot
+// manage more files than this.
+const size_t MAX_FILE_COUNT = UINT32_MAX;
+
+// The largest representable FrameId. No buffer can be defined with
+// more frames than this.
+const size_t MAX_FRAME_COUNT = UINT32_MAX;
+
+// The number of bytes of zeroes available in ZEROBUF. Will be
+// a multiple of the parm::PAGE_SIZE.
+constexpr size_t ZEROBUF_SIZE = 8 * PAGE_SIZE;
+
+// A large, preallocated, buffer of zeroes used for pre-allocation
+// of pages in a file.
+alignas(SECTOR_SIZE) const char ZEROBUF[ZEROBUF_SIZE] = {0};
+
+
+// alignment code taken from TacoDB (file: tdb_base.h)
+template<class T>
+constexpr T
+TYPEALIGN(uint64_t ALIGNVAL, T LEN) {
+    return (((uint64_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uint64_t) ((ALIGNVAL) - 1)));
+}
+
+#define SHORTALIGN(LEN)         TYPEALIGN(2, (LEN))
+#define INTALIGN(LEN)           TYPEALIGN(4, (LEN))
+#define LONGALIGN(LEN)          TYPEALIGN(8, (LEN))
+#define DOUBLEALIGN(LEN)        TYPEALIGN(8, (LEN))
+#define MAXALIGN(LEN)           TYPEALIGN(8, (LEN))
+#define CACHELINEALIGN(LEN)     TYPEALIGN(CACHELINE_SIZE, (LEN))
+#define MAXALIGN_OF             8
+
+// Returns a pointer to the idx'th page contained within a multi-page
+// buffer. buffer must be page aligned, and idx must be less than the
+// number of pages within the buffer, or the result is undefined.
+static inline char *get_page(char *buffer, size_t idx) {
+    return buffer + (idx * PAGE_SIZE);
+}
+
+}
diff --git a/include/util/bf_config.h b/include/util/bf_config.h
new file mode 100644
index 0000000..b4d68bc
--- /dev/null
+++ b/include/util/bf_config.h
@@ -0,0 +1,27 @@
+/*
+ * include/util/bf_config.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu> 
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include "util/base.h"
+
+namespace de {
+
+static double BF_FPR = .01;
+static size_t BF_HASH_FUNCS = 7;
+
+static void BF_SET_FPR(double fpr) {
+    BF_FPR = fpr;
+}
+
+static void BF_SET_HASHFUNC(size_t func_cnt) {
+    BF_HASH_FUNCS = func_cnt;
+}
+
+}
diff --git a/include/util/hash.h b/include/util/hash.h
new file mode 100644
index 0000000..04871dc
--- /dev/null
+++ b/include/util/hash.h
@@ -0,0 +1,61 @@
+/*
+ * include/util/hash.h
+ *
+ * Copyright (C) 2023 Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+
+namespace de {
+
+// 40343 is a "magic constant" that works well,
+// 38299 is another good value.
+// Both are primes and have a good distribution of bits.
+const uint64_t kHashMagicNum = 40343;
+
+inline uint64_t rotr64(uint64_t x, size_t n) {
+    return (((x) >> n) | ((x) << (64 - n)));
+}
+
+inline uint64_t hash(uint64_t input) {
+    uint64_t local_rand = input;
+    uint64_t local_rand_hash = 8;
+    local_rand_hash = 40343 * local_rand_hash + ((local_rand) & 0xFFFF);
+    local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 16) & 0xFFFF);
+    local_rand_hash = 40343 * local_rand_hash + ((local_rand >> 32) & 0xFFFF);
+    local_rand_hash = 40343 * local_rand_hash + (local_rand >> 48);
+    local_rand_hash = 40343 * local_rand_hash;
+    return rotr64(local_rand_hash, 43);
+}
+
+inline uint64_t hash_bytes(const char* str, size_t len) {
+    uint64_t hashState = len;
+
+    for(size_t idx = 0; idx < len; ++idx) {
+      hashState = kHashMagicNum * hashState + str[idx];
+    }
+
+    // The final scrambling helps with short keys that vary only on the high order bits.
+    // Low order bits are not always well distributed so shift them to the high end, where they'll
+    // form part of the 14-bit tag.
+    return rotr64(kHashMagicNum * hashState, 6);
+}
+
+inline uint64_t hash_bytes_with_salt(const char* str, size_t len, uint16_t salt) {
+    uint64_t hashState = len;
+
+    for(size_t idx = 0; idx < len; ++idx) {
+      hashState = kHashMagicNum * hashState + str[idx];
+    }
+
+    hashState = kHashMagicNum * hashState + salt;
+
+    return rotr64(kHashMagicNum * hashState, 6);
+}
+
+}
diff --git a/include/util/internal_record.h b/include/util/internal_record.h
new file mode 100644
index 0000000..d898b8b
--- /dev/null
+++ b/include/util/internal_record.h
@@ -0,0 +1,96 @@
+/*
+ * include/util/internal_record.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+#pragma once
+
+#include "util/record.h"
+#include "util/types.h"
+
+/*
+ * Utility functions for use in handling internal nodes within an ISAM
+ * tree.
+ */
+
+namespace de {
+
+struct ISAMTreeInternalNodeHeader {
+    PageNum next_sibling; // INVALID_PNUM this is the last node on a level
+    PageNum prev_sibling; // INVALID_PNUM if this is the first node on a level
+    size_t leaf_rec_cnt; // number of records in leaf nodes under this node
+    size_t internal_rec_cnt; // number of internal records in this node
+};
+
+/*
+ * The total (aligned) size of an ISAMTreeInternalNodeHeader object. 
+ */
+static constexpr PageOffset ISAMTreeInternalNodeHeaderSize = MAXALIGN(sizeof(ISAMTreeInternalNodeHeader));
+
+/*
+ * The number of bytes occupied by an ISAM tree internal record, including alignment bytes.
+ */
+static constexpr size_t internal_record_size = sizeof(key_t) + MAXALIGN(sizeof(PageNum));
+
+/*
+ * The maximum number of internal records that can be stored in an internal
+ * node, accounting for alignment and the node header.
+ */
+static constexpr size_t internal_records_per_page = (PAGE_SIZE - ISAMTreeInternalNodeHeaderSize) / internal_record_size;
+
+/*
+ * Format an internal record, starting in the first byte of buffer, with the
+ * specified key and target page number. If either buffer or key refer to
+ * memory regions of insufficient size, the results are undefined.
+ */
+static inline void build_internal_record(char *buffer, const char *key, PageNum target_page) {
+    memcpy(buffer, key, sizeof(key_t));
+    memcpy(buffer + sizeof(key_t), &target_page, sizeof(PageNum));
+}
+
+/*
+ * Return the idx'th internal record within an internal node.
+ * internal_page_buffer must refer to the first byte of a page containing a
+ * full internal node, including the header. The returned value is undefined if
+ * idx is greater than or equal to the number of internal records within the
+ * node.
+ */
+static inline char *get_internal_record(char *internal_page_buffer, size_t idx) {
+    return internal_page_buffer + ISAMTreeInternalNodeHeaderSize + internal_record_size * idx;
+}
+
+/*
+ * Return a pointer to the key contained within an internal record, pointed
+ * to by buffer. Buffer must point to the beginning of a valid internal record,
+ * or the result of this function is undefined.
+ */
+static inline const char *get_internal_key(const char *buffer) {
+    return buffer;
+}
+
+/*
+ * Return the Page Number (value) of an internal record, pointed to by buffer.
+ * Buffer must point to the beginning of a valid internal record, or the result
+ * of this function is undefined.
+ */
+static inline PageNum get_internal_value(const char *buffer) {
+    return *((PageNum *) (buffer + sizeof(key_t)));
+}
+
+/*
+ * Return a pointer to the header of an ISAM Tree internal node, referred to by
+ * buffer. If buffer contains multiple nodes, a specific node can be requested 
+ * using the idx parameter, otherwise the header of the first node will be returned.
+ * Buffer must point to the beginning of a valid ISAM Tree internal node, and idx
+ * must be less than the number of internal nodes within the buffer, or the 
+ * returned value is undefined.
+ */
+static inline ISAMTreeInternalNodeHeader *get_header(char *buffer, size_t idx=0) {
+    return (ISAMTreeInternalNodeHeader *) get_page(buffer, idx);
+}
+
+}
diff --git a/include/util/record.h b/include/util/record.h
new file mode 100644
index 0000000..0ba0f97
--- /dev/null
+++ b/include/util/record.h
@@ -0,0 +1,65 @@
+/*
+ * include/util/record.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *                    Dong Xie <dongx@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <cstring>
+
+#include "util/base.h"
+
+namespace de {
+
+typedef uint32_t hdr_t;
+typedef uint64_t key_t;
+typedef uint32_t value_t;
+typedef uint64_t weight_t;
+
+struct record_t {
+    key_t key;
+    value_t value;
+    hdr_t header;
+    weight_t weight;
+
+    inline bool match(key_t k, value_t v, bool is_tombstone) const {
+        return (key == k) && (value == v) && ((header & 1) == is_tombstone);
+    }
+
+    inline void set_delete_status() {
+        header |= 2;
+    }
+
+    inline bool get_delete_status() const {
+        return header & 2;
+    }
+
+    inline bool is_tombstone() const {
+        return header & 1;
+    }
+
+    inline int match(const record_t* other) const {
+        return key == other->key && value == other->value;
+    }
+
+    inline bool operator<(const record_t& other) const {
+        return key < other.key || (key == other.key && value < other.value);
+    }
+
+    inline bool lt(const key_t& k, const value_t& v) const {
+        return key < k || (key == k && value < v);
+    }
+};
+
+static_assert(sizeof(record_t) == 24, "Record is not 24 bytes long.");
+
+static bool memtable_record_cmp(const record_t& a, const record_t& b) {
+    return (a.key < b.key) || (a.key == b.key && a.value < b.value)
+        || (a.key == b.key && a.value == b.value && a.header < b.header);
+}
+
+}
diff --git a/include/util/timer.h b/include/util/timer.h
new file mode 100644
index 0000000..a9784a8
--- /dev/null
+++ b/include/util/timer.h
@@ -0,0 +1,37 @@
+/*
+ * include/util/timer.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ */
+#pragma once
+
+#include <chrono>
+
+#ifdef ENABLE_TIMER
+#define TIMER_INIT() \
+    auto timer_start = std::chrono::high_resolution_clock::now(); \
+    auto timer_stop = std::chrono::high_resolution_clock::now();
+
+#define TIMER_START() \
+    timer_start = std::chrono::high_resolution_clock::now()
+
+#define TIMER_STOP() \
+    timer_stop = std::chrono::high_resolution_clock::now()
+
+#define TIMER_RESULT() \
+    std::chrono::duration_cast<std::chrono::nanoseconds>(timer_stop - timer_start).count()
+
+#else
+    #define TIMER_INIT() \
+        do {} while(0)
+    #define TIMER_START() \
+        do {} while(0)
+    #define TIMER_STOP() \
+        do {} while(0)
+    #define TIMER_RESULT() \
+        0l
+#endif
+
diff --git a/include/util/types.h b/include/util/types.h
new file mode 100644
index 0000000..63da3fa
--- /dev/null
+++ b/include/util/types.h
@@ -0,0 +1,71 @@
+/*
+ * include/util/types.h
+ *
+ * Copyright (C) 2023 Douglas Rumbaugh <drumbaugh@psu.edu>
+ *
+ * All rights reserved. Published under the Modified BSD License.
+ *
+ * A centralized header file for various datatypes used throughout the
+ * code base. There are a few very specific types, such as header formats,
+ * that are defined within the header files that make direct use of them,
+ * but all generally usable, simple types are defined here.
+ *
+ */
+#pragma once
+
+#include <cstdlib>
+#include <cstdint>
+#include <cstddef>
+#include <string>
+
+#include "util/base.h"
+
+namespace de {
+
+using std::byte;
+
+// Represents a page offset within a specific file (physical or virtual)
+typedef uint32_t PageNum;
+
+// Byte offset within a page. Also used for lengths of records, etc.,
+// within the codebase. size_t isn't necessary, as the maximum offset
+// is only parm::PAGE_SIZE 
+typedef uint16_t PageOffset;
+
+// A unique identifier for a frame within a buffer or cache.
+typedef int32_t FrameId;
+
+// A unique timestamp for use in MVCC concurrency control. Currently stored in
+// record headers, but not used by anything.
+typedef uint32_t Timestamp;
+const Timestamp TIMESTAMP_MIN = 0;
+const Timestamp TIMESTAMP_MAX = UINT32_MAX;
+
+// Invalid values for various IDs. Used throughout the code base to indicate
+// uninitialized values and error conditions.
+const PageNum INVALID_PNUM = 0;
+const FrameId INVALID_Fshid = -1;
+
+// An ID for a given shard within the index. The level_idx is the index
+// in the memory_levels and disk_levels vectors corresponding to the
+// shard, and the shard_idx is the index with the level (always 0 in the
+// case of leveling). Note that the two vectors of levels are treated
+// as a contiguous index space.
+struct ShardID {
+    ssize_t level_idx;
+    ssize_t shard_idx;
+
+    friend bool operator==(const ShardID &shid1, const ShardID &shid2) {
+        return shid1.level_idx == shid2.level_idx && shid1.shard_idx == shid2.shard_idx;
+    }
+};
+
+const ShardID INVALID_SHID = {-1, -1};
+
+struct SampleRange {
+    ShardID run_id;
+    size_t low;
+    size_t high;
+};
+
+}
author	Douglas Rumbaugh <dbr4@psu.edu>	2023-05-08 12:59:46 -0400
committer	Douglas Rumbaugh <dbr4@psu.edu>	2023-05-08 12:59:46 -0400
commit	fdf5ef27fec1368a33801a98bbc5ed3556476979 (patch)
tree	7ecd9477fed6495d05a44343e19bfb6480785c7a /include
parent	2380dd4d73cd6a179567e06e1aa4871ad44ccce3 (diff)
download	dynamic-extension-fdf5ef27fec1368a33801a98bbc5ed3556476979.tar.gz