% Encoding: UTF-8

% walker74: the issue number lives in the standard "number" field, the page
% range uses "--", and the author initials are separated.
@article{walker74,
  author = {Walker, A. J.},
  title = {New fast method for generating discrete random numbers with arbitrary frequency distributions},
  journal = {Electronics Letters},
  year = {1974},
  volume = {10},
  number = {8},
  pages = {127--128}
}

% oneil96: the acronym is brace-protected so sentence-casing styles keep it.
@article{oneil96,
  author = {Patrick E. O'Neil and Edward Cheng and Dieter Gawlick and Elizabeth J. O'Neil},
  title = {The Log-Structured Merge-Tree ({LSM-Tree})},
  journal = {Acta Informatica},
  volume = {33},
  number = {4},
  pages = {351--385},
  year = {1996},
  url = {https://doi.org/10.1007/s002360050048},
  doi = {10.1007/s002360050048},
  timestamp = {Sun, 21 Jun 2020 17:38:20 +0200},
  biburl = {https://dblp.org/rec/journals/acta/ONeilCGO96.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{xie21,
  author = {Dong Xie and Jeff M. Phillips and Michael Matheny and Feifei Li},
  editor = {Guoliang Li and Zhanhuai Li and Stratos Idreos and Divesh Srivastava},
  title = {Spatial Independent Range Sampling},
  booktitle = {{SIGMOD} '21: International Conference on Management of Data, Virtual Event, China, June 20-25, 2021},
  pages = {2023--2035},
  publisher = {{ACM}},
  year = {2021},
  url = {https://doi.org/10.1145/3448016.3452806},
  doi = {10.1145/3448016.3452806},
  timestamp = {Mon, 21 Jun 2021 11:48:44 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/0001PM021.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{bloom70,
  author = {Burton H. Bloom},
  title = {Space/Time Trade-offs in Hash Coding with Allowable Errors},
  journal = {Commun.
{ACM}},
  volume = {13},
  number = {7},
  pages = {422--426},
  year = {1970},
  url = {https://doi.org/10.1145/362686.362692},
  doi = {10.1145/362686.362692},
  timestamp = {Wed, 14 Nov 2018 10:22:32 +0100},
  biburl = {https://dblp.org/rec/journals/cacm/Bloom70.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{dayan17,
  author = {Niv Dayan and Manos Athanassoulis and Stratos Idreos},
  editor = {Semih Salihoglu and Wenchao Zhou and Rada Chirkova and Jun Yang and Dan Suciu},
  title = {Monkey: Optimal Navigable Key-Value Store},
  booktitle = {Proceedings of the 2017 {ACM} International Conference on Management of Data, {SIGMOD} Conference 2017, Chicago, IL, USA, May 14-19, 2017},
  pages = {79--94},
  publisher = {{ACM}},
  year = {2017},
  url = {https://doi.org/10.1145/3035918.3064054},
  doi = {10.1145/3035918.3064054},
  timestamp = {Thu, 14 Oct 2021 10:11:38 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/DayanAI17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% In the two titles below, LSM-Tree is brace-protected so that
% sentence-casing bibliography styles do not lowercase the acronym.
@inproceedings{dayan18,
  author = {Niv Dayan and Stratos Idreos},
  editor = {Gautam Das and Christopher M. Jermaine and Philip A. Bernstein},
  title = {Dostoevsky: Better Space-Time Trade-Offs for {LSM-Tree} Based Key-Value Stores via Adaptive Removal of Superfluous Merging},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018},
  pages = {505--520},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3183713.3196927},
  doi = {10.1145/3183713.3196927},
  timestamp = {Wed, 21 Nov 2018 12:44:08 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/DayanI18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{dayan22,
  author = {Niv Dayan and Tamar Weiss and Shmuel Dashevsky and Michael Pan and Edward Bortnikov and Moshe Twitto},
  title = {Spooky: Granulating {LSM-Tree} Compactions Correctly},
  journal = {Proc.
{VLDB} Endow.},
  volume = {15},
  number = {11},
  pages = {3071--3084},
  year = {2022},
  url = {https://www.vldb.org/pvldb/vol15/p3071-dayan.pdf},
  timestamp = {Mon, 26 Sep 2022 17:09:16 +0200},
  biburl = {https://dblp.org/rec/journals/pvldb/DayanWDPBT22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% zhao22: AB-tree is brace-protected; unprotected, a sentence-casing style
% would render the leading word as "Ab-tree".
@article{zhao22,
  author = {Zhuoyue Zhao and Dong Xie and Feifei Li},
  title = {{AB-tree}: Index for Concurrent Random Sampling and Updates},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {9},
  pages = {1835--1847},
  year = {2022},
  url = {https://www.vldb.org/pvldb/vol15/p1835-zhao.pdf},
  timestamp = {Tue, 26 Jul 2022 17:09:52 +0200},
  biburl = {https://dblp.org/rec/journals/pvldb/ZhaoXL22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{balmau19,
  author = {Oana Balmau and Florin Dinu and Willy Zwaenepoel and Karan Gupta and Ravishankar Chandhiramoorthi and Diego Didona},
  editor = {Dahlia Malkhi and Dan Tsafrir},
  title = {{SILK:} Preventing Latency Spikes in Log-Structured Merge Key-Value Stores},
  booktitle = {2019 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2019, Renton, WA, USA, July 10-12, 2019},
  pages = {753--766},
  publisher = {{USENIX} Association},
  year = {2019},
  url = {https://www.usenix.org/conference/atc19/presentation/balmau},
  timestamp = {Mon, 01 Feb 2021 17:03:06 +0100},
  biburl = {https://dblp.org/rec/conf/usenix/BalmauDZGCD19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% silk-plus: the system name is brace-protected, matching balmau19 above.
@article{silk-plus,
  author = {Balmau, Oana and Dinu, Florin and Zwaenepoel, Willy and Gupta, Karan and Chandhiramoorthi, Ravishankar and Didona, Diego},
  title = {{SILK+} Preventing Latency Spikes in Log-Structured Merge Key-Value Stores Running Heterogeneous Workloads},
  year = {2020},
  issue_date = {November 2018},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {36},
  number = {4},
  issn = {0734-2071},
  url = {https://doi.org/10.1145/3380905},
  doi =
{10.1145/3380905},
  abstract = {Log-Structured Merge Key-Value stores (LSM KVs) are designed to offer good write performance, by capturing client writes in memory, and only later flushing them to storage. Writes are later compacted into a tree-like data structure on disk to improve read performance and to reduce storage space use. It has been widely documented that compactions severely hamper throughput. Various optimizations have successfully dealt with this problem. These techniques include, among others, rate-limiting flushes and compactions, selecting among compactions for maximum effect, and limiting compactions to the highest level by so-called fragmented LSMs. In this article, we focus on latencies rather than throughput. We first document the fact that LSM KVs exhibit high tail latencies. The techniques that have been proposed for optimizing throughput do not address this issue, and, in fact, in some cases, exacerbate it. The root cause of these high tail latencies is interference between client writes, flushes, and compactions. Another major cause for tail latency is the heterogeneous nature of the workloads in terms of operation mix and item sizes whereby a few more computationally heavy requests slow down the vast majority of smaller requests. We introduce the notion of an Input/Output (I/O) bandwidth scheduler for an LSM-based KV store to reduce tail latency caused by interference of flushing and compactions and by workload heterogeneity. We explore three techniques as part of this I/O scheduler: (1) opportunistically allocating more bandwidth to internal operations during periods of low load, (2) prioritizing flushes and compactions at the lower levels of the tree, and (3) separating client requests by size and by data access path. SILK+ is a new open-source LSM KV that incorporates this notion of an I/O scheduler.},
  journal = {ACM Trans. Comput.
Syst.},
  month = may,
  articleno = {12},
  numpages = {27},
  keywords = {I/O scheduling, log-structured merge key-value stores, tail latency}
}

@inproceedings{afshani17,
  author = {Peyman Afshani and Zhewei Wei},
  editor = {Kirk Pruhs and Christian Sohler},
  title = {Independent Range Sampling, Revisited},
  booktitle = {25th Annual European Symposium on Algorithms, {ESA} 2017, September 4-6, 2017, Vienna, Austria},
  series = {LIPIcs},
  volume = {87},
  pages = {3:1--3:14},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik},
  year = {2017},
  url = {https://doi.org/10.4230/LIPIcs.ESA.2017.3},
  doi = {10.4230/LIPIcs.ESA.2017.3},
  timestamp = {Tue, 11 Feb 2020 15:52:14 +0100},
  biburl = {https://dblp.org/rec/conf/esa/AfshaniW17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{tao22,
  author = {Yufei Tao},
  editor = {Leonid Libkin and Pablo Barcel{\'{o}}},
  title = {Algorithmic Techniques for Independent Query Sampling},
  booktitle = {{PODS} '22: International Conference on Management of Data, Philadelphia, PA, USA, June 12 - 17, 2022},
  pages = {129--138},
  publisher = {{ACM}},
  year = {2022},
  url = {https://doi.org/10.1145/3517804.3526068},
  doi = {10.1145/3517804.3526068},
  timestamp = {Wed, 07 Dec 2022 23:12:46 +0100},
  biburl = {https://dblp.org/rec/conf/pods/Tao22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{olken89,
  author = {Frank Olken and Doron Rotem},
  editor = {Peter M. G.
Apers and Gio Wiederhold},
  title = {Random Sampling from {B+} Trees},
  booktitle = {Proceedings of the Fifteenth International Conference on Very Large Data Bases, August 22-25, 1989, Amsterdam, The Netherlands},
  pages = {269--277},
  publisher = {Morgan Kaufmann},
  year = {1989},
  url = {http://www.vldb.org/conf/1989/P269.PDF},
  timestamp = {Wed, 29 Mar 2017 16:45:23 +0200},
  biburl = {https://dblp.org/rec/conf/vldb/OlkenR89.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{allendorf23,
  author = {Daniel Allendorf},
  title = {A Simple Data Structure for Maintaining a Discrete Probability Distribution},
  journal = {CoRR},
  volume = {abs/2302.05682},
  year = {2023},
  url = {https://doi.org/10.48550/arXiv.2302.05682},
  doi = {10.48550/arXiv.2302.05682},
  eprinttype = {arXiv},
  eprint = {2302.05682},
  timestamp = {Sun, 19 Feb 2023 18:44:53 +0100},
  biburl = {https://dblp.org/rec/journals/corr/abs-2302-05682.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% hagerup93: ordinal in the booktitle corrected from "20nd" to "20th".
@inproceedings{hagerup93,
  author = {Torben Hagerup and Kurt Mehlhorn and J. Ian Munro},
  editor = {Andrzej Lingas and Rolf G. Karlsson and Svante Carlsson},
  title = {Maintaining Discrete Probability Distributions Optimally},
  booktitle = {Automata, Languages and Programming, 20th International Colloquium, ICALP93, Lund, Sweden, July 5-9, 1993, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {700},
  pages = {253--264},
  publisher = {Springer},
  year = {1993},
  url = {https://doi.org/10.1007/3-540-56939-1\_77},
  doi = {10.1007/3-540-56939-1\_77},
  timestamp = {Tue, 14 May 2019 10:00:44 +0200},
  biburl = {https://dblp.org/rec/conf/icalp/HagerupMM93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{matias03,
  author = {Yossi Matias and Jeffrey Scott Vitter and Wen{-}Chun Ni},
  title = {Dynamic Generation of Discrete Random Variates},
  journal = {Theory Comput.
Syst.},
  volume = {36},
  number = {4},
  pages = {329--358},
  year = {2003},
  url = {https://doi.org/10.1007/s00224-003-1078-6},
  doi = {10.1007/s00224-003-1078-6},
  timestamp = {Tue, 21 Mar 2023 21:14:25 +0100},
  biburl = {https://dblp.org/rec/journals/mst/MatiasVN03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{vose91,
  author = {Michael D. Vose},
  title = {A Linear Algorithm For Generating Random Numbers With a Given Distribution},
  journal = {{IEEE} Trans. Software Eng.},
  volume = {17},
  number = {9},
  pages = {972--975},
  year = {1991},
  url = {https://doi.org/10.1109/32.92917},
  doi = {10.1109/32.92917},
  timestamp = {Wed, 17 May 2017 10:56:35 +0200},
  biburl = {https://dblp.org/rec/journals/tse/Vose91.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% In the two titles below, RocksDB, Bloom and LSM-Trees are brace-protected
% so sentence-casing styles keep their capitalisation.
@article{dong21,
  author = {Siying Dong and Andrew Kryczka and Yanqin Jin and Michael Stumm},
  title = {{RocksDB}: Evolution of Development Priorities in a Key-value Store Serving Large-scale Applications},
  journal = {{ACM} Trans. Storage},
  volume = {17},
  number = {4},
  pages = {26:1--26:32},
  year = {2021},
  url = {https://doi.org/10.1145/3483840},
  doi = {10.1145/3483840},
  timestamp = {Sun, 12 Feb 2023 18:49:27 +0100},
  biburl = {https://dblp.org/rec/journals/tos/DongKJS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{dayan18-1,
  author = {Niv Dayan and Manos Athanassoulis and Stratos Idreos},
  title = {Optimal {Bloom} Filters and Adaptive Merging for {LSM-Trees}},
  journal = {{ACM} Trans.
Database Syst.},
  volume = {43},
  number = {4},
  pages = {16:1--16:48},
  year = {2018},
  url = {https://doi.org/10.1145/3276980},
  doi = {10.1145/3276980},
  timestamp = {Sat, 05 Sep 2020 17:52:22 +0200},
  biburl = {https://dblp.org/rec/journals/tods/DayanAI18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% zhu21: Bloom and LSM-Trees are brace-protected, like CPU already was.
@inproceedings{zhu21,
  author = {Zichen Zhu and Ju Hyoung Mun and Aneesh Raman and Manos Athanassoulis},
  editor = {Danica Porobic and Spyros Blanas},
  title = {Reducing {Bloom} Filter {CPU} Overhead in {LSM-Trees} on Modern Storage Devices},
  booktitle = {Proceedings of the 17th International Workshop on Data Management on New Hardware, DaMoN 2021, 21 June 2021, Virtual Event, China},
  pages = {1:1--1:10},
  publisher = {{ACM}},
  year = {2021},
  url = {https://doi.org/10.1145/3465998.3466002},
  doi = {10.1145/3465998.3466002},
  timestamp = {Thu, 14 Oct 2021 09:48:02 +0200},
  biburl = {https://dblp.org/rec/conf/damon/ZhuMRA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{dayan19,
  author = {Niv Dayan and Stratos Idreos},
  editor = {Peter A.
Boncz and Stefan Manegold and Anastasia Ailamaki and Amol Deshpande and Tim Kraska},
  title = {The Log-Structured Merge-Bush {\&} the Wacky Continuum},
  booktitle = {Proceedings of the 2019 International Conference on Management of Data, {SIGMOD} Conference 2019, Amsterdam, The Netherlands, June 30 - July 5, 2019},
  pages = {449--466},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3299869.3319903},
  doi = {10.1145/3299869.3319903},
  timestamp = {Sat, 22 Jun 2019 17:10:04 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/DayanI19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{hu14,
  author = {Xiaocheng Hu and Miao Qiao and Yufei Tao},
  editor = {Richard Hull and Martin Grohe},
  title = {Independent range sampling},
  booktitle = {Proceedings of the 33rd {ACM} {SIGMOD-SIGACT-SIGART} Symposium on Principles of Database Systems, PODS'14, Snowbird, UT, USA, June 22-27, 2014},
  pages = {246--255},
  publisher = {{ACM}},
  year = {2014},
  url = {https://doi.org/10.1145/2594538.2594545},
  doi = {10.1145/2594538.2594545},
  timestamp = {Thu, 29 Sep 2022 08:01:46 +0200},
  biburl = {https://dblp.org/rec/conf/pods/HuQT14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{saxe79,
  author = {James B. Saxe and Jon Louis Bentley},
  title = {Transforming Static Data Structures to Dynamic Structures (Abridged Version)},
  booktitle = {20th Annual Symposium on Foundations of Computer Science, San Juan, Puerto Rico, 29-31 October 1979},
  pages = {148--168},
  publisher = {{IEEE} Computer Society},
  year = {1979},
  url = {https://doi.org/10.1109/SFCS.1979.47},
  doi = {10.1109/SFCS.1979.47},
  timestamp = {Thu, 23 Mar 2023 23:57:52 +0100},
  biburl = {https://dblp.org/rec/conf/focs/SaxeB79.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{overmars81,
  author = {Mark H.
Overmars and Jan van Leeuwen},
  title = {Worst-Case Optimal Insertion and Deletion Methods for Decomposable Searching Problems},
  journal = {Inf. Process. Lett.},
  volume = {12},
  number = {4},
  pages = {168--173},
  year = {1981},
  url = {https://doi.org/10.1016/0020-0190(81)90093-4},
  doi = {10.1016/0020-0190(81)90093-4},
  timestamp = {Fri, 26 May 2017 22:54:44 +0200},
  biburl = {https://dblp.org/rec/journals/ipl/OvermarsL81a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% overmars81-2: title typo "Worsts-Case" corrected; the DOI is written with
% the same casing as its URL.
@inproceedings{overmars81-2,
  author = {Mark H. Overmars and Jan van Leeuwen},
  editor = {Peter Deussen},
  title = {Dynamization of Decomposable Searching Problems Yielding Good Worst-Case Bounds},
  booktitle = {Theoretical Computer Science, 5th GI-Conference, Karlsruhe, Germany, March 23-25, 1981, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {104},
  pages = {224--233},
  publisher = {Springer},
  year = {1981},
  url = {https://doi.org/10.1007/BFb0017314},
  doi = {10.1007/BFb0017314},
  timestamp = {Tue, 14 May 2019 10:00:39 +0200},
  biburl = {https://dblp.org/rec/conf/tcs/OvermarsL81.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{naidan14,
  author = {Bilegsaikhan Naidan and Magnus Lie Hetland},
  title = {Static-to-dynamic transformation for metric indexing structures (extended version)},
  journal = {Inf.
Syst.},
  volume = {45},
  pages = {48--60},
  year = {2014},
  url = {https://doi.org/10.1016/j.is.2013.08.002},
  doi = {10.1016/j.is.2013.08.002},
  timestamp = {Sat, 20 May 2017 00:24:08 +0200},
  biburl = {https://dblp.org/rec/journals/is/NaidanH14.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% almodaresi23: the key says 23 but the year field is 2022; the key is left
% unchanged because documents cite it. Bentley-Saxe is brace-protected.
@article{almodaresi23,
  author = {Fatemeh Almodaresi and Jamshed Khan and Sergey Madaminov and Michael Ferdman and Rob Johnson and Prashant Pandey and Rob Patro},
  title = {An incrementally updatable and scalable system for large-scale sequence search using the {Bentley-Saxe} transformation},
  journal = {Bioinform.},
  volume = {38},
  number = {12},
  pages = {3155--3163},
  year = {2022},
  url = {https://doi.org/10.1093/bioinformatics/btac142},
  doi = {10.1093/bioinformatics/btac142},
  timestamp = {Mon, 22 Aug 2022 08:21:09 +0200},
  biburl = {https://dblp.org/rec/journals/bioinformatics/AlmodaresiKMFJP22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% bulmer79: author initials are separated and given in "Last, First" form.
@book{bulmer79,
  author = {Bulmer, M. G.},
  title = {Principles of Statistics},
  year = {1979},
  publisher = {Dover},
  address = {New York}
}

@inproceedings{olken86,
  author = {Frank Olken and Doron Rotem},
  editor = {Wesley W.
Chu and Georges Gardarin and Setsuo Ohsuga and Yahiko Kambayashi},
  title = {Simple Random Sampling from Relational Databases},
  booktitle = {VLDB'86 Twelfth International Conference on Very Large Data Bases, August 25-28, 1986, Kyoto, Japan, Proceedings},
  pages = {160--169},
  publisher = {Morgan Kaufmann},
  year = {1986},
  url = {http://www.vldb.org/conf/1986/P160.PDF},
  timestamp = {Wed, 29 Mar 2017 16:45:23 +0200},
  biburl = {https://dblp.org/rec/conf/vldb/OlkenR86.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Documentation and product web pages.
% Organisation authors carry an extra pair of braces so the name is treated
% as one unit instead of being split into given names and a surname.
% Product names in titles are brace-protected against sentence casing.
@misc{postgres-doc,
  author = {{The PostgreSQL Global Development Group}},
  title = {{PostgreSQL} Documentation},
  url = {https://www.postgresql.org/docs/15/sql-select.html},
  year = {2025}
}

@misc{pg-gist,
  author = {{The PostgreSQL Global Development Group}},
  title = {{PostgreSQL} Documentation: {GiST} Indexes},
  url = {https://www.postgresql.org/docs/8.1/gist.html},
  year = {2025}
}

@misc{pg-gin,
  author = {{The PostgreSQL Global Development Group}},
  title = {{GIN} Indexes},
  url = {https://www.postgresql.org/docs/16/gin.html},
  year = {2024},
  lastaccessed = {April, 2024}
}

@misc{db2-doc,
  author = {{IBM}},
  title = {{IBM} {DB2} Documentation},
  url = {https://www.ibm.com/docs/en/db2/12.1.0?topic=design-data-sampling-in-queries},
  year = {2025}
}

@online{pinecone,
  title = {Pinecone {DB}},
  url = {https://www.pinecone.io/},
  year = {2025}
}

@online{neptune,
  title = {Amazon {Neptune}},
  url = {https://aws.amazon.com/neptune/},
  year = {2025}
}

@online{teradata,
  title = {Teradata},
  url = {https://www.teradata.com/},
  year = {2025}
}

@online{pinot,
  title = {Apache {Pinot}},
  url = {https://pinot.apache.org/},
  year = {2025}
}

@online{neo4j,
  title = {neo4j},
  url = {https://neo4j.com/},
  year = {2025}
}

@online{pinecone-db,
  title = {Pinecone {DB}: Hierarchical Navigable Small Worlds},
  url = {https://www.pinecone.io/learn/series/faiss/hnsw/},
  year = {2025}
}

@online{postgis-doc,
  title = {Introduction to {PostGIS}: Spatial Indexing},
  url =
{https://postgis.net/workshops/postgis-intro/indexing.html},
  year = {2025}
}

@online{mysql-btree-hash,
  title = {{MySQL} Documentation - Comparison of {B-tree} and Hash Indexes},
  url = {https://dev.mysql.com/doc/refman/8.0/en/index-btree-hash.html},
  year = {2025}
}

% olken95: the doi field holds the bare DOI without the resolver prefix,
% and the page range uses "--".
@article{olken95,
  author = {Frank Olken and Doron Rotem},
  title = {Random sampling from databases: a survey},
  journal = {Statistics and Computing},
  volume = {5},
  pages = {25--42},
  year = {1995},
  doi = {10.1007/BF00140664}
}

@inproceedings{hu15,
  author = {Xiaocheng Hu and Miao Qiao and Yufei Tao},
  editor = {Tova Milo and Diego Calvanese},
  title = {External Memory Stream Sampling},
  booktitle = {Proceedings of the 34th {ACM} Symposium on Principles of Database Systems, {PODS} 2015, Melbourne, Victoria, Australia, May 31 - June 4, 2015},
  pages = {229--239},
  publisher = {{ACM}},
  year = {2015},
  url = {https://doi.org/10.1145/2745754.2745757},
  doi = {10.1145/2745754.2745757},
  timestamp = {Thu, 29 Sep 2022 08:01:46 +0200},
  biburl = {https://dblp.org/rec/conf/pods/HuQT15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{vitter85,
  author = {Jeffrey Scott Vitter},
  title = {Random Sampling with a Reservoir},
  journal = {{ACM} Trans. Math. Softw.},
  volume = {11},
  number = {1},
  pages = {37--57},
  year = {1985},
  url = {https://doi.org/10.1145/3147.3165},
  doi = {10.1145/3147.3165},
  timestamp = {Tue, 21 Mar 2023 21:15:05 +0100},
  biburl = {https://dblp.org/rec/journals/toms/Vitter85.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{zhang18,
  author = {Huanchen Zhang and Hyeontaek Lim and Viktor Leis and David G. Andersen and Michael Kaminsky and Kimberly Keeton and Andrew Pavlo},
  editor = {Gautam Das and Christopher M. Jermaine and Philip A.
Bernstein},
  title = {{SuRF}: Practical Range Query Filtering with Fast Succinct Tries},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018},
  pages = {323--336},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3183713.3196931},
  doi = {10.1145/3183713.3196931},
  timestamp = {Sun, 25 Oct 2020 22:52:40 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/ZhangLLAKKP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% In the entry closed just above, SuRF is brace-protected so sentence-casing
% styles do not turn it into "Surf".

@inproceedings{siqiang20,
  author = {Siqiang Luo and Subarna Chatterjee and Rafael Ketsetsidis and Niv Dayan and Wilson Qin and Stratos Idreos},
  editor = {David Maier and Rachel Pottinger and AnHai Doan and Wang{-}Chiew Tan and Abdussalam Alawini and Hung Q. Ngo},
  title = {Rosetta: {A} Robust Space-Time Optimized Range Filter for Key-Value Stores},
  booktitle = {Proceedings of the 2020 International Conference on Management of Data, {SIGMOD} Conference 2020, online conference [Portland, OR, USA], June 14-19, 2020},
  pages = {2071--2086},
  publisher = {{ACM}},
  year = {2020},
  url = {https://doi.org/10.1145/3318464.3389731},
  doi = {10.1145/3318464.3389731},
  timestamp = {Wed, 04 May 2022 13:02:28 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/LuoCKDQI20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{blinkdb,
  author = {Sameer Agarwal and Barzan Mozafari and Aurojit Panda and Henry Milner and Samuel Madden and Ion Stoica},
  editor = {Zdenek Hanz{\'{a}}lek and Hermann H{\"{a}}rtig and Miguel Castro and M.
Frans Kaashoek},
  title = {{BlinkDB}: queries with bounded errors and bounded response times on very large data},
  booktitle = {Eighth Eurosys Conference 2013, EuroSys '13, Prague, Czech Republic, April 14-17, 2013},
  pages = {29--42},
  publisher = {{ACM}},
  year = {2013},
  url = {https://doi.org/10.1145/2465351.2465355},
  doi = {10.1145/2465351.2465355},
  timestamp = {Wed, 06 Jul 2022 14:43:33 +0200},
  biburl = {https://dblp.org/rec/conf/eurosys/AgarwalMPMMS13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% In the entry closed just above, BlinkDB is brace-protected so
% sentence-casing styles do not turn it into "Blinkdb".

@inproceedings{sps,
  author = {Bolin Ding and Silu Huang and Surajit Chaudhuri and Kaushik Chakrabarti and Chi Wang},
  editor = {Fatma {\"{O}}zcan and Georgia Koutrika and Sam Madden},
  title = {Sample + Seek: Approximating Aggregates with Distribution Precision Guarantee},
  booktitle = {Proceedings of the 2016 International Conference on Management of Data, {SIGMOD} Conference 2016, San Francisco, CA, USA, June 26 - July 01, 2016},
  pages = {679--694},
  publisher = {{ACM}},
  year = {2016},
  url = {https://doi.org/10.1145/2882903.2915249},
  doi = {10.1145/2882903.2915249},
  timestamp = {Wed, 14 Nov 2018 10:56:20 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/DingHCC016.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@phdthesis{olken-thesis,
  author = {Frank Olken},
  title = {Random Sampling from Databases},
  school = {University of California at Berkeley},
  year = {1993},
  timestamp = {Thu, 03 Jan 2002 12:33:26 +0100},
  biburl = {https://dblp.org/rec/phd/Olken93.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{ml-sampling,
  author = {Silu Huang and Chi Wang and Bolin Ding and Surajit Chaudhuri},
  title = {Efficient Identification of Approximate Best Configuration of Training in Large Datasets},
  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} 2019, The Thirty-First Innovative Applications of Artificial Intelligence Conference, {IAAI} 2019, The Ninth {AAAI}
Symposium on Educational Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, USA, January 27 - February 1, 2019},
  pages = {3862--3869},
  publisher = {{AAAI} Press},
  year = {2019},
  url = {https://doi.org/10.1609/aaai.v33i01.33013862},
  doi = {10.1609/aaai.v33i01.33013862},
  timestamp = {Tue, 02 Feb 2021 08:00:44 +0100},
  biburl = {https://dblp.org/rec/conf/aaai/Huang0DC19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% quickr: the mixed-case words AdHoc and BigData are brace-protected so
% sentence-casing styles keep their inner capitals.
@inproceedings{quickr,
  author = {Srikanth Kandula and Anil Shanbhag and Aleksandar Vitorovic and Matthaios Olma and Robert Grandl and Surajit Chaudhuri and Bolin Ding},
  editor = {Fatma {\"{O}}zcan and Georgia Koutrika and Sam Madden},
  title = {Quickr: Lazily Approximating Complex {AdHoc} Queries in {BigData} Clusters},
  booktitle = {Proceedings of the 2016 International Conference on Management of Data, {SIGMOD} Conference 2016, San Francisco, CA, USA, June 26 - July 01, 2016},
  pages = {631--646},
  publisher = {{ACM}},
  year = {2016},
  url = {https://doi.org/10.1145/2882903.2882940},
  doi = {10.1145/2882903.2882940},
  timestamp = {Wed, 14 Nov 2018 10:56:20 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/KandulaSVOGCD16.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{verdict,
  author = {Yongjoo Park and Barzan Mozafari and Joseph Sorenson and Junhao Wang},
  editor = {Gautam Das and Christopher M. Jermaine and Philip A.
Bernstein},
  title = {{VerdictDB}: Universalizing Approximate Query Processing},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018},
  pages = {1461--1476},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3183713.3196905},
  doi = {10.1145/3183713.3196905},
  timestamp = {Wed, 21 Nov 2018 12:44:08 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/ParkMSW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% In the entry closed just above, VerdictDB is brace-protected so
% sentence-casing styles do not turn it into "Verdictdb".

@inproceedings{irsra,
  author = {Peyman Afshani and Jeff M. Phillips},
  editor = {Gill Barequet and Yusu Wang},
  title = {Independent Range Sampling, Revisited Again},
  booktitle = {35th International Symposium on Computational Geometry, SoCG 2019, June 18-21, 2019, Portland, Oregon, {USA}},
  series = {LIPIcs},
  volume = {129},
  pages = {4:1--4:13},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik},
  year = {2019},
  url = {https://doi.org/10.4230/LIPIcs.SoCG.2019.4},
  doi = {10.4230/LIPIcs.SoCG.2019.4},
  timestamp = {Mon, 02 Jan 2023 09:02:13 +0100},
  biburl = {https://dblp.org/rec/conf/compgeom/AfshaniP19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{data-twitter1,
  author = {Haewoon Kwak and Changhyun Lee and Hosung Park and Sue B.
Moon},
  editor = {Michael Rappa and Paul Jones and Juliana Freire and Soumen Chakrabarti},
  title = {What is {Twitter}, a social network or a news media?},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web, {WWW} 2010, Raleigh, North Carolina, USA, April 26-30, 2010},
  pages = {591--600},
  publisher = {{ACM}},
  year = {2010},
  url = {https://doi.org/10.1145/1772690.1772751},
  doi = {10.1145/1772690.1772751},
  timestamp = {Sun, 02 Jun 2019 21:15:56 +0200},
  biburl = {https://dblp.org/rec/conf/www/KwakLPM10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% In the entry closed just above, the proper noun Twitter is brace-protected.

% Dataset landing pages.
@online{data-twitter,
  title = {Twitter Dataset},
  url = {https://github.com/ANLAB-KAIST/traces/releases/tag/twitter_rv.net},
  year = {2023}
}

@online{data-delicious,
  title = {Delicious Dataset},
  url = {http://konect.cc/networks/delicious-ti/},
  year = {2023}
}

@online{data-osm,
  title = {{Open Street Map} Dataset},
  url = {https://planet.openstreetmap.org/},
  year = {2023}
}

@inproceedings{golan-gueta15,
  author = {Guy Golan{-}Gueta and Edward Bortnikov and Eshcar Hillel and Idit Keidar},
  editor = {Laurent R{\'{e}}veill{\`{e}}re and Tim Harris and Maurice Herlihy},
  title = {Scaling concurrent log-structured data stores},
  booktitle = {Proceedings of the Tenth European Conference on Computer Systems, EuroSys 2015, Bordeaux, France, April 21-24, 2015},
  pages = {32:1--32:14},
  publisher = {{ACM}},
  year = {2015},
  url = {https://doi.org/10.1145/2741948.2741973},
  doi = {10.1145/2741948.2741973},
  timestamp = {Wed, 14 Nov 2018 10:57:04 +0100},
  biburl = {https://dblp.org/rec/conf/eurosys/Golan-GuetaBHK15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{custers19,
  author = {Bram Custers and Mees van de Kerkhof and Wouter Meulemans and Bettina Speckmann and Frank Staals},
  editor = {Farnoush Banaei Kashani and Goce Trajcevski and Ralf Hartmut G{\"{u}}ting and Lars Kulik and Shawn D.
Newsam},
  title = {Maximum Physically Consistent Trajectories},
  booktitle = {Proceedings of the 27th {ACM} {SIGSPATIAL} International Conference on Advances in Geographic Information Systems, {SIGSPATIAL} 2019, Chicago, IL, USA, November 5-8, 2019},
  pages = {79--88},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3347146.3359363},
  doi = {10.1145/3347146.3359363},
  timestamp = {Thu, 14 Nov 2019 10:14:43 +0100},
  biburl = {https://dblp.org/rec/conf/gis/CustersKMSS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{aumuller20,
  author = {Martin Aum{\"{u}}ller and Rasmus Pagh and Francesco Silvestri},
  editor = {Dan Suciu and Yufei Tao and Zhewei Wei},
  title = {Fair Near Neighbor Search: Independent Range Sampling in High Dimensions},
  booktitle = {Proceedings of the 39th {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles of Database Systems, {PODS} 2020, Portland, OR, USA, June 14-19, 2020},
  pages = {191--204},
  publisher = {{ACM}},
  year = {2020},
  url = {https://doi.org/10.1145/3375395.3387648},
  doi = {10.1145/3375395.3387648},
  timestamp = {Thu, 29 Sep 2022 08:01:46 +0200},
  biburl = {https://dblp.org/rec/conf/pods/0001P020.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{cohen23,
  author = {Edith Cohen},
  editor = {Floris Geerts and Hung Q.
Ngo and Stavros Sintos},
  title = {Sampling Big Ideas in Query Optimization},
  booktitle = {Proceedings of the 42nd {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles of Database Systems, {PODS} 2023, Seattle, WA, USA, June 18-23, 2023},
  pages = {361--371},
  publisher = {{ACM}},
  year = {2023},
  url = {https://doi.org/10.1145/3584372.3589935},
  doi = {10.1145/3584372.3589935},
  timestamp = {Thu, 15 Jun 2023 21:57:01 +0200},
  biburl = {https://dblp.org/rec/conf/pods/Cohen23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{gryz04,
  author = {Jarek Gryz and Junjie Guo and Linqi Liu and Calisto Zuzarte},
  editor = {Gerhard Weikum and Arnd Christian K{\"{o}}nig and Stefan De{\ss}loch},
  title = {Query Sampling in {DB2} Universal Database},
  booktitle = {Proceedings of the {ACM} {SIGMOD} International Conference on Management of Data, Paris, France, June 13-18, 2004},
  pages = {839--843},
  publisher = {{ACM}},
  year = {2004},
  url = {https://doi.org/10.1145/1007568.1007664},
  doi = {10.1145/1007568.1007664},
  timestamp = {Thu, 11 Mar 2021 15:20:15 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/GryzGLZ04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{ben-eliezer20,
  author = {Omri Ben{-}Eliezer and Eylon Yogev},
  editor = {Dan Suciu and Yufei Tao and Zhewei Wei},
  title = {The Adversarial Robustness of Sampling},
  booktitle = {Proceedings of the 39th {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles of Database Systems, {PODS} 2020, Portland, OR, USA, June 14-19, 2020},
  pages = {49--62},
  publisher = {{ACM}},
  year = {2020},
  url = {https://doi.org/10.1145/3375395.3387643},
  doi = {10.1145/3375395.3387643},
  timestamp = {Thu, 29 Sep 2022 08:01:46 +0200},
  biburl = {https://dblp.org/rec/conf/pods/Ben-EliezerY20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{li19,
  author = {Feifei Li and Bin Wu and Ke Yi and Zhuoyue Zhao},
  title = {Wander Join and {XDB:} Online Aggregation
via Random Walks},
  journal = {{ACM} Trans. Database Syst.},
  volume = {44},
  number = {1},
  pages = {2:1--2:41},
  year = {2019},
  url = {https://doi.org/10.1145/3284551},
  doi = {10.1145/3284551},
  timestamp = {Sun, 02 Oct 2022 15:51:46 +0200},
  biburl = {https://dblp.org/rec/journals/tods/LiWYZ19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{rdd,
  author = {Matei Zaharia and Mosharaf Chowdhury and Tathagata Das and Ankur Dave and Justin Ma and Murphy McCauley and Michael J. Franklin and Scott Shenker and Ion Stoica},
  editor = {Steven D. Gribble and Dina Katabi},
  title = {Resilient Distributed Datasets: {A} Fault-Tolerant Abstraction for In-Memory Cluster Computing},
  booktitle = {Proceedings of the 9th {USENIX} Symposium on Networked Systems Design and Implementation, {NSDI} 2012, San Jose, CA, USA, April 25-27, 2012},
  pages = {15--28},
  publisher = {{USENIX} Association},
  year = {2012},
  url = {https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/zaharia},
  timestamp = {Tue, 21 Mar 2023 21:02:49 +0100},
  biburl = {https://dblp.org/rec/conf/nsdi/ZahariaCDDMMFSS12.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Same paper as the entry keyed `hadoop` further down in this file; both keys
% are kept so that existing citations of either one still resolve.
@inproceedings{hdfs,
  author = {Konstantin Shvachko and Hairong Kuang and Sanjay Radia and Robert Chansler},
  editor = {Mohammed G. Khatib and Xubin He and Michael Factor},
  title = {The {Hadoop} Distributed File System},
  booktitle = {{IEEE} 26th Symposium on Mass Storage Systems and Technologies, {MSST} 2010, Lake Tahoe, Nevada, USA, May 3-7, 2010},
  pages = {1--10},
  publisher = {{IEEE} Computer Society},
  year = {2010},
  url = {https://doi.org/10.1109/MSST.2010.5496972},
  doi = {10.1109/MSST.2010.5496972},
  timestamp = {Fri, 24 Mar 2023 00:01:51 +0100},
  biburl = {https://dblp.org/rec/conf/mss/ShvachkoKRC10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@book{overmars83,
  author = {Mark H.
Overmars},
  title = {The Design of Dynamic Data Structures},
  series = {Lecture Notes in Computer Science},
  volume = {156},
  publisher = {Springer},
  year = {1983},
  url = {https://doi.org/10.1007/BFb0014927},
  doi = {10.1007/BFb0014927},
  isbn = {3-540-12330-X},
  timestamp = {Tue, 14 May 2019 10:00:35 +0200},
  biburl = {https://dblp.org/rec/books/sp/Overmars83.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{merge-dsp,
  author = {Mark H. Overmars and Jan van Leeuwen},
  title = {Two general methods for dynamizing decomposable searching problems},
  journal = {Computing},
  volume = {26},
  number = {2},
  pages = {155--166},
  year = {1981},
  url = {https://doi.org/10.1007/BF02241781},
  doi = {10.1007/BF02241781},
  timestamp = {Thu, 06 Aug 2020 13:50:37 +0200},
  biburl = {https://dblp.org/rec/journals/computing/OvermarsL81.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{vptree,
  author = {Peter N. Yianilos},
  editor = {Vijaya Ramachandran},
  title = {Data Structures and Algorithms for Nearest Neighbor Search in General Metric Spaces},
  booktitle = {Proceedings of the Fourth Annual {ACM/SIGACT-SIAM} Symposium on Discrete Algorithms, 25-27 January 1993, Austin, Texas, {USA}},
  pages = {311--321},
  publisher = {{ACM/SIAM}},
  year = {1993}
}

@inproceedings{mtree,
  author = {Paolo Ciaccia and Marco Patella and Pavel Zezula},
  title = {M-tree: An Efficient Access Method for Similarity Search in Metric Spaces},
  booktitle = {VLDB'97, Proceedings of 23rd International Conference on Very Large Data Bases},
  pages = {426--435},
  publisher = {Morgan Kaufmann},
  year = {1997}
}

@article{pgm,
  author = {Paolo Ferragina and Giorgio Vinciguerra},
  title = {The {PGM}-index: a fully-dynamic compressed learned index with provable worst-case bounds},
  journal = {Proc.
{VLDB} Endow.},
  volume = {13},
  number = {8},
  pages = {1162--1175},
  year = {2020},
  doi = {10.14778/3389133.3389135}
}

@article{sosd-datasets,
  author = {Ryan Marcus and Andreas Kipf and Alexander van Renen and Mihail Stoian and Sanchit Misra and Alfons Kemper and Thomas Neumann and Tim Kraska},
  title = {Benchmarking Learned Indexes},
  journal = {Proc. {VLDB} Endow.},
  volume = {14},
  number = {1},
  pages = {1--13},
  year = {2020}
}

% Conference version; the CoRR preprint of the same title is keyed
% `DBLP:journals/corr/abs-1712-01208` below.
@inproceedings{RMI,
  author = {Kraska, Tim and Beutel, Alex and Chi, Ed H. and Dean, Jeffrey and Polyzotis, Neoklis},
  title = {The Case for Learned Index Structures},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data},
  series = {SIGMOD '18},
  year = {2018}
}

@article{10.14778/2850583.2850584,
  author = {Wang, Lu and Christensen, Robert and Li, Feifei and Yi, Ke},
  title = {Spatial Online Sampling and Aggregation},
  journal = {Proc. {VLDB} Endow.},
  volume = {9},
  number = {3},
  year = {2015},
  publisher = {VLDB Endowment},
  doi = {10.14778/2850583.2850584}
}

@article{plex,
  author = {Mihail Stoian and Andreas Kipf and Ryan Marcus and Tim Kraska},
  title = {{PLEX:} Towards Practical Learned Indexing},
  journal = {CoRR},
  volume = {abs/2108.05117},
  year = {2021},
  eprint = {2108.05117},
  eprinttype = {arXiv}
}

@misc{sbw,
  author = {Cardellino, Cristian},
  title = {Spanish {Billion} {Words} {Corpus} and {Embeddings}},
  url = {https://crscardellino.github.io/SBWCE/},
  month = aug,
  year = {2019}
}

@article{DBLP:journals/corr/abs-1712-01208,
  author = {Tim Kraska and Alex Beutel and Ed H. Chi and Jeffrey Dean and Neoklis Polyzotis},
  title = {The Case for Learned Index Structures},
  journal = {CoRR},
  volume = {abs/1712.01208},
  year = {2017},
  eprint = {1712.01208},
  eprinttype = {arXiv}
}

@article{DBLP:journals/corr/abs-1903-00507,
  author = {Giorgio Vinciguerra and Paolo Ferragina and Michele Miccinesi},
  title = {Superseding traditional indexes by orchestrating learning and geometry},
  journal = {CoRR},
  volume = {abs/1903.00507},
  year = {2019},
  eprint = {1903.00507},
  eprinttype = {arXiv}
}

@inproceedings{alex,
  author = {Jialin Ding and Umar Farooq Minhas and Jia Yu and Chi Wang and Jaeyoung Do and Yinan Li and Hantian Zhang and Badrish Chandramouli and Johannes Gehrke and Donald Kossmann and David B.
Lomet and Tim Kraska},
  editor = {David Maier and Rachel Pottinger and AnHai Doan and Wang{-}Chiew Tan and Abdussalam Alawini and Hung Q. Ngo},
  title = {{ALEX:} An Updatable Adaptive Learned Index},
  booktitle = {Proceedings of the 2020 International Conference on Management of Data, {SIGMOD} Conference 2020, online conference [Portland, OR, USA], June 14-19, 2020},
  pages = {969--984},
  publisher = {{ACM}},
  year = {2020},
  url = {https://doi.org/10.1145/3318464.3389711},
  doi = {10.1145/3318464.3389711},
  timestamp = {Thu, 15 Sep 2022 14:00:48 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/DingMYWDLZCGKLK20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{10.14778/3489496.3489512,
  author = {Li, Pengfei and Hua, Yu and Jia, Jingnan and Zuo, Pengfei},
  title = {{FINEdex}: A Fine-Grained Learned Index Scheme for Scalable and Concurrent Memory Systems},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {2},
  year = {2021},
  publisher = {VLDB Endowment},
  doi = {10.14778/3489496.3489512}
}

@inproceedings{10.1145/2933349.2933352,
  author = {Leis, Viktor and Scheibner, Florian and Kemper, Alfons and Neumann, Thomas},
  title = {The {ART} of Practical Synchronization},
  booktitle = {Proceedings of the 12th International Workshop on Data Management on New Hardware},
  series = {DaMoN '16},
  year = {2016},
  doi = {10.1145/2933349.2933352}
}

@article{byods-datalog,
  author = {Sahebolamri, Arash and Barrett, Langston and Moore, Scott and Micinski, Kristopher},
  title = {Bring Your Own Data Structures to {Datalog}},
  year = {2023},
  issue_date = {October 2023},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {7},
  number = {OOPSLA2},
  url = {https://doi.org/10.1145/3622840},
  doi = {10.1145/3622840},
  abstract = {The restricted logic programming language Datalog has become a popular implementation target for deductive-analytic workloads including social-media analytics and program analysis. Modern Datalog engines compile Datalog rules to joins over explicit representations of relations—often B-trees or hash maps. While these modern engines have enabled high scalability in many application domains, they have a crucial weakness: achieving the desired algorithmic complexity may be impossible due to representation-imposed overhead of the engine’s data structures. In this paper, we present the "Bring Your Own Data Structures" (Byods) approach, in the form of a DSL embedded in Rust. Using Byods, an engineer writes logical rules which are implicitly parametric on the concrete data structure representation; our implementation provides an interface to enable "bringing their own" data structures to represent relations, which harmoniously interact with code generated by our compiler (implemented as Rust procedural macros). We formalize the semantics of Byods as an extension of Datalog’s; our formalization captures the key properties demanded of data structures compatible with Byods, including properties required for incrementalized (semi-na\"{\i}ve) evaluation. We detail many applications of the Byods approach, implementing analyses requiring specialized data structures for transitive and equivalence relations to scale, including an optimized version of the Rust borrow checker Polonius; highly-parallel PageRank made possible by lattices; and a large-scale analysis of LLVM utilizing index-sharing to scale. Our results show that Byods offers both improved algorithmic scalability (reduced time and/or space complexity) and runtimes competitive with state-of-the-art parallelizing Datalog solvers.},
  journal = {Proc. ACM Program. Lang.},
  month = oct,
  articleno = {264},
  numpages = {26},
  keywords = {Program Analysis, Logic Programming, Static Analysis, Datalog}
}

% Journal article (Doklady Akademii Nauk is a journal, so the venue goes in `journal`).
@article{avl,
  author = {Adelson-Velskii, Georgii Maksimovich and Landis, Evgenii Mikhailovich},
  title = {An algorithm for organization of information},
  journal = {Doklady Akademii Nauk},
  volume = {146},
  number = {2},
  pages = {263--266},
  year = {1962},
  publisher = {Russian Academy of Sciences}
}

@book{cowbook,
  author = {Raghu Ramakrishnan and Johannes Gehrke},
  title = {Database Management Systems},
  edition = {3},
  publisher = {McGraw-Hill},
  year = {2003}
}

@book{intro-analysis,
  author = {Christopher Heil},
  title = {Introduction to Real Analysis},
  edition = {1},
  publisher = {Springer},
  year = {2019}
}

@inproceedings{wavesofmisery,
  author = {Nikolaus Glombiewski and Bernhard Seeger and Goetz Graefe},
  editor = {Torsten Grust and Felix Naumann and Alexander B{\"{o}}hm and Wolfgang Lehner and Theo H{\"{a}}rder and Erhard Rahm and Andreas Heuer and Meike Klettke and Holger Meyer},
  title = {Waves of Misery After Index Creation},
  booktitle = {Datenbanksysteme f{\"{u}}r Business, Technologie und Web {(BTW} 2019), 18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme" (DBIS), 4.-8. M{\"{a}}rz 2019, Rostock, Germany, Proceedings},
  series = {{LNI}},
  volume = {{P-289}},
  pages = {77--96},
  publisher = {Gesellschaft f{\"{u}}r Informatik, Bonn},
  year = {2019},
  url = {https://doi.org/10.18420/btw2019-06},
  doi = {10.18420/BTW2019-06},
  timestamp = {Wed, 13 Jan 2021 11:37:30 +0100},
  biburl = {https://dblp.org/rec/conf/btw/GlombiewskiSG19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{wavesofmisery-rtree,
  author = {Lu Xing and Eric Lee and Tong An and Bo{-}Cheng Chu and Ahmed Mahmood and Ahmed M. Aly and Jianguo Wang and Walid G. Aref},
  title = {An Experimental Evaluation and Investigation of Waves of Misery in R-trees},
  journal = {Proc.
{VLDB} Endow.},
  volume = {15},
  number = {3},
  pages = {478--490},
  year = {2021},
  url = {http://www.vldb.org/pvldb/vol15/p478-aref.pdf},
  doi = {10.14778/3494124.3494132},
  timestamp = {Sun, 12 Nov 2023 02:17:29 +0100},
  biburl = {https://dblp.org/rec/journals/pvldb/XingLACMAWA21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{ubiq-btree,
  author = {Comer, Douglas},
  title = {Ubiquitous B-Tree},
  year = {1979},
  issue_date = {June 1979},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {11},
  number = {2},
  issn = {0360-0300},
  url = {https://doi.org/10.1145/356770.356776},
  doi = {10.1145/356770.356776},
  journal = {ACM Comput. Surv.},
  month = jun,
  pages = {121--137},
  numpages = {17}
}

@article{rocksdb,
  author = {Dong, Siying and Kryczka, Andrew and Jin, Yanqin and Stumm, Michael},
  title = {{RocksDB}: Evolution of Development Priorities in a Key-Value Store Serving Large-Scale Applications},
  year = {2021},
  issue_date = {November 2021},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {17},
  number = {4},
  issn = {1553-3077},
  url = {https://doi.org/10.1145/3483840},
  doi = {10.1145/3483840},
  abstract = {This article is an eight-year retrospective on development priorities for RocksDB, a key-value store developed at Facebook that targets large-scale distributed systems and that is optimized for Solid State Drives (SSDs). We describe how the priorities evolved over time as a result of hardware trends and extensive experiences running RocksDB at scale in production at a number of organizations: from optimizing write amplification, to space amplification, to CPU utilization.
We describe lessons from running large-scale applications, including that resource allocation needs to be managed across different RocksDB instances, that data formats need to remain backward- and forward-compatible to allow incremental software rollouts, and that appropriate support for database replication and backups are needed. Lessons from failure handling taught us that data corruption errors needed to be detected earlier and that data integrity protection mechanisms are needed at every layer of the system. We describe improvements to the key-value interface. We describe a number of efforts that in retrospect proved to be misguided. Finally, we describe a number of open problems that could benefit from future research.},
  journal = {ACM Trans. Storage},
  month = oct,
  articleno = {26},
  numpages = {32},
  keywords = {large-scale applications, RocksDB, SSD, compaction, databases, Key-value stores}
}

% The issuing institution is not recorded; the field only carries the
% availability note that came with the original export.
@techreport{ubiq-rtree,
  author = {Manolopoulos, Yannis and Nanopoulos, Alexandros and Papadopoulos, Apostolos N. and Theodoridis, Yannis},
  title = {R-trees have grown everywhere},
  year = {2003},
  institution = {Technical Report available at http://www.rtreeportal.org}
}

@article{mergeable-summaries,
  author = {Pankaj K. Agarwal and Graham Cormode and Zengfeng Huang and Jeff M. Phillips and Zhewei Wei and Ke Yi},
  title = {Mergeable summaries},
  journal = {{ACM} Trans. Database Syst.},
  volume = {38},
  number = {4},
  pages = {26},
  year = {2013},
  url = {https://doi.org/10.1145/2500128},
  doi = {10.1145/2500128},
  timestamp = {Tue, 21 Mar 2023 21:14:49 +0100},
  biburl = {https://dblp.org/rec/journals/tods/AgarwalCHPWY13.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{countmin-sketch,
  author = {Graham Cormode and S.
Muthukrishnan},
  editor = {Martin Farach{-}Colton},
  title = {An Improved Data Stream Summary: The Count-Min Sketch and Its Applications},
  booktitle = {{LATIN} 2004: Theoretical Informatics, 6th Latin American Symposium, Buenos Aires, Argentina, April 5-8, 2004, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2976},
  pages = {29--38},
  publisher = {Springer},
  year = {2004},
  url = {https://doi.org/10.1007/978-3-540-24698-5\_7},
  doi = {10.1007/978-3-540-24698-5\_7},
  timestamp = {Fri, 07 May 2021 12:53:47 +0200},
  biburl = {https://dblp.org/rec/conf/latin/CormodeM04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The given name uses the braced {\AA} form so BibTeX treats the accented
% letter as a single character when sorting and building labels.
@article{isam-overflow,
  author = {Larson, Per-{\AA}ke},
  title = {Analysis of Index-Sequential Files with Overflow Chaining},
  year = {1981},
  issue_date = {Dec. 1981},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {6},
  number = {4},
  issn = {0362-5915},
  url = {https://doi.org/10.1145/319628.319665},
  doi = {10.1145/319628.319665},
  abstract = {The gradual performance deterioration caused by deletions from and insertions into an index-sequential file after loading is analyzed. The model developed assumes that overflow records are handled by chaining. Formulas for computing the expected number of overflow records and the expected number of additional accesses caused by the overflow records for both successful and unsuccessful searches are derived.},
  journal = {ACM Trans.
Database Syst.},
  month = dec,
  pages = {671--680},
  numpages = {10},
  keywords = {analytic model, analysis of algorithms, overflow chaining, performance analysis, ISAM, overflow, overflow handling, indexed sequential access method, index sequential files, file organization, file structure}
}

@inproceedings{dynamize-succinct,
  author = {Ankur Gupta and Wing{-}Kai Hon and Rahul Shah and Jeffrey Scott Vitter},
  editor = {Lars Arge and Christian Cachin and Tomasz Jurdzinski and Andrzej Tarlecki},
  title = {A Framework for Dynamizing Succinct Data Structures},
  booktitle = {Automata, Languages and Programming, 34th International Colloquium, {ICALP} 2007, Wroclaw, Poland, July 9-13, 2007, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {4596},
  pages = {521--532},
  publisher = {Springer},
  year = {2007},
  url = {https://doi.org/10.1007/978-3-540-73420-8\_46},
  doi = {10.1007/978-3-540-73420-8\_46},
  timestamp = {Wed, 28 Feb 2024 08:25:37 +0100},
  biburl = {https://dblp.org/rec/conf/icalp/GuptaHSV07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{batched-decomposable,
  author = {Herbert Edelsbrunner and Mark H. Overmars},
  title = {Batched Dynamic Solutions to Decomposable Searching Problems},
  journal = {J. Algorithms},
  volume = {6},
  number = {4},
  pages = {515--542},
  year = {1985},
  url = {https://doi.org/10.1016/0196-6774(85)90030-6},
  doi = {10.1016/0196-6774(85)90030-6},
  timestamp = {Sun, 28 May 2017 13:24:58 +0200},
  biburl = {https://dblp.org/rec/journals/jal/EdelsbrunnerO85.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Software repository reference; the title is braced so sentence-casing
% styles keep the product name's capitalisation.
@misc{leveldb,
  author = {Sanjay Ghemawat and Jeff Dean},
  title = {{LevelDB}},
  year = {2025},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/google/leveldb}}
}

@misc{mtree-impl,
  author = {Eduardo R.
D'Avila},
  title = {M-Tree},
  year = {2013},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/erdavila/M-Tree}}
}

@misc{fst-impl,
  author = {Shunsuke Kanda},
  title = {Fast Succinct Trie},
  year = {2021},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/kampersanda/fast\_succinct\_trie}}
}

% Same DBLP record (conf/sigmod/DayanAI17) as the key `dayan17` near the top
% of this file; both keys are present in the database.
@inproceedings{monkey,
  author = {Niv Dayan and Manos Athanassoulis and Stratos Idreos},
  editor = {Semih Salihoglu and Wenchao Zhou and Rada Chirkova and Jun Yang and Dan Suciu},
  title = {Monkey: Optimal Navigable Key-Value Store},
  booktitle = {Proceedings of the 2017 {ACM} International Conference on Management of Data, {SIGMOD} Conference 2017, Chicago, IL, USA, May 14-19, 2017},
  pages = {79--94},
  publisher = {{ACM}},
  year = {2017},
  url = {https://doi.org/10.1145/3035918.3064054},
  doi = {10.1145/3035918.3064054},
  timestamp = {Thu, 14 Oct 2021 10:11:38 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/DayanAI17.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{dostoevsky,
  author = {Niv Dayan and Stratos Idreos},
  editor = {Gautam Das and Christopher M. Jermaine and Philip A.
Bernstein},
  title = {Dostoevsky: Better Space-Time Trade-Offs for LSM-Tree Based Key-Value Stores via Adaptive Removal of Superfluous Merging},
  booktitle = {Proceedings of the 2018 International Conference on Management of Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018},
  pages = {505--520},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3183713.3196927},
  doi = {10.1145/3183713.3196927},
  timestamp = {Wed, 21 Nov 2018 12:44:08 +0100},
  biburl = {https://dblp.org/rec/conf/sigmod/DayanI18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% arXiv preprint; the acronym in the title is braced so sentence-casing
% styles do not lowercase it.
@misc{autumn,
  author = {Fuheng Zhao and Zach Miller and Leron Reznikov and Divyakant Agrawal and Amr El Abbadi},
  title = {Autumn: A Scalable Read Optimized {LSM}-tree based Key-Value Stores with Fast Point and Range Read Speed},
  year = {2024},
  eprint = {2305.05074},
  archivePrefix = {arXiv},
  primaryClass = {cs.DB},
  url = {https://arxiv.org/abs/2305.05074}
}

@inproceedings{sarkar23,
  author = {Subhadeep Sarkar and Niv Dayan and Manos Athanassoulis},
  title = {The {LSM} Design Space and its Read Optimizations},
  booktitle = {39th {IEEE} International Conference on Data Engineering, {ICDE} 2023, Anaheim, CA, USA, April 3-7, 2023},
  pages = {3578--3584},
  publisher = {{IEEE}},
  year = {2023},
  url = {https://doi.org/10.1109/ICDE55515.2023.00273},
  doi = {10.1109/ICDE55515.2023.00273},
  timestamp = {Sun, 12 Nov 2023 02:08:10 +0100},
  biburl = {https://dblp.org/rec/conf/icde/SarkarDA23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{hadoop,
  author = {Konstantin Shvachko and Hairong Kuang and Sanjay Radia and Robert Chansler},
  editor = {Mohammed G.
Khatib and Xubin He and Michael Factor},
  title = {The {Hadoop} Distributed File System},
  booktitle = {{IEEE} 26th Symposium on Mass Storage Systems and Technologies, {MSST} 2010, Lake Tahoe, Nevada, USA, May 3-7, 2010},
  pages = {1--10},
  publisher = {{IEEE} Computer Society},
  year = {2010},
  url = {https://doi.org/10.1109/MSST.2010.5496972},
  doi = {10.1109/MSST.2010.5496972},
  timestamp = {Fri, 24 Mar 2023 00:01:51 +0100},
  biburl = {https://dblp.org/rec/conf/mss/ShvachkoKRC10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@misc{tlx,
  author = {Timo Bingmann},
  title = {{TLX}: Collection of Sophisticated {C++} Data Structures, Algorithms, and Miscellaneous Helpers},
  year = 2018,
  note = {\url{https://panthema.net/tlx}, retrieved {Oct.} 7, 2020}
}

@article{codd70,
  author = {E. F. Codd},
  title = {A Relational Model of Data for Large Shared Data Banks},
  journal = {Commun. {ACM}},
  volume = {13},
  number = {6},
  pages = {377--387},
  year = {1970},
  url = {https://doi.org/10.1145/362384.362685},
  doi = {10.1145/362384.362685},
  timestamp = {Fri, 24 Mar 2023 16:31:07 +0100},
  biburl = {https://dblp.org/rec/journals/cacm/Codd70.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Typed as a technical report because the record carries no proceedings title.
% NOTE(review): report number and institution were added from memory of the
% usual citation of this report; please confirm against a primary source.
@techreport{overmars-cn-decomp,
  author = {Mark H. Overmars},
  title = {Searching in the past {II}: general transformations},
  institution = {Department of Computer Science, University of Utrecht},
  number = {RUU-CS-81-9},
  year = {1981},
  url = {https://api.semanticscholar.org/CorpusID:56886448}
}

@inproceedings{overmars-art-of-dyn,
  author = {Jan van Leeuwen and Mark H.
Overmars},
  editor = {Jozef Gruska and Michal Chytil},
  title = {The Art of Dynamizing},
  booktitle = {Mathematical Foundations of Computer Science 1981, Strbske Pleso, Czechoslovakia, August 31 - September 4, 1981, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {118},
  pages = {121--131},
  publisher = {Springer},
  year = {1981},
  url = {https://doi.org/10.1007/3-540-10856-4\_78},
  doi = {10.1007/3-540-10856-4\_78},
  timestamp = {Tue, 14 May 2019 10:00:37 +0200},
  biburl = {https://dblp.org/rec/conf/mfcs/LeeuwenO81.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Institute report ("Bericht 42"): the report number and the issuing institute
% are stored in their own fields rather than inside the title and journal.
@techreport{maurer80,
  author = {van Leeuwen, Jan and Maurer, Hermann A.},
  title = {Dynamic systems of static data structures},
  type = {Bericht},
  number = {42},
  institution = {Institut f{\"u}r Informationsverarbeitung, TU Graz},
  address = {Graz, Austria},
  year = {1980}
}

% NOTE(review): the publisher string was truncated in the export; it has been
% completed with the institute name used by `maurer80` -- please confirm.
@book{maurer79,
  author = {Maurer, Hermann A. and Ottmann, Thomas},
  title = {Dynamic solutions of decomposable searching problems},
  year = {1979},
  publisher = {Technische Universit{\"a}t Graz/Forschungszentrum Graz, Institut f{\"u}r Informationsverarbeitung}
}

@article{lsmgraph,
  author = {Song Yu and Shufeng Gong and Qian Tao and Sijie Shen and Yanfeng Zhang and Wenyuan Yu and Pengxi Liu and Zhixin Zhang and Hongfu Li and Xiaojian Luo and Ge Yu and Jingren Zhou},
  title = {{LSMGraph}: {A} High-Performance Dynamic Graph Storage System with Multi-Level {CSR}},
  journal = {Proc. {ACM} Manag. Data},
  volume = {2},
  number = {6},
  pages = {243:1--243:28},
  year = {2024},
  url = {https://doi.org/10.1145/3698818},
  doi = {10.1145/3698818},
  timestamp = {Wed, 19 Mar 2025 21:16:37 +0100},
  biburl = {https://dblp.org/rec/journals/pacmmod/YuGTSZYLZLLYZ24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{bkdtree,
  author = {Octavian Procopiuc and Pankaj K. Agarwal and Lars Arge and Jeffrey Scott Vitter},
  editor = {Thanasis Hadzilacos and Yannis Manolopoulos and John F.
Roddick and Yannis Theodoridis},
  title = {Bkd-Tree: {A} Dynamic Scalable kd-Tree},
  booktitle = {Advances in Spatial and Temporal Databases, 8th International Symposium, {SSTD} 2003, Santorini Island, Greece, July 24-27, 2003, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {2750},
  pages = {46--65},
  publisher = {Springer},
  year = {2003},
  url = {https://doi.org/10.1007/978-3-540-45072-6\_4},
  doi = {10.1007/978-3-540-45072-6\_4},
  timestamp = {Tue, 21 Mar 2023 21:00:39 +0100},
  biburl = {https://dblp.org/rec/conf/ssd/ProcopiucAAV03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{perfect-hashing,
  author = {Czech, Zbigniew J. and Havas, George and Majewski, Bohdan S.},
  title = {Perfect hashing},
  journal = {Theoretical Computer Science},
  volume = {182},
  number = {1--2},
  pages = {1--143},
  year = {1997},
  publisher = {Elsevier}
}

% Web resources (datasets and lecture notes) referenced by URL only.
@online{ursa,
  title = {Brown Bear Genome, v1},
  url = {https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_023065955.1/},
  year = {2024}
}

@online{erickson-bsm-notes,
  author = {Jeff Erickson},
  title = {Lecture 1: Static-to-Dynamic Transformations},
  url = {https://jeffe.cs.illinois.edu/teaching/datastructures/2015/notes/01-statictodynamic.pdf},
  year = {2015}
}

@online{bigann,
  title = {BigANN Dataset},
  url = {https://big-ann-benchmarks.com/neurips21.html},
  year = {2024}
}

@online{english-words,
  title = {English Words Dataset},
  url = {https://github.com/dwyl/english-words?tab=readme-ov-file},
  year = {2024}
}

@article{quickselect,
  author = {C. A. R. Hoare},
  title = {Algorithm 65: find},
  journal = {Commun.
{ACM}},
  volume = {4},
  number = {7},
  pages = {321--322},
  year = {1961},
  url = {https://doi.org/10.1145/366622.366647},
  doi = {10.1145/366622.366647},
  timestamp = {Fri, 24 Mar 2023 16:31:07 +0100},
  biburl = {https://dblp.org/rec/journals/cacm/Hoare61a.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The entry keyed `dynamic-trie` below repeats this record field-for-field
% under a second key.
@article{DBLP:journals/jea/KandaKTMF20,
  author = {Shunsuke Kanda and Dominik K{\"{o}}ppl and Yasuo Tabei and Kazuhiro Morita and Masao Fuketa},
  title = {Dynamic Path-decomposed Tries},
  journal = {{ACM} J. Exp. Algorithmics},
  volume = {25},
  pages = {1--28},
  year = {2020},
  url = {https://doi.org/10.1145/3418033},
  doi = {10.1145/3418033},
  timestamp = {Sat, 08 Jan 2022 02:22:56 +0100},
  biburl = {https://dblp.org/rec/journals/jea/KandaKTMF20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{m-bonsai,
  author = {Andreas Poyias and Simon J. Puglisi and Rajeev Raman},
  title = {m-Bonsai: {A} Practical Compact Dynamic Trie},
  journal = {Int. J. Found. Comput. Sci.},
  volume = {29},
  number = {8},
  pages = {1257--1278},
  year = {2018},
  url = {https://doi.org/10.1142/S0129054118430025},
  doi = {10.1142/S0129054118430025},
  timestamp = {Sun, 19 Jan 2025 14:44:51 +0100},
  biburl = {https://dblp.org/rec/journals/ijfcs/PoyiasPR18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{dynamic-trie,
  author = {Shunsuke Kanda and Dominik K{\"{o}}ppl and Yasuo Tabei and Kazuhiro Morita and Masao Fuketa},
  title = {Dynamic Path-decomposed Tries},
  journal = {{ACM} J. Exp. Algorithmics},
  volume = {25},
  pages = {1--28},
  year = {2020},
  url = {https://doi.org/10.1145/3418033},
  doi = {10.1145/3418033},
  timestamp = {Sat, 08 Jan 2022 02:22:56 +0100},
  biburl = {https://dblp.org/rec/journals/jea/KandaKTMF20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{alex-aca,
  author = {Rui Yang and Evgenios M.
Kornaropoulos and Yue Cheng},
  title = {Algorithmic Complexity Attacks on Dynamic Learned Indexes},
  journal = {Proc. {VLDB} Endow.},
  volume = {17},
  number = {4},
  pages = {780--793},
  year = {2023},
  url = {https://www.vldb.org/pvldb/vol17/p780-yang.pdf},
  timestamp = {Tue, 26 Mar 2024 22:14:29 +0100},
  biburl = {https://dblp.org/rec/journals/pvldb/YangKC23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Journal name spelled the same way as the other PVLDB entries in this file.
@article{cosine,
  author = {Subarna Chatterjee and Meena Jagadeesan and Wilson Qin and Stratos Idreos},
  title = {Cosine: {A} Cloud-Cost Optimized Self-Designing Key-Value Storage Engine},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {1},
  pages = {112--126},
  year = {2021},
  url = {http://www.vldb.org/pvldb/vol15/p112-chatterjee.pdf},
  doi = {10.14778/3485450.3485461},
  timestamp = {Thu, 21 Apr 2022 17:09:21 +0200},
  biburl = {https://dblp.org/rec/journals/pvldb/ChatterjeeJQI21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{gist,
  author = {Joseph M. Hellerstein and Jeffrey F. Naughton and Avi Pfeffer},
  editor = {Umeshwar Dayal and Peter M. D. Gray and Shojiro Nishio},
  title = {Generalized Search Trees for Database Systems},
  booktitle = {VLDB},
  pages = {562--573},
  publisher = {Morgan Kaufmann},
  year = {1995},
  url = {http://www.vldb.org/conf/1995/P562.PDF},
  timestamp = {Tue, 20 Feb 2018 15:19:44 +0100},
  biburl = {https://dblp.org/rec/conf/vldb/HellersteinNP95.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{concurrent-gist,
  author = {Marcel Kornacker and C. Mohan and Joseph M.
Hellerstein},
  editor = {Joan Peckham},
  title = {Concurrency and Recovery in Generalized Search Trees},
  booktitle = {SIGMOD},
  pages = {62--72},
  publisher = {{ACM} Press},
  year = {1997},
  url = {https://doi.org/10.1145/253260.253272},
  doi = {10.1145/253260.253272},
  timestamp = {Mon, 14 Jun 2021 15:39:36 +0200},
  biburl = {https://dblp.org/rec/conf/sigmod/KornackerMH97.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Data Engineering Bulletin article (DBLP export).
@article{periodic-table,
  author = {Stratos Idreos and Kostas Zoumpatianos and Manos Athanassoulis and Niv Dayan and Brian Hentschel and Michael S. Kester and Demi Guo and Lukas M. Maas and Wilson Qin and Abdul Wasay and Yiyou Sun},
  title = {The Periodic Table of Data Structures},
  journal = {{IEEE} Data Eng. Bull.},
  volume = {41},
  number = {3},
  pages = {64--75},
  year = {2018},
  url = {http://sites.computer.org/debull/A18sept/p64.pdf},
  timestamp = {Tue, 10 Mar 2020 16:23:50 +0100},
  biburl = {https://dblp.org/rec/journals/debu/IdreosZADHKGMQW18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{ds-alchemy,
  author = {Stratos Idreos and Kostas Zoumpatianos and Subarna Chatterjee and Wilson Qin and Abdul Wasay and Brian Hentschel and Mike S. Kester and Niv Dayan and Demi Guo and Minseo Kang and Yiyou Sun},
  title = {Learning Data Structure Alchemy},
  journal = {{IEEE} Data Eng.
Bull.},
  volume = {42},
  number = {2},
  pages = {47--58},
  year = {2019},
  url = {http://sites.computer.org/debull/A19june/p47.pdf},
  timestamp = {Tue, 10 Mar 2020 16:23:49 +0100},
  biburl = {https://dblp.org/rec/journals/debu/IdreosZCQWHKDGK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Journal name spelled the same way as the other PVLDB entries in this file.
@article{gene,
  author = {Jens Dittrich and Joris Nix and Christian Sch{\"{o}}n},
  title = {The next 50 Years in Database Indexing or: The Case for Automatically Generated Index Structures},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {3},
  pages = {527--540},
  year = {2021},
  url = {http://www.vldb.org/pvldb/vol15/p527-dittrich.pdf},
  doi = {10.14778/3494124.3494136},
  timestamp = {Thu, 21 Apr 2022 17:09:21 +0200},
  biburl = {https://dblp.org/rec/journals/pvldb/DittrichNS21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{fluid-ds,
  author = {Darshana Balakrishnan and Lukasz Ziarek and Oliver Kennedy},
  editor = {Alvin Cheung and Kim Nguyen},
  title = {Fluid data structures},
  booktitle = {Proceedings of the 17th {ACM} {SIGPLAN} International Symposium on Database Programming Languages},
  pages = {3--17},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3315507.3330197},
  doi = {10.1145/3315507.3330197},
  timestamp = {Sun, 12 Nov 2023 02:16:34 +0100},
  biburl = {https://dblp.org/rec/conf/dbpl/BalakrishnanZK19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{10.1145/3332466.3374547,
  author = {Tang, Chuzhe and Wang, Youyun and Dong, Zhiyuan and Hu, Gansen and Wang, Zhaoguo and Wang, Minjie and Chen, Haibo},
  title = {{XIndex}: A Scalable Learned Index for Multicore Data Storage},
  booktitle = {Proceedings of the 25th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series = {PPoPP '20},
  year = {2020},
  doi = {10.1145/3332466.3374547}
}

@article{10.14778/3551793.3551848,
  author = {Wongkham, Chaichon and Lu, Baotong and Liu, Chris and Zhong, Zhicong and Lo, Eric and Wang, Tianzheng},
  title =
{Are Updatable Learned Indexes Ready?}, year = {2022}, publisher = {VLDB Endowment}, volume = {15}, number = {11}, journal = {PVDLB}, } @article{aries, author = {C. Mohan and Don Haderle and Bruce G. Lindsay and Hamid Pirahesh and Peter M. Schwarz}, title = {{ARIES:} {A} Transaction Recovery Method Supporting Fine-Granularity Locking and Partial Rollbacks Using Write-Ahead Logging}, journal = {{ACM} Trans. Database Syst.}, volume = {17}, number = {1}, pages = {94--162}, year = {1992}, url = {https://doi.org/10.1145/128765.128770}, doi = {10.1145/128765.128770}, timestamp = {Wed, 29 May 2019 10:39:45 +0200}, biburl = {https://dblp.org/rec/journals/tods/MohanHLPS92.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{bsp, author = {Valiant, Leslie G.}, title = {A bridging model for parallel computation}, year = {1990}, issue_date = {Aug. 1990}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {33}, number = {8}, issn = {0001-0782}, url = {https://doi.org/10.1145/79173.79181}, doi = {10.1145/79173.79181}, abstract = {The success of the von Neumann model of sequential computation is attributable to the fact that it is an efficient bridge between software and hardware: high-level languages can be efficiently compiled on to this model; yet it can be effeciently implemented in hardware. The author argues that an analogous bridge between software and hardware in required for parallel computation if that is to become as widely used. This article introduces the bulk-synchronous parallel (BSP) model as a candidate for this role, and gives results quantifying its efficiency both in implementing high-level language features and algorithms, as well as in being implemented in hardware.}, journal = {Commun. 
ACM}, month = aug, pages = {103–111}, numpages = {9} } @article{userspace-preempt, author = {Huang, Kaisong and Zhou, Jiatang and Zhao, Zhuoyue and Xie, Dong and Wang, Tianzheng}, title = {Low-Latency Transaction Scheduling via Userspace Interrupts: Why Wait or Yield When You Can Preempt?}, year = {2025}, issue_date = {June 2025}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {3}, number = {3}, url = {https://doi.org/10.1145/3725319}, doi = {10.1145/3725319}, abstract = {Traditional non-preemptive scheduling can lead to long latency under workloads that mix long-running and short transactions with varying priorities. This occurs because worker threads tend to monopolize CPU cores until they finish processing long-running transactions. Thus, short transactions must wait for the CPU, leading to long latency. As an alternative, cooperative scheduling allows for transaction yielding, but it is difficult to tune for diverse workloads. Although preemption could potentially alleviate this issue, it has seen limited adoption in DBMSs due to the high delivery latency of software interrupts and concerns on wasting useful work induced by read-write lock conflicts in traditional lock-based DBMSs.In this paper, we propose PreemptDB, a new database engine that leverages recent userspace interrupts available in modern CPUs to enable efficient preemptive scheduling. We present an efficient transaction context switching mechanism purely in userspace and scheduling policies that prioritize short, high-priority transactions without significantly affecting long-running queries. Our evaluation demonstrates that PreemptDB significantly reduces end-to-end latency for high-priority transactions compared to non-preemptive FIFO and cooperative scheduling methods.}, journal = {Proc. ACM Manag. 
Data}, month = jun, articleno = {182}, numpages = {25}, keywords = {database systems, low-latency transactions, preemptive scheduling, user interrupts} } @article{dsp, author = {Jon Louis Bentley}, title = {Decomposable Searching Problems}, journal = {Inf. Process. Lett.}, volume = {8}, number = {5}, pages = {244--251}, year = {1979}, url = {https://doi.org/10.1016/0020-0190(79)90117-0}, doi = {10.1016/0020-0190(79)90117-0}, timestamp = {Wed, 14 Nov 2018 10:49:25 +0100}, biburl = {https://dblp.org/rec/journals/ipl/Bentley79.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }