From 5e4ad2777acc4c2420514e39fb98b7cf2e200996 Mon Sep 17 00:00:00 2001 From: Douglas Rumbaugh Date: Sun, 27 Apr 2025 17:36:57 -0400 Subject: Initial commit --- references/references.bib | 1419 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1419 insertions(+) create mode 100644 references/references.bib (limited to 'references/references.bib') diff --git a/references/references.bib b/references/references.bib new file mode 100644 index 0000000..b29e8ec --- /dev/null +++ b/references/references.bib @@ -0,0 +1,1419 @@ +% Encoding: UTF-8 + +@article{walker74, + author = {A. J. Walker}, + title = {New fast method for generating discrete random numbers with arbitrary frequency distributions}, + journal = {Electronics Letters}, + year = {1974}, + volume = {10}, + pages = {127--128}, + number = {8} +} + +@article{oneil96, + author = {Patrick E. O'Neil and + Edward Cheng and + Dieter Gawlick and + Elizabeth J. O'Neil}, + title = {The Log-Structured Merge-Tree (LSM-Tree)}, + journal = {Acta Informatica}, + volume = {33}, + number = {4}, + pages = {351--385}, + year = {1996}, + url = {https://doi.org/10.1007/s002360050048}, + doi = {10.1007/s002360050048}, + timestamp = {Sun, 21 Jun 2020 17:38:20 +0200}, + biburl = {https://dblp.org/rec/journals/acta/ONeilCGO96.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{xie21, + author = {Dong Xie and + Jeff M. 
Phillips and + Michael Matheny and + Feifei Li}, + editor = {Guoliang Li and + Zhanhuai Li and + Stratos Idreos and + Divesh Srivastava}, + title = {Spatial Independent Range Sampling}, + booktitle = {{SIGMOD} '21: International Conference on Management of Data, Virtual + Event, China, June 20-25, 2021}, + pages = {2023--2035}, + publisher = {{ACM}}, + year = {2021}, + url = {https://doi.org/10.1145/3448016.3452806}, + doi = {10.1145/3448016.3452806}, + timestamp = {Mon, 21 Jun 2021 11:48:44 +0200}, + biburl = {https://dblp.org/rec/conf/sigmod/0001PM021.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{bloom70, + author = {Burton H. Bloom}, + title = {Space/Time Trade-offs in Hash Coding with Allowable Errors}, + journal = {Commun. {ACM}}, + volume = {13}, + number = {7}, + pages = {422--426}, + year = {1970}, + url = {https://doi.org/10.1145/362686.362692}, + doi = {10.1145/362686.362692}, + timestamp = {Wed, 14 Nov 2018 10:22:32 +0100}, + biburl = {https://dblp.org/rec/journals/cacm/Bloom70.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{dayan17, + author = {Niv Dayan and + Manos Athanassoulis and + Stratos Idreos}, + editor = {Semih Salihoglu and + Wenchao Zhou and + Rada Chirkova and + Jun Yang and + Dan Suciu}, + title = {Monkey: Optimal Navigable Key-Value Store}, + booktitle = {Proceedings of the 2017 {ACM} International Conference on Management + of Data, {SIGMOD} Conference 2017, Chicago, IL, USA, May 14-19, 2017}, + pages = {79--94}, + publisher = {{ACM}}, + year = {2017}, + url = {https://doi.org/10.1145/3035918.3064054}, + doi = {10.1145/3035918.3064054}, + timestamp = {Thu, 14 Oct 2021 10:11:38 +0200}, + biburl = {https://dblp.org/rec/conf/sigmod/DayanAI17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{dayan18, + author = {Niv Dayan and + Stratos Idreos}, + editor = {Gautam Das and + Christopher M. 
Jermaine and + Philip A. Bernstein}, + title = {Dostoevsky: Better Space-Time Trade-Offs for LSM-Tree Based Key-Value + Stores via Adaptive Removal of Superfluous Merging}, + booktitle = {Proceedings of the 2018 International Conference on Management of + Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018}, + pages = {505--520}, + publisher = {{ACM}}, + year = {2018}, + url = {https://doi.org/10.1145/3183713.3196927}, + doi = {10.1145/3183713.3196927}, + timestamp = {Wed, 21 Nov 2018 12:44:08 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/DayanI18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{dayan22, + author = {Niv Dayan and + Tamar Weiss and + Shmuel Dashevsky and + Michael Pan and + Edward Bortnikov and + Moshe Twitto}, + title = {Spooky: Granulating LSM-Tree Compactions Correctly}, + journal = {Proc. {VLDB} Endow.}, + volume = {15}, + number = {11}, + pages = {3071--3084}, + year = {2022}, + url = {https://www.vldb.org/pvldb/vol15/p3071-dayan.pdf}, + timestamp = {Mon, 26 Sep 2022 17:09:16 +0200}, + biburl = {https://dblp.org/rec/journals/pvldb/DayanWDPBT22.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{zhao22, + author = {Zhuoyue Zhao and + Dong Xie and + Feifei Li}, + title = {AB-tree: Index for Concurrent Random Sampling and Updates}, + journal = {Proc. 
{VLDB} Endow.}, + volume = {15}, + number = {9}, + pages = {1835--1847}, + year = {2022}, + url = {https://www.vldb.org/pvldb/vol15/p1835-zhao.pdf}, + timestamp = {Tue, 26 Jul 2022 17:09:52 +0200}, + biburl = {https://dblp.org/rec/journals/pvldb/ZhaoXL22.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{balmau19, + author = {Oana Balmau and + Florin Dinu and + Willy Zwaenepoel and + Karan Gupta and + Ravishankar Chandhiramoorthi and + Diego Didona}, + editor = {Dahlia Malkhi and + Dan Tsafrir}, + title = {{SILK:} Preventing Latency Spikes in Log-Structured Merge Key-Value + Stores}, + booktitle = {2019 {USENIX} Annual Technical Conference, {USENIX} {ATC} 2019, Renton, + WA, USA, July 10-12, 2019}, + pages = {753--766}, + publisher = {{USENIX} Association}, + year = {2019}, + url = {https://www.usenix.org/conference/atc19/presentation/balmau}, + timestamp = {Mon, 01 Feb 2021 17:03:06 +0100}, + biburl = {https://dblp.org/rec/conf/usenix/BalmauDZGCD19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{afshani17, + author = {Peyman Afshani and + Zhewei Wei}, + editor = {Kirk Pruhs and + Christian Sohler}, + title = {Independent Range Sampling, Revisited}, + booktitle = {25th Annual European Symposium on Algorithms, {ESA} 2017, September + 4-6, 2017, Vienna, Austria}, + series = {LIPIcs}, + volume = {87}, + pages = {3:1--3:14}, + publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik}, + year = {2017}, + url = {https://doi.org/10.4230/LIPIcs.ESA.2017.3}, + doi = {10.4230/LIPIcs.ESA.2017.3}, + timestamp = {Tue, 11 Feb 2020 15:52:14 +0100}, + biburl = {https://dblp.org/rec/conf/esa/AfshaniW17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{tao22, + author = {Yufei Tao}, + editor = {Leonid Libkin and + Pablo Barcel{\'{o}}}, + title = {Algorithmic Techniques for Independent Query Sampling}, + booktitle = {{PODS} '22: 
International Conference on Management of Data, Philadelphia, + PA, USA, June 12 - 17, 2022}, + pages = {129--138}, + publisher = {{ACM}}, + year = {2022}, + url = {https://doi.org/10.1145/3517804.3526068}, + doi = {10.1145/3517804.3526068}, + timestamp = {Wed, 07 Dec 2022 23:12:46 +0100}, + biburl = {https://dblp.org/rec/conf/pods/Tao22.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{olken89, + author = {Frank Olken and + Doron Rotem}, + editor = {Peter M. G. Apers and + Gio Wiederhold}, + title = {Random Sampling from {B+} Trees}, + booktitle = {Proceedings of the Fifteenth International Conference on Very Large + Data Bases, August 22-25, 1989, Amsterdam, The Netherlands}, + pages = {269--277}, + publisher = {Morgan Kaufmann}, + year = {1989}, + url = {http://www.vldb.org/conf/1989/P269.PDF}, + timestamp = {Wed, 29 Mar 2017 16:45:23 +0200}, + biburl = {https://dblp.org/rec/conf/vldb/OlkenR89.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{allendorf23, + author = {Daniel Allendorf}, + title = {A Simple Data Structure for Maintaining a Discrete Probability Distribution}, + journal = {CoRR}, + volume = {abs/2302.05682}, + year = {2023}, + url = {https://doi.org/10.48550/arXiv.2302.05682}, + doi = {10.48550/arXiv.2302.05682}, + eprinttype = {arXiv}, + eprint = {2302.05682}, + timestamp = {Sun, 19 Feb 2023 18:44:53 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2302-05682.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{hagerup93, + author = {Torben Hagerup and + Kurt Mehlhorn and + J. Ian Munro}, + editor = {Andrzej Lingas and + Rolf G. 
Karlsson and + Svante Carlsson}, + title = {Maintaining Discrete Probability Distributions Optimally}, + booktitle = {Automata, Languages and Programming, 20th International Colloquium, + ICALP93, Lund, Sweden, July 5-9, 1993, Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {700}, + pages = {253--264}, + publisher = {Springer}, + year = {1993}, + url = {https://doi.org/10.1007/3-540-56939-1\_77}, + doi = {10.1007/3-540-56939-1\_77}, + timestamp = {Tue, 14 May 2019 10:00:44 +0200}, + biburl = {https://dblp.org/rec/conf/icalp/HagerupMM93.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{matias03, + author = {Yossi Matias and + Jeffrey Scott Vitter and + Wen{-}Chun Ni}, + title = {Dynamic Generation of Discrete Random Variates}, + journal = {Theory Comput. Syst.}, + volume = {36}, + number = {4}, + pages = {329--358}, + year = {2003}, + url = {https://doi.org/10.1007/s00224-003-1078-6}, + doi = {10.1007/s00224-003-1078-6}, + timestamp = {Tue, 21 Mar 2023 21:14:25 +0100}, + biburl = {https://dblp.org/rec/journals/mst/MatiasVN03.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{vose91, + author = {Michael D. Vose}, + title = {A Linear Algorithm For Generating Random Numbers With a Given Distribution}, + journal = {{IEEE} Trans. Software Eng.}, + volume = {17}, + number = {9}, + pages = {972--975}, + year = {1991}, + url = {https://doi.org/10.1109/32.92917}, + doi = {10.1109/32.92917}, + timestamp = {Wed, 17 May 2017 10:56:35 +0200}, + biburl = {https://dblp.org/rec/journals/tse/Vose91.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{dong21, + author = {Siying Dong and + Andrew Kryczka and + Yanqin Jin and + Michael Stumm}, + title = {RocksDB: Evolution of Development Priorities in a Key-value Store + Serving Large-scale Applications}, + journal = {{ACM} Trans. 
Storage}, + volume = {17}, + number = {4}, + pages = {26:1--26:32}, + year = {2021}, + url = {https://doi.org/10.1145/3483840}, + doi = {10.1145/3483840}, + timestamp = {Sun, 12 Feb 2023 18:49:27 +0100}, + biburl = {https://dblp.org/rec/journals/tos/DongKJS21.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{dayan18-1, + author = {Niv Dayan and + Manos Athanassoulis and + Stratos Idreos}, + title = {Optimal Bloom Filters and Adaptive Merging for LSM-Trees}, + journal = {{ACM} Trans. Database Syst.}, + volume = {43}, + number = {4}, + pages = {16:1--16:48}, + year = {2018}, + url = {https://doi.org/10.1145/3276980}, + doi = {10.1145/3276980}, + timestamp = {Sat, 05 Sep 2020 17:52:22 +0200}, + biburl = {https://dblp.org/rec/journals/tods/DayanAI18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{zhu21, + author = {Zichen Zhu and + Ju Hyoung Mun and + Aneesh Raman and + Manos Athanassoulis}, + editor = {Danica Porobic and + Spyros Blanas}, + title = {Reducing Bloom Filter {CPU} Overhead in LSM-Trees on Modern Storage + Devices}, + booktitle = {Proceedings of the 17th International Workshop on Data Management + on New Hardware, DaMoN 2021, 21 June 2021, Virtual Event, China}, + pages = {1:1--1:10}, + publisher = {{ACM}}, + year = {2021}, + url = {https://doi.org/10.1145/3465998.3466002}, + doi = {10.1145/3465998.3466002}, + timestamp = {Thu, 14 Oct 2021 09:48:02 +0200}, + biburl = {https://dblp.org/rec/conf/damon/ZhuMRA21.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{dayan19, + author = {Niv Dayan and + Stratos Idreos}, + editor = {Peter A. 
Boncz and + Stefan Manegold and + Anastasia Ailamaki and + Amol Deshpande and + Tim Kraska}, + title = {The Log-Structured Merge-Bush {\&} the Wacky Continuum}, + booktitle = {Proceedings of the 2019 International Conference on Management of + Data, {SIGMOD} Conference 2019, Amsterdam, The Netherlands, June 30 + - July 5, 2019}, + pages = {449--466}, + publisher = {{ACM}}, + year = {2019}, + url = {https://doi.org/10.1145/3299869.3319903}, + doi = {10.1145/3299869.3319903}, + timestamp = {Sat, 22 Jun 2019 17:10:04 +0200}, + biburl = {https://dblp.org/rec/conf/sigmod/DayanI19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{hu14, + author = {Xiaocheng Hu and + Miao Qiao and + Yufei Tao}, + editor = {Richard Hull and + Martin Grohe}, + title = {Independent range sampling}, + booktitle = {Proceedings of the 33rd {ACM} {SIGMOD-SIGACT-SIGART} Symposium on + Principles of Database Systems, PODS'14, Snowbird, UT, USA, June 22-27, + 2014}, + pages = {246--255}, + publisher = {{ACM}}, + year = {2014}, + url = {https://doi.org/10.1145/2594538.2594545}, + doi = {10.1145/2594538.2594545}, + timestamp = {Thu, 29 Sep 2022 08:01:46 +0200}, + biburl = {https://dblp.org/rec/conf/pods/HuQT14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{saxe79, + author = {James B. Saxe and + Jon Louis Bentley}, + title = {Transforming Static Data Structures to Dynamic Structures (Abridged + Version)}, + booktitle = {20th Annual Symposium on Foundations of Computer Science, San Juan, + Puerto Rico, 29-31 October 1979}, + pages = {148--168}, + publisher = {{IEEE} Computer Society}, + year = {1979}, + url = {https://doi.org/10.1109/SFCS.1979.47}, + doi = {10.1109/SFCS.1979.47}, + timestamp = {Thu, 23 Mar 2023 23:57:52 +0100}, + biburl = {https://dblp.org/rec/conf/focs/SaxeB79.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{overmars81, + author = {Mark H. 
Overmars and + Jan van Leeuwen}, + title = {Worst-Case Optimal Insertion and Deletion Methods for Decomposable + Searching Problems}, + journal = {Inf. Process. Lett.}, + volume = {12}, + number = {4}, + pages = {168--173}, + year = {1981}, + url = {https://doi.org/10.1016/0020-0190(81)90093-4}, + doi = {10.1016/0020-0190(81)90093-4}, + timestamp = {Fri, 26 May 2017 22:54:44 +0200}, + biburl = {https://dblp.org/rec/journals/ipl/OvermarsL81a.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{naidan14, + author = {Bilegsaikhan Naidan and + Magnus Lie Hetland}, + title = {Static-to-dynamic transformation for metric indexing structures (extended + version)}, + journal = {Inf. Syst.}, + volume = {45}, + pages = {48--60}, + year = {2014}, + url = {https://doi.org/10.1016/j.is.2013.08.002}, + doi = {10.1016/j.is.2013.08.002}, + timestamp = {Sat, 20 May 2017 00:24:08 +0200}, + biburl = {https://dblp.org/rec/journals/is/NaidanH14.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{almodaresi23, + author = {Fatemeh Almodaresi and + Jamshed Khan and + Sergey Madaminov and + Michael Ferdman and + Rob Johnson and + Prashant Pandey and + Rob Patro}, + title = {An incrementally updatable and scalable system for large-scale sequence + search using the Bentley-Saxe transformation}, + journal = {Bioinform.}, + volume = {38}, + number = {12}, + pages = {3155--3163}, + year = {2022}, + url = {https://doi.org/10.1093/bioinformatics/btac142}, + doi = {10.1093/bioinformatics/btac142}, + timestamp = {Mon, 22 Aug 2022 08:21:09 +0200}, + biburl = {https://dblp.org/rec/journals/bioinformatics/AlmodaresiKMFJP22.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@book{bulmer79, + title = {Principles of Statistics}, + author = {M.G. 
Bulmer}, + year = {1979}, + publisher = {Dover}, + address = {New York} +} + +@inproceedings{olken86, + author = {Frank Olken and + Doron Rotem}, + editor = {Wesley W. Chu and + Georges Gardarin and + Setsuo Ohsuga and + Yahiko Kambayashi}, + title = {Simple Random Sampling from Relational Databases}, + booktitle = {VLDB'86 Twelfth International Conference on Very Large Data Bases, + August 25-28, 1986, Kyoto, Japan, Proceedings}, + pages = {160--169}, + publisher = {Morgan Kaufmann}, + year = {1986}, + url = {http://www.vldb.org/conf/1986/P160.PDF}, + timestamp = {Wed, 29 Mar 2017 16:45:23 +0200}, + biburl = {https://dblp.org/rec/conf/vldb/OlkenR86.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@misc {postgres-doc, + title = {PostgreSQL Documentation}, + url = {https://www.postgresql.org/docs/15/sql-select.html}, + year = {2023} +} + + +@online {pinecone, + title = {Pinecone DB}, + url = {https://www.pinecone.io/}, + year = {2023} +} + +@online {neptune, + title = {Amazon Neptune}, + url = {https://aws.amazon.com/neptune/}, + year = {2023} +} + +@online {teradata, + title = {Teradata}, + url = {https://www.teradata.com/}, + year = {2023} +} + +@online {pinot, + title = {Apache Pinot}, + url = {https://pinot.apache.org/}, + year = {2023} +} + +@online {pinecone-db, + title = {Pinecone DB: Hierarchical Navigable Small Worlds}, + url = {https://www.pinecone.io/learn/series/faiss/hnsw/}, + year = {2023} +} + +@online {postgis-doc, + title = {Introduction to PostGIS: Spatial Indexing}, + url = {https://postgis.net/workshops/postgis-intro/indexing.html}, + year = {2023} +} + +@online {mysql-btree-hash, + title = {MySQL Documentation - Comparison of B-tree and Hash Indexes}, + url = {https://dev.mysql.com/doc/refman/8.0/en/index-btree-hash.html}, + year = {2023} +} + +@article{olken95, +title = {Random sampling from databases: a survey}, +journal = {Statistics and Computing}, +volume = {5}, +pages = {25--42}, +year = {1995}, +doi = 
{10.1007/BF00140664}, +author = {Frank Olken and Doron Rotem} +} + +@inproceedings{hu15, + author = {Xiaocheng Hu and + Miao Qiao and + Yufei Tao}, + editor = {Tova Milo and + Diego Calvanese}, + title = {External Memory Stream Sampling}, + booktitle = {Proceedings of the 34th {ACM} Symposium on Principles of Database + Systems, {PODS} 2015, Melbourne, Victoria, Australia, May 31 - June + 4, 2015}, + pages = {229--239}, + publisher = {{ACM}}, + year = {2015}, + url = {https://doi.org/10.1145/2745754.2745757}, + doi = {10.1145/2745754.2745757}, + timestamp = {Thu, 29 Sep 2022 08:01:46 +0200}, + biburl = {https://dblp.org/rec/conf/pods/HuQT15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{vitter85, + author = {Jeffrey Scott Vitter}, + title = {Random Sampling with a Reservoir}, + journal = {{ACM} Trans. Math. Softw.}, + volume = {11}, + number = {1}, + pages = {37--57}, + year = {1985}, + url = {https://doi.org/10.1145/3147.3165}, + doi = {10.1145/3147.3165}, + timestamp = {Tue, 21 Mar 2023 21:15:05 +0100}, + biburl = {https://dblp.org/rec/journals/toms/Vitter85.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{zhang18, + author = {Huanchen Zhang and + Hyeontaek Lim and + Viktor Leis and + David G. Andersen and + Michael Kaminsky and + Kimberly Keeton and + Andrew Pavlo}, + editor = {Gautam Das and + Christopher M. Jermaine and + Philip A. 
Bernstein}, + title = {SuRF: Practical Range Query Filtering with Fast Succinct Tries}, + booktitle = {Proceedings of the 2018 International Conference on Management of + Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018}, + pages = {323--336}, + publisher = {{ACM}}, + year = {2018}, + url = {https://doi.org/10.1145/3183713.3196931}, + doi = {10.1145/3183713.3196931}, + timestamp = {Sun, 25 Oct 2020 22:52:40 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/ZhangLLAKKP18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{siqiang20, + author = {Siqiang Luo and + Subarna Chatterjee and + Rafael Ketsetsidis and + Niv Dayan and + Wilson Qin and + Stratos Idreos}, + editor = {David Maier and + Rachel Pottinger and + AnHai Doan and + Wang{-}Chiew Tan and + Abdussalam Alawini and + Hung Q. Ngo}, + title = {Rosetta: {A} Robust Space-Time Optimized Range Filter for Key-Value + Stores}, + booktitle = {Proceedings of the 2020 International Conference on Management of + Data, {SIGMOD} Conference 2020, online conference [Portland, OR, USA], + June 14-19, 2020}, + pages = {2071--2086}, + publisher = {{ACM}}, + year = {2020}, + url = {https://doi.org/10.1145/3318464.3389731}, + doi = {10.1145/3318464.3389731}, + timestamp = {Wed, 04 May 2022 13:02:28 +0200}, + biburl = {https://dblp.org/rec/conf/sigmod/LuoCKDQI20.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@inproceedings{blinkdb, + author = {Sameer Agarwal and + Barzan Mozafari and + Aurojit Panda and + Henry Milner and + Samuel Madden and + Ion Stoica}, + editor = {Zdenek Hanz{\'{a}}lek and + Hermann H{\"{a}}rtig and + Miguel Castro and + M. 
Frans Kaashoek}, + title = {BlinkDB: queries with bounded errors and bounded response times on + very large data}, + booktitle = {Eighth Eurosys Conference 2013, EuroSys '13, Prague, Czech Republic, + April 14-17, 2013}, + pages = {29--42}, + publisher = {{ACM}}, + year = {2013}, + url = {https://doi.org/10.1145/2465351.2465355}, + doi = {10.1145/2465351.2465355}, + timestamp = {Wed, 06 Jul 2022 14:43:33 +0200}, + biburl = {https://dblp.org/rec/conf/eurosys/AgarwalMPMMS13.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{sps, + author = {Bolin Ding and + Silu Huang and + Surajit Chaudhuri and + Kaushik Chakrabarti and + Chi Wang}, + editor = {Fatma {\"{O}}zcan and + Georgia Koutrika and + Sam Madden}, + title = {Sample + Seek: Approximating Aggregates with Distribution Precision + Guarantee}, + booktitle = {Proceedings of the 2016 International Conference on Management of + Data, {SIGMOD} Conference 2016, San Francisco, CA, USA, June 26 - + July 01, 2016}, + pages = {679--694}, + publisher = {{ACM}}, + year = {2016}, + url = {https://doi.org/10.1145/2882903.2915249}, + doi = {10.1145/2882903.2915249}, + timestamp = {Wed, 14 Nov 2018 10:56:20 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/DingHCC016.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@phdthesis{olken-thesis, + author = {Frank Olken}, + title = {Random Sampling from Databases}, + school = {University of California at Berkeley}, + year = {1993}, + timestamp = {Thu, 03 Jan 2002 12:33:26 +0100}, + biburl = {https://dblp.org/rec/phd/Olken93.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{ml-sampling, + author = {Silu Huang and + Chi Wang and + Bolin Ding and + Surajit Chaudhuri}, + title = {Efficient Identification of Approximate Best Configuration of Training + in Large Datasets}, + booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI} + 2019, The 
Thirty-First Innovative Applications of Artificial Intelligence + Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational + Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii, + USA, January 27 - February 1, 2019}, + pages = {3862--3869}, + publisher = {{AAAI} Press}, + year = {2019}, + url = {https://doi.org/10.1609/aaai.v33i01.33013862}, + doi = {10.1609/aaai.v33i01.33013862}, + timestamp = {Tue, 02 Feb 2021 08:00:44 +0100}, + biburl = {https://dblp.org/rec/conf/aaai/Huang0DC19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{quickr, + author = {Srikanth Kandula and + Anil Shanbhag and + Aleksandar Vitorovic and + Matthaios Olma and + Robert Grandl and + Surajit Chaudhuri and + Bolin Ding}, + editor = {Fatma {\"{O}}zcan and + Georgia Koutrika and + Sam Madden}, + title = {Quickr: Lazily Approximating Complex AdHoc Queries in BigData Clusters}, + booktitle = {Proceedings of the 2016 International Conference on Management of + Data, {SIGMOD} Conference 2016, San Francisco, CA, USA, June 26 - + July 01, 2016}, + pages = {631--646}, + publisher = {{ACM}}, + year = {2016}, + url = {https://doi.org/10.1145/2882903.2882940}, + doi = {10.1145/2882903.2882940}, + timestamp = {Wed, 14 Nov 2018 10:56:20 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/KandulaSVOGCD16.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{verdict, + author = {Yongjoo Park and + Barzan Mozafari and + Joseph Sorenson and + Junhao Wang}, + editor = {Gautam Das and + Christopher M. Jermaine and + Philip A. 
Bernstein}, + title = {VerdictDB: Universalizing Approximate Query Processing}, + booktitle = {Proceedings of the 2018 International Conference on Management of + Data, {SIGMOD} Conference 2018, Houston, TX, USA, June 10-15, 2018}, + pages = {1461--1476}, + publisher = {{ACM}}, + year = {2018}, + url = {https://doi.org/10.1145/3183713.3196905}, + doi = {10.1145/3183713.3196905}, + timestamp = {Wed, 21 Nov 2018 12:44:08 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/ParkMSW18.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{irsra, + author = {Peyman Afshani and + Jeff M. Phillips}, + editor = {Gill Barequet and + Yusu Wang}, + title = {Independent Range Sampling, Revisited Again}, + booktitle = {35th International Symposium on Computational Geometry, SoCG 2019, + June 18-21, 2019, Portland, Oregon, {USA}}, + series = {LIPIcs}, + volume = {129}, + pages = {4:1--4:13}, + publisher = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik}, + year = {2019}, + url = {https://doi.org/10.4230/LIPIcs.SoCG.2019.4}, + doi = {10.4230/LIPIcs.SoCG.2019.4}, + timestamp = {Mon, 02 Jan 2023 09:02:13 +0100}, + biburl = {https://dblp.org/rec/conf/compgeom/AfshaniP19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{data-twitter1, + author = {Haewoon Kwak and + Changhyun Lee and + Hosung Park and + Sue B. 
Moon}, + editor = {Michael Rappa and + Paul Jones and + Juliana Freire and + Soumen Chakrabarti}, + title = {What is Twitter, a social network or a news media?}, + booktitle = {Proceedings of the 19th International Conference on World Wide Web, + {WWW} 2010, Raleigh, North Carolina, USA, April 26-30, 2010}, + pages = {591--600}, + publisher = {{ACM}}, + year = {2010}, + url = {https://doi.org/10.1145/1772690.1772751}, + doi = {10.1145/1772690.1772751}, + timestamp = {Sun, 02 Jun 2019 21:15:56 +0200}, + biburl = {https://dblp.org/rec/conf/www/KwakLPM10.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@online{data-twitter, + title = {Twitter Dataset}, + url = {https://github.com/ANLAB-KAIST/traces/releases/tag/twitter_rv.net}, + year = {2023} +} + +@online{data-delicious, + title = {Delicious Dataset}, + url = {http://konect.cc/networks/delicious-ti/}, + year = {2023} +} + +@online{data-osm, + title = {Open Street Map Dataset}, + url = {https://planet.openstreetmap.org/}, + year = {2023} +} + +@inproceedings{golan-gueta15, + author = {Guy Golan{-}Gueta and + Edward Bortnikov and + Eshcar Hillel and + Idit Keidar}, + editor = {Laurent R{\'{e}}veill{\`{e}}re and + Tim Harris and + Maurice Herlihy}, + title = {Scaling concurrent log-structured data stores}, + booktitle = {Proceedings of the Tenth European Conference on Computer Systems, + EuroSys 2015, Bordeaux, France, April 21-24, 2015}, + pages = {32:1--32:14}, + publisher = {{ACM}}, + year = {2015}, + url = {https://doi.org/10.1145/2741948.2741973}, + doi = {10.1145/2741948.2741973}, + timestamp = {Wed, 14 Nov 2018 10:57:04 +0100}, + biburl = {https://dblp.org/rec/conf/eurosys/Golan-GuetaBHK15.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{custers19, + author = {Bram Custers and + Mees van de Kerkhof and + Wouter Meulemans and + Bettina Speckmann and + Frank Staals}, + editor = {Farnoush Banaei Kashani and + Goce Trajcevski and + Ralf 
Hartmut G{\"{u}}ting and + Lars Kulik and + Shawn D. Newsam}, + title = {Maximum Physically Consistent Trajectories}, + booktitle = {Proceedings of the 27th {ACM} {SIGSPATIAL} International Conference + on Advances in Geographic Information Systems, {SIGSPATIAL} 2019, + Chicago, IL, USA, November 5-8, 2019}, + pages = {79--88}, + publisher = {{ACM}}, + year = {2019}, + url = {https://doi.org/10.1145/3347146.3359363}, + doi = {10.1145/3347146.3359363}, + timestamp = {Thu, 14 Nov 2019 10:14:43 +0100}, + biburl = {https://dblp.org/rec/conf/gis/CustersKMSS19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{aumuller20, + author = {Martin Aum{\"{u}}ller and + Rasmus Pagh and + Francesco Silvestri}, + editor = {Dan Suciu and + Yufei Tao and + Zhewei Wei}, + title = {Fair Near Neighbor Search: Independent Range Sampling in High Dimensions}, + booktitle = {Proceedings of the 39th {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles + of Database Systems, {PODS} 2020, Portland, OR, USA, June 14-19, 2020}, + pages = {191--204}, + publisher = {{ACM}}, + year = {2020}, + url = {https://doi.org/10.1145/3375395.3387648}, + doi = {10.1145/3375395.3387648}, + timestamp = {Thu, 29 Sep 2022 08:01:46 +0200}, + biburl = {https://dblp.org/rec/conf/pods/0001P020.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{cohen23, + author = {Edith Cohen}, + editor = {Floris Geerts and + Hung Q. 
Ngo and + Stavros Sintos}, + title = {Sampling Big Ideas in Query Optimization}, + booktitle = {Proceedings of the 42nd {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles + of Database Systems, {PODS} 2023, Seattle, WA, USA, June 18-23, 2023}, + pages = {361--371}, + publisher = {{ACM}}, + year = {2023}, + url = {https://doi.org/10.1145/3584372.3589935}, + doi = {10.1145/3584372.3589935}, + timestamp = {Thu, 15 Jun 2023 21:57:01 +0200}, + biburl = {https://dblp.org/rec/conf/pods/Cohen23.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + + +@inproceedings{gryz04, + author = {Jarek Gryz and + Junjie Guo and + Linqi Liu and + Calisto Zuzarte}, + editor = {Gerhard Weikum and + Arnd Christian K{\"{o}}nig and + Stefan De{\ss}loch}, + title = {Query Sampling in {DB2} Universal Database}, + booktitle = {Proceedings of the {ACM} {SIGMOD} International Conference on Management + of Data, Paris, France, June 13-18, 2004}, + pages = {839--843}, + publisher = {{ACM}}, + year = {2004}, + url = {https://doi.org/10.1145/1007568.1007664}, + doi = {10.1145/1007568.1007664}, + timestamp = {Thu, 11 Mar 2021 15:20:15 +0100}, + biburl = {https://dblp.org/rec/conf/sigmod/GryzGLZ04.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{ben-eliezer20, + author = {Omri Ben{-}Eliezer and + Eylon Yogev}, + editor = {Dan Suciu and + Yufei Tao and + Zhewei Wei}, + title = {The Adversarial Robustness of Sampling}, + booktitle = {Proceedings of the 39th {ACM} {SIGMOD-SIGACT-SIGAI} Symposium on Principles + of Database Systems, {PODS} 2020, Portland, OR, USA, June 14-19, 2020}, + pages = {49--62}, + publisher = {{ACM}}, + year = {2020}, + url = {https://doi.org/10.1145/3375395.3387643}, + doi = {10.1145/3375395.3387643}, + timestamp = {Thu, 29 Sep 2022 08:01:46 +0200}, + biburl = {https://dblp.org/rec/conf/pods/Ben-EliezerY20.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{li19, + 
author = {Feifei Li and + Bin Wu and + Ke Yi and + Zhuoyue Zhao}, + title = {Wander Join and {XDB:} Online Aggregation via Random Walks}, + journal = {{ACM} Trans. Database Syst.}, + volume = {44}, + number = {1}, + pages = {2:1--2:41}, + year = {2019}, + url = {https://doi.org/10.1145/3284551}, + doi = {10.1145/3284551}, + timestamp = {Sun, 02 Oct 2022 15:51:46 +0200}, + biburl = {https://dblp.org/rec/journals/tods/LiWYZ19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{rdd, + author = {Matei Zaharia and + Mosharaf Chowdhury and + Tathagata Das and + Ankur Dave and + Justin Ma and + Murphy McCauly and + Michael J. Franklin and + Scott Shenker and + Ion Stoica}, + editor = {Steven D. Gribble and + Dina Katabi}, + title = {Resilient Distributed Datasets: {A} Fault-Tolerant Abstraction for + In-Memory Cluster Computing}, + booktitle = {Proceedings of the 9th {USENIX} Symposium on Networked Systems Design + and Implementation, {NSDI} 2012, San Jose, CA, USA, April 25-27, 2012}, + pages = {15--28}, + publisher = {{USENIX} Association}, + year = {2012}, + url = {https://www.usenix.org/conference/nsdi12/technical-sessions/presentation/zaharia}, + timestamp = {Tue, 21 Mar 2023 21:02:49 +0100}, + biburl = {https://dblp.org/rec/conf/nsdi/ZahariaCDDMMFSS12.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{hdfs, + author = {Konstantin Shvachko and + Hairong Kuang and + Sanjay Radia and + Robert Chansler}, + editor = {Mohammed G. 
Khatib and + Xubin He and + Michael Factor}, + title = {The {Hadoop} Distributed File System}, + booktitle = {{IEEE} 26th Symposium on Mass Storage Systems and Technologies, {MSST} + 2010, Lake Tahoe, Nevada, USA, May 3-7, 2010}, + pages = {1--10}, + publisher = {{IEEE} Computer Society}, + year = {2010}, + url = {https://doi.org/10.1109/MSST.2010.5496972}, + doi = {10.1109/MSST.2010.5496972}, + timestamp = {Fri, 24 Mar 2023 00:01:51 +0100}, + biburl = {https://dblp.org/rec/conf/mss/ShvachkoKRC10.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + +@book{overmars83, + author = {Mark H. Overmars}, + title = {The Design of Dynamic Data Structures}, + series = {Lecture Notes in Computer Science}, + volume = {156}, + publisher = {Springer}, + year = {1983}, + url = {https://doi.org/10.1007/BFb0014927}, + doi = {10.1007/BFb0014927}, + isbn = {3-540-12330-X}, + timestamp = {Tue, 14 May 2019 10:00:35 +0200}, + biburl = {https://dblp.org/rec/books/sp/Overmars83.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@inproceedings{vptree, + author = {Peter N. Yianilos}, + editor = {Vijaya Ramachandran}, + title = {Data Structures and Algorithms for Nearest Neighbor Search in General + Metric Spaces}, + booktitle = {Proceedings of the Fourth Annual {ACM/SIGACT-SIAM} Symposium on Discrete + Algorithms, 25-27 January 1993, Austin, Texas, {USA}}, + year = {1993}, +} + +@inproceedings{mtree, + author = {Paolo Ciaccia and + Marco Patella and + Pavel Zezula}, + title = {M-tree: An Efficient Access Method for Similarity Search in Metric + Spaces}, + booktitle = {VLDB'97, Proceedings of 23rd International Conference on Very Large Data Bases}, + year = {1997}, +} + +@inproceedings{ALEX, + author = {Jialin Ding and + Umar Farooq Minhas and + Jia Yu and + Chi Wang and + Jaeyoung Do and + Yinan Li and + Hantian Zhang and + Badrish Chandramouli and + Johannes Gehrke and + Donald Kossmann and + David B. 
Lomet and + Tim Kraska}, + title = {{ALEX:} An Updatable Adaptive Learned Index}, + booktitle = {Proceedings of the 2020 {ACM} International Conference on Management of + Data}, + year = {2020}, +} + +@article{pgm, + author = {Paolo Ferragina and + Giorgio Vinciguerra}, + title = {The {PGM-index}: a fully-dynamic compressed learned index with provable + worst-case bounds}, + journal = {Proc. {VLDB} Endow.}, + volume = {13}, + number = {8}, + year = {2020}, +} + +@article{sosd-datasets, + author = {Ryan Marcus and + Andreas Kipf and + Alexander van Renen and + Mihail Stoian and + Sanchit Misra and + Alfons Kemper and + Thomas Neumann and + Tim Kraska}, + title = {Benchmarking Learned Indexes}, + journal = {Proc. {VLDB} Endow.}, + volume = {14}, + number = {1}, + pages = {1--13}, + year = {2020} +} + +@inproceedings{RMI, +author = {Kraska, Tim and Beutel, Alex and Chi, Ed H. and Dean, Jeffrey and Polyzotis, Neoklis}, +title = {The Case for Learned Index Structures}, +year = {2018}, +booktitle = {Proceedings of the 2018 International Conference on Management of Data}, +series = {SIGMOD '18} +} + +@article{10.14778/3551793.3551848, +author = {Wongkham, Chaichon and Lu, Baotong and Liu, Chris and Zhong, Zhicong and Lo, Eric and Wang, Tianzheng}, +title = {Are Updatable Learned Indexes Ready?}, +year = {2022}, +publisher = {VLDB Endowment}, +volume = {15}, +number = {11}, +journal = {Proc. VLDB Endow.}, +} + +@article{10.14778/2850583.2850584, +author = {Wang, Lu and Christensen, Robert and Li, Feifei and Yi, Ke}, +title = {Spatial Online Sampling and Aggregation}, +year = {2015}, +publisher = {VLDB Endowment}, +volume = {9}, +number = {3}, +journal = {Proc. 
VLDB Endow.}, +} + +@article{plex, + author = {Mihail Stoian and + Andreas Kipf and + Ryan Marcus and + Tim Kraska}, + title = {{PLEX:} Towards Practical Learned Indexing}, + journal = {CoRR}, + volume = {abs/2108.05117}, + year = {2021}, +} + +@misc{sbw, + author = {Cardellino, Cristian}, + title = {Spanish {Billion} {Words} {Corpus} and {Embeddings}}, + url = {https://crscardellino.github.io/SBWCE/}, + month = aug, + year = {2019} +} + +@article{DBLP:journals/corr/abs-1712-01208, + author = {Tim Kraska and + Alex Beutel and + Ed H. Chi and + Jeffrey Dean and + Neoklis Polyzotis}, + title = {The Case for Learned Index Structures}, + journal = {CoRR}, + year = {2017}, +} + +@article{DBLP:journals/corr/abs-1903-00507, + author = {Giorgio Vinciguerra and + Paolo Ferragina and + Michele Miccinesi}, + title = {Superseding traditional indexes by orchestrating learning and geometry}, + journal = {CoRR}, + year = {2019}, +} + +@article{DBLP:journals/corr/abs-1905-08898, + author = {Jialin Ding and + Umar Farooq Minhas and + Hantian Zhang and + Yinan Li and + Chi Wang and + Badrish Chandramouli and + Johannes Gehrke and + Donald Kossmann and + David B. Lomet}, + title = {{ALEX:} An Updatable Adaptive Learned Index}, + journal = {CoRR}, + year = {2019}, +} + +@inproceedings{10.1145/3332466.3374547, +author = {Tang, Chuzhe and Wang, Youyun and Dong, Zhiyuan and Hu, Gansen and Wang, Zhaoguo and Wang, Minjie and Chen, Haibo}, +title = {{XIndex}: A Scalable Learned Index for Multicore Data Storage}, +year = {2020}, +booktitle = {Proceedings of the 25th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, +series = {PPoPP '20} +} + +@article{10.14778/3489496.3489512, +author = {Li, Pengfei and Hua, Yu and Jia, Jingnan and Zuo, Pengfei}, +title = {{FINEdex}: A Fine-Grained Learned Index Scheme for Scalable and Concurrent Memory Systems}, +year = {2021}, +publisher = {VLDB Endowment}, +volume = {15}, +number = {2}, +journal = {Proc. 
VLDB Endow.}, +} + +@inproceedings{10.1145/2933349.2933352, +author = {Leis, Viktor and Scheibner, Florian and Kemper, Alfons and Neumann, Thomas}, +title = {The {ART} of Practical Synchronization}, +year = {2016}, +booktitle = {Proceedings of the 12th International Workshop on Data Management on New Hardware}, +series = {DaMoN '16} +} + +@article{DBLP:journals/corr/abs-1910-06169, + author = {Paolo Ferragina and + Giorgio Vinciguerra}, + title = {The {PGM-index}: a multicriteria, compressed and learned approach to + data indexing}, + journal = {CoRR}, + year = {2019}, +} + +@article{byods-datalog, +author = {Sahebolamri, Arash and Barrett, Langston and Moore, Scott and Micinski, Kristopher}, +title = {Bring Your Own Data Structures to {Datalog}}, +year = {2023}, +issue_date = {October 2023}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {7}, +number = {OOPSLA2}, +url = {https://doi.org/10.1145/3622840}, +doi = {10.1145/3622840}, +abstract = {The restricted logic programming language Datalog has become a popular implementation target for deductive-analytic workloads including social-media analytics and program analysis. Modern Datalog engines compile Datalog rules to joins over explicit representations of relations—often B-trees or hash maps. While these modern engines have enabled high scalability in many application domains, they have a crucial weakness: achieving the desired algorithmic complexity may be impossible due to representation-imposed overhead of the engine’s data structures. In this paper, we present the "Bring Your Own Data Structures" (Byods) approach, in the form of a DSL embedded in Rust. 
Using Byods, an engineer writes logical rules which are implicitly parametric on the concrete data structure representation; our implementation provides an interface to enable "bringing their own" data structures to represent relations, which harmoniously interact with code generated by our compiler (implemented as Rust procedural macros). We formalize the semantics of Byods as an extension of Datalog’s; our formalization captures the key properties demanded of data structures compatible with Byods, including properties required for incrementalized (semi-na\"{\i}ve) evaluation. We detail many applications of the Byods approach, implementing analyses requiring specialized data structures for transitive and equivalence relations to scale, including an optimized version of the Rust borrow checker Polonius; highly-parallel PageRank made possible by lattices; and a large-scale analysis of LLVM utilizing index-sharing to scale. Our results show that Byods offers both improved algorithmic scalability (reduced time and/or space complexity) and runtimes competitive with state-of-the-art parallelizing Datalog solvers.}, +journal = {Proc. ACM Program. 
Lang.}, +month = oct, +articleno = {264}, +numpages = {26}, +keywords = {Program Analysis, Logic Programming, Static Analysis, Datalog} +} + + +@article{avl, + title={An algorithm for organization of information}, + author={Adelson-Velskii, Georgii Maksimovich and Landis, Evgenii Mikhailovich}, + journal={Doklady Akademii Nauk}, + volume={146}, + number={2}, + pages={263--266}, + year={1962}, + publisher={Russian Academy of Sciences} +} + +@book{cowbook, + title={Database Management Systems}, + author={Raghu Ramakrishnan and Johannes Gehrke}, + edition={Third}, + publisher={McGraw-Hill}, + year = {2003} +} + +@book{intro-analysis, + title={Introduction to Real Analysis}, + author={Christopher Heil}, + edition={First}, + publisher={Springer}, + year = {2019} +} + +@inproceedings{wavesofmisery, + author = {Nikolaus Glombiewski and + Bernhard Seeger and + Goetz Graefe}, + editor = {Torsten Grust and + Felix Naumann and + Alexander B{\"{o}}hm and + Wolfgang Lehner and + Theo H{\"{a}}rder and + Erhard Rahm and + Andreas Heuer and + Meike Klettke and + Holger Meyer}, + title = {Waves of Misery After Index Creation}, + booktitle = {Datenbanksysteme f{\"{u}}r Business, Technologie und Web {(BTW} + 2019), 18. Fachtagung des GI-Fachbereichs ,,Datenbanken und Informationssysteme" + (DBIS), 4.-8. M{\"{a}}rz 2019, Rostock, Germany, Proceedings}, + series = {{LNI}}, + volume = {{P-289}}, + pages = {77--96}, + publisher = {Gesellschaft f{\"{u}}r Informatik, Bonn}, + year = {2019}, + url = {https://doi.org/10.18420/btw2019-06}, + doi = {10.18420/BTW2019-06}, + timestamp = {Wed, 13 Jan 2021 11:37:30 +0100}, + biburl = {https://dblp.org/rec/conf/btw/GlombiewskiSG19.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{wavesofmisery-rtree, + author = {Lu Xing and + Eric Lee and + Tong An and + Bo{-}Cheng Chu and + Ahmed Mahmood and + Ahmed M. Aly and + Jianguo Wang and + Walid G. 
Aref}, + title = {An Experimental Evaluation and Investigation of Waves of Misery in + {R-trees}}, + journal = {Proc. {VLDB} Endow.}, + volume = {15}, + number = {3}, + pages = {478--490}, + year = {2021}, + url = {http://www.vldb.org/pvldb/vol15/p478-aref.pdf}, + doi = {10.14778/3494124.3494132}, + timestamp = {Sun, 12 Nov 2023 02:17:29 +0100}, + biburl = {https://dblp.org/rec/journals/pvldb/XingLACMAWA21.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{ubiq-btree, +author = {Comer, Douglas}, +title = {The Ubiquitous {B-Tree}}, +year = {1979}, +issue_date = {June 1979}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {11}, +number = {2}, +issn = {0360-0300}, +url = {https://doi.org/10.1145/356770.356776}, +doi = {10.1145/356770.356776}, +journal = {ACM Comput. Surv.}, +month = jun, +pages = {121--137}, +numpages = {17} +} + +@article{rocksdb, +author = {Dong, Siying and Kryczka, Andrew and Jin, Yanqin and Stumm, Michael}, +title = {{RocksDB}: Evolution of Development Priorities in a Key-Value Store Serving Large-Scale Applications}, +year = {2021}, +issue_date = {November 2021}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {17}, +number = {4}, +issn = {1553-3077}, +url = {https://doi.org/10.1145/3483840}, +doi = {10.1145/3483840}, +abstract = {This article is an eight-year retrospective on development priorities for RocksDB, a key-value store developed at Facebook that targets large-scale distributed systems and that is optimized for Solid State Drives (SSDs). We describe how the priorities evolved over time as a result of hardware trends and extensive experiences running RocksDB at scale in production at a number of organizations: from optimizing write amplification, to space amplification, to CPU utilization. 
We describe lessons from running large-scale applications, including that resource allocation needs to be managed across different RocksDB instances, that data formats need to remain backward- and forward-compatible to allow incremental software rollouts, and that appropriate support for database replication and backups are needed. Lessons from failure handling taught us that data corruption errors needed to be detected earlier and that data integrity protection mechanisms are needed at every layer of the system. We describe improvements to the key-value interface. We describe a number of efforts that in retrospect proved to be misguided. Finally, we describe a number of open problems that could benefit from future research.}, +journal = {ACM Trans. Storage}, +month = oct, +articleno = {26}, +numpages = {32}, +keywords = {large-scale applications, RocksDB, SSD, compaction, databases, Key-value stores} +} + + +@techreport{ubiq-rtree, + title={{R-trees} have grown everywhere}, + author={Manolopoulos, Yannis and Nanopoulos, Alexandros and Papadopoulos, Apostolos N. and Theodoridis, Yannis}, + year={2003}, + institution={Technical Report available at http://www.rtreeportal.org} +} + +@article{mergeable-summaries, + author = {Pankaj K. Agarwal and + Graham Cormode and + Zengfeng Huang and + Jeff M. Phillips and + Zhewei Wei and + Ke Yi}, + title = {Mergeable summaries}, + journal = {{ACM} Trans. Database Syst.}, + volume = {38}, + number = {4}, + pages = {26}, + year = {2013}, + url = {https://doi.org/10.1145/2500128}, + doi = {10.1145/2500128}, + timestamp = {Tue, 21 Mar 2023 21:14:49 +0100}, + biburl = {https://dblp.org/rec/journals/tods/AgarwalCHPWY13.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + + + +@inproceedings{countmin-sketch, + author = {Graham Cormode and + S. 
Muthukrishnan}, + editor = {Martin Farach{-}Colton}, + title = {An Improved Data Stream Summary: The {Count-Min} Sketch and Its Applications}, + booktitle = {{LATIN} 2004: Theoretical Informatics, 6th Latin American Symposium, + Buenos Aires, Argentina, April 5-8, 2004, Proceedings}, + series = {Lecture Notes in Computer Science}, + volume = {2976}, + pages = {29--38}, + publisher = {Springer}, + year = {2004}, + url = {https://doi.org/10.1007/978-3-540-24698-5\_7}, + doi = {10.1007/978-3-540-24698-5\_7}, + timestamp = {Fri, 07 May 2021 12:53:47 +0200}, + biburl = {https://dblp.org/rec/conf/latin/CormodeM04.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} + +@article{isam-overflow, +author = {Larson, Per-{\AA}ke}, +title = {Analysis of Index-Sequential Files with Overflow Chaining}, +year = {1981}, +issue_date = {Dec. 1981}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {6}, +number = {4}, +issn = {0362-5915}, +url = {https://doi.org/10.1145/319628.319665}, +doi = {10.1145/319628.319665}, +abstract = {The gradual performance deterioration caused by deletions from and insertions into an index-sequential file after loading is analyzed. The model developed assumes that overflow records are handled by chaining. Formulas for computing the expected number of overflow records and the expected number of additional accesses caused by the overflow records for both successful and unsuccessful searches are derived.}, +journal = {ACM Trans. Database Syst.}, +month = dec, +pages = {671--680}, +numpages = {10}, +keywords = {analytic model, analysis of algorithms, overflow chaining, performance analysis, ISAM, overflow, overflow handling, indexed sequential access method, index sequential files, file organization, file structure} +} -- cgit v1.2.3