From eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 Mon Sep 17 00:00:00 2001
From: "Douglas B. Rumbaugh"
Date: Sat, 6 Jun 2026 12:02:41 -0400
Subject: Initial implementation (only a few years later!)

This is pure Claude. I'd written out the plan for this suite of scripts
eons ago, but never found the time to actually do it. Remembered it this
morning, pointed Claude at the README, and had something that appears to
work in minutes.

caveat emptor: the design is mine, but the code is purely LLM generated
at this point.
---
 tests/integration.sh | 173 +++++++++++++++++++++++++++++++++++++++++++++++++
 tests/run-tests.sh   | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 382 insertions(+)
 create mode 100755 tests/integration.sh
 create mode 100755 tests/run-tests.sh

(limited to 'tests')

diff --git a/tests/integration.sh b/tests/integration.sh
new file mode 100755
index 0000000..ea847e4
--- /dev/null
+++ b/tests/integration.sh
@@ -0,0 +1,173 @@
+#!/bin/sh
+# integration.sh - end-to-end test against a real LaTeX document
+#
+# Requires pdflatex and bibtex; skipped otherwise. Set BIBTEST_NET=1 to
+# also exercise bib-fetch against doi.org (needs network access).
+#
+# Exits 0 when every check passes (or the suite is skipped), nonzero
+# otherwise.
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+PATH=$ROOT:$PATH
+
+# lskeys file... - run the bib-lskeys awk program on the given files; the
+# checks below count one output line per entry. A function rather than a
+# command string so that a $ROOT containing whitespace is not word-split.
+lskeys() {
+    awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-lskeys.awk" "$@"
+}
+
+if ! command -v pdflatex > /dev/null 2>&1 ||
+    ! command -v bibtex > /dev/null 2>&1; then
+    printf 'integration: pdflatex/bibtex not found, skipping\n' >&2
+    exit 0
+fi
+
+tmpd=$(mktemp -d) || exit 1
+# Clean up on every exit path. A signal trap that only ran the cleanup
+# would let the script carry on in the deleted directory, so INT and TERM
+# exit explicitly, which in turn fires the EXIT trap.
+trap 'rm -rf "$tmpd"' EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+cd "$tmpd" || exit 1
+
+pass=0
+fail=0
+
+# ok description - count and report a passing check
+ok() {
+    pass=$((pass + 1))
+    printf 'ok - %s\n' "$1"
+}
+
+# not_ok description - count and report a failing check
+not_ok() {
+    fail=$((fail + 1))
+    printf 'FAIL - %s\n' "$1"
+}
+
+# ---- build a database with bib-gen | bib-add ---------------------------
+bib-gen -t article author='Donald E. Knuth' title='Literate Programming' \
+    journal='The Computer Journal' year=1984 volume=27 number=2 \
+    pages='97--111' | bib-add master.bib
+bib-gen -t article author='Alan M. Turing' \
+    title='Computing Machinery and Intelligence' journal='Mind' year=1950 \
+    volume=59 pages='433--460' | bib-add master.bib
+# printf expands the \t escapes, so bib-gen reads tab-separated records.
+printf 'Claude E. Shannon\tA Mathematical Theory of Communication\tBell System Technical Journal\t1948
+Edsger W. Dijkstra\tGo To Statement Considered Harmful\tCommunications of the ACM\t1968
+' | bib-gen -F author,title,journal,year | bib-add master.bib
+
+n=$(lskeys master.bib | wc -l)
+if [ "$n" -eq 4 ]; then
+    ok "database built with 4 entries"
+else
+    not_ok "database built with 4 entries (got $n)"
+fi
+
+# ---- compile a document citing a subset --------------------------------
+cat > paper.tex <<'EOF'
+\documentclass{article}
+\begin{document}
+Machines may think~\cite{turing1950computing}; programs are
+literature~\cite{knuth1984literate}.
+
+DOI: 10.1093/comjnl/27.2.97
+\bibliographystyle{plain}
+\bibliography{master}
+\end{document}
+EOF
+pdflatex -interaction=batchmode paper.tex > /dev/null 2>&1
+
+if grep -q 'citation{turing1950computing}' paper.aux; then
+    ok "pdflatex produced citations in aux"
+else
+    not_ok "pdflatex produced citations in aux"
+fi
+
+# ---- extract the cited subset and build against it ---------------------
+bib-extract paper.aux master.bib > paper.bib
+n=$(lskeys paper.bib | wc -l)
+if [ "$n" -eq 2 ]; then
+    ok "bib-extract kept the 2 cited entries"
+else
+    not_ok "bib-extract kept the 2 cited entries (got $n)"
+fi
+
+# Point the aux file at the extracted subset before running bibtex.
+sed 's/\\bibdata{master}/\\bibdata{paper}/' paper.aux > tmp.aux \
+    && mv tmp.aux paper.aux
+bibtex paper > bibtex.log 2>&1
+# -E: alternation with \| is not part of POSIX basic regular expressions.
+if grep -Eqi 'error|warning' bibtex.log; then
+    not_ok "bibtex accepts canonical output cleanly"
+else
+    ok "bibtex accepts canonical output cleanly"
+fi
+
+# Two further pdflatex passes after bibtex, then inspect the final log.
+pdflatex -interaction=batchmode paper.tex > /dev/null 2>&1
+pdflatex -interaction=batchmode paper.tex > /dev/null 2>&1
+if grep -qi 'undefined' paper.log; then
+    not_ok "document resolves all citations"
+else
+    ok "document resolves all citations"
+fi
+if [ -s paper.pdf ]; then
+    ok "pdf produced"
+else
+    not_ok "pdf produced"
+fi
+
+# ---- convert roundtrip --------------------------------------------------
+bib-convert master.bib | bib-convert > roundtrip.bib
+if [ "$(lskeys master.bib | sort)" = "$(lskeys roundtrip.bib | sort)" ]; then
+    ok "bibtex -> refer -> bibtex preserves all keys"
+else
+    not_ok "bibtex -> refer -> bibtex preserves all keys"
+fi
+
+# ---- bib-fetch against the built pdf (network) --------------------------
+if [ "$BIBTEST_NET" = 1 ]; then
+    if bib-fetch paper.pdf > fetched.bib 2> /dev/null; then
+        if grep -q '^@article{knuth1984literate,' fetched.bib; then
+            ok "bib-fetch resolves DOI from built pdf"
+        else
+            not_ok "bib-fetch resolves DOI from built pdf"
+        fi
+        # Adding the same entry again must be refused by bib-add.
+        if bib-fetch paper.pdf 2> /dev/null | bib-add master.bib 2> /dev/null; then
+            not_ok "fetched entry detected as duplicate"
+        else
+            ok "fetched entry detected as duplicate"
+        fi
+    else
+        not_ok "bib-fetch resolves DOI from built pdf"
+    fi
+    if bib-fetch -a 1706.03762 2> /dev/null \
+        | grep -q '^@misc{vaswani[0-9]*attention,'; then
+        ok "bib-fetch resolves arXiv id"
+    else
+        not_ok "bib-fetch resolves arXiv id"
+    fi
+    cat > arx.tex <<'EOF'
+\documentclass{article}
+\begin{document}
+A preprint without any DOI.
+
+arXiv:1706.03762v7 [cs.CL] 2 Aug 2023
+\end{document}
+EOF
+    pdflatex -interaction=batchmode arx.tex > /dev/null 2>&1
+    if bib-fetch arx.pdf 2> /dev/null | grep -q 'eprint = {1706.03762}'; then
+        ok "bib-fetch extracts arXiv id from pdf"
+    else
+        not_ok "bib-fetch extracts arXiv id from pdf"
+    fi
+else
+    printf 'skip - bib-fetch network tests (set BIBTEST_NET=1 to enable)\n'
+fi
+
+printf '\n%d passed, %d failed\n' "$pass" "$fail"
+[ "$fail" -eq 0 ]
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
new file mode 100755
index 0000000..70721db
--- /dev/null
+++ b/tests/run-tests.sh
@@ -0,0 +1,209 @@
+#!/bin/sh
+# run-tests.sh - test suite for bibutils
+#
+# Runs the bib-* tools found in the repository root against small fixtures
+# in a scratch directory. Prints one "ok"/"FAIL" line per check and exits
+# nonzero if any check failed.
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+PATH=$ROOT:$PATH
+tmpd=$(mktemp -d) || exit 1
+# Clean up on every exit path. A signal trap that only ran the cleanup
+# would let the suite carry on with its scratch directory gone, so INT and
+# TERM exit explicitly, which in turn fires the EXIT trap.
+trap 'rm -rf "$tmpd"' EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+pass=0
+fail=0
+
+# ok description - count and report a passing check
+ok() {
+    pass=$((pass + 1))
+    printf 'ok - %s\n' "$1"
+}
+
+# not_ok description - count and report a failing check
+not_ok() {
+    fail=$((fail + 1))
+    printf 'FAIL - %s\n' "$1"
+}
+
+# check description command... (passes if the command succeeds)
+# The command's stdout and stderr are discarded.
+check() {
+    desc=$1
+    shift
+    if "$@" > /dev/null 2>&1; then
+        ok "$desc"
+    else
+        not_ok "$desc"
+    fi
+}
+
+# check_grep description text pattern... (passes if every pattern matches)
+# Each pattern is a grep basic regular expression tried against the lines
+# of text. The text reaches grep on stdin rather than being spliced into
+# an `sh -c` string, so quotes in tool output cannot break the check.
+check_grep() {
+    desc=$1
+    text=$2
+    shift 2
+    for pat in "$@"; do
+        if ! printf '%s\n' "$text" | grep -q -e "$pat"; then
+            not_ok "$desc"
+            return
+        fi
+    done
+    ok "$desc"
+}
+
+entry='@ARTICLE{ junk-key ,
+ AUTHOR = "Donald E. Knuth",
+ Title={Literate Programming},
+ JOURNAL = {The Computer Journal},
+ Year = 1984, volume={27},
+ pages = {97--111}
+}'
+
+# ---- bib-key ----------------------------------------------------------
+out=$(printf '%s\n' "$entry" | bib-key)
+check_grep "bib-key generates surname-year-word key" "$out" \
+    '^@article{knuth1984literate,'
+
+# key collisions get letter suffixes
+out=$(printf '@inproceedings{a, author={J. Smith}, title={Fast Trees}, year=2020}
+@article{b, author={J. Smith}, title={Fast Trees Extended}, year=2020}
+@article{c, author={J. Smith}, title={Fast Tree Methods}, year=2020}\n' | bib-key)
+check_grep "bib-key disambiguates colliding keys" "$out" \
+    '{smith2020fast,' '{smith2020fastb,' '{smith2020fastc,'
+
+# ---- canonicalization via bib-add -------------------------------------
+db=$tmpd/refs.bib
+printf '%s\n' "$entry" | bib-add "$db"
+check "bib-add creates database" test -s "$db"
+check "bib-add lowercases field names" grep -q ' author = {Donald E. Knuth},' "$db"
+check "bib-add collapses whitespace in values" \
+    grep -q ' title = {Literate Programming},' "$db"
+check "bib-add keeps bare numbers bare" grep -q ' year = 1984,' "$db"
+
+# duplicate detection
+if printf '%s\n' "$entry" | bib-add "$db" 2> /dev/null; then
+    not_ok "bib-add rejects duplicate key"
+else
+    ok "bib-add rejects duplicate key"
+fi
+
+# forced replacement
+printf '%s\n' "$entry" | sed 's/1984/1985/' | bib-add -f "$db"
+check "bib-add -f replaces entry" grep -q ' year = 1985,' "$db"
+n=$(grep -c '^@article{junk-key,' "$db")
+check "bib-add -f leaves one copy" test "$n" -eq 1
+
+# ---- bib-extract -------------------------------------------------------
+cat > "$tmpd/all.bib" <<'EOF'
+@article{alpha2020one, author = {A. Alpha}, title = {One}, year = 2020}
+@article{beta2021two, author = {B. Beta}, title = {Two}, year = 2021}
+@article{gamma2022three, author = {C. Gamma}, title = {Three}, year = 2022}
+EOF
+cat > "$tmpd/doc.aux" <<'EOF'
+\relax
+\citation{alpha2020one}
+\citation{gamma2022three,alpha2020one}
+\bibstyle{plain}
+EOF
+out=$(bib-extract "$tmpd/doc.aux" "$tmpd/all.bib")
+check_grep "bib-extract keeps cited entries" "$out" alpha2020one
+check_grep "bib-extract keeps all cited entries" "$out" gamma2022three
+if printf '%s' "$out" | grep -q beta2021two; then
+    not_ok "bib-extract drops uncited entries"
+else
+    ok "bib-extract drops uncited entries"
+fi
+
+# ---- bib-convert -------------------------------------------------------
+out=$(printf '%s\n' "$entry" | bib-convert)
+check_grep "bib-convert emits refer author" "$out" '^%A Donald E. Knuth$'
+check_grep "bib-convert emits refer pages with single dash" "$out" \
+    '^%P 97-111$'
+
+cat > "$tmpd/rec.ref" <<'EOF'
+%A Alan M. Turing
+%T Computing Machinery and Intelligence
+%J Mind
+%D 1950
+%V 59
+%P 433-460
+EOF
+out=$(bib-convert "$tmpd/rec.ref")
+check_grep "bib-convert refer->bibtex type guess" "$out" \
+    '^@article{turing1950computing,'
+check_grep "bib-convert refer->bibtex pages" "$out" ' pages = {433--460},'
+
+# ---- bib-gen -----------------------------------------------------------
+out=$(bib-gen -t book author='Xavier Yu' title='Some Title' year=2001 publisher='Pub')
+check_grep "bib-gen argument mode" "$out" '^@book{yu2001some,'
+
+out=$(printf 'A. Author\tNeat Paper\tGood Journal\t1999\n' \
+    | bib-gen -F author,title,journal,year)
+check_grep "bib-gen batch mode" "$out" '^@article{author1999neat,'
+
+# ---- bib-ls ------------------------------------------------------------
+out=$(bib-ls "$tmpd/all.bib")
+# Compare numerically: some wc implementations pad the count with blanks,
+# which a string comparison against "3" would reject.
+n=$(printf '%s\n' "$out" | wc -l)
+check "bib-ls lists keys" test "$n" -eq 3
+out=$(bib-ls -l "$tmpd/all.bib")
+# NOTE(review): confirm the column separators below match bib-ls -l output.
+check_grep "bib-ls -l shows details" "$out" \
+    'beta2021two article B. Beta 2021 Two'
+
+# ---- bib-check ---------------------------------------------------------
+cat > "$tmpd/bad.bib" <<'EOF'
+@article{good2020fine, author = {A. Good}, title = {Fine}, journal = {J}, year = 2020}
+@article{noj2020sad, author = {B. Sad}, title = {No Journal Here}, year = 2020}
+@misc{noj2020sad, title = {Dup Key}}
+@book{dup2021title, author = {C. Dup}, title = {FINE!}, publisher = {P}, year = 2021}
+EOF
+if out=$(bib-check "$tmpd/bad.bib"); then
+    not_ok "bib-check exits nonzero on problems"
+else
+    ok "bib-check exits nonzero on problems"
+fi
+check_grep "bib-check finds missing field" "$out" \
+    'noj2020sad: missing required field: journal'
+check_grep "bib-check finds duplicate key" "$out" 'noj2020sad: duplicate key'
+check_grep "bib-check finds duplicate title" "$out" \
+    'dup2021title: title duplicates good2020fine'
+cat > "$tmpd/clean.bib" <<'EOF'
+@article{a2020x, author = {A. A}, title = {X}, journal = {J}, year = 2020}
+@misc{b2021y, title = {Y}}
+EOF
+check "bib-check passes a clean db" bib-check "$tmpd/clean.bib"
+
+# ---- biblatex aux ------------------------------------------------------
+cat > "$tmpd/bl.aux" <<'EOF'
+\abx@aux@refcontext{nty/global//global/global}
+\abx@aux@cite{0}{beta2021two}
+EOF
+out=$(bib-extract "$tmpd/bl.aux" "$tmpd/all.bib")
+check_grep "bib-extract reads biblatex aux" "$out" beta2021two
+
+# ---- bib-util ----------------------------------------------------------
+out=$(printf '%s\n' "$entry" | bib-util key)
+check_grep "bib-util dispatches" "$out" knuth1984literate
+
+# ---- @string passthrough -----------------------------------------------
+cat > "$tmpd/str.bib" <<'EOF'
+@string{cj = {The Computer Journal}}
+@article{knuth1984literate, author = {D. Knuth}, journal = cj, year = 1984}
+EOF
+printf '\\citation{knuth1984literate}\n' > "$tmpd/s.aux"
+out=$(bib-extract "$tmpd/s.aux" "$tmpd/str.bib")
+check_grep "bib-extract passes @string through" "$out" '@string{cj'
+check_grep "macro field stays raw" "$out" ' journal = cj,'
+
+printf '\n%d passed, %d failed\n' "$pass" "$fail"
+[ "$fail" -eq 0 ]
-- 
cgit v1.2.3