aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rwxr-xr-x tests/fuzz.sh 229
-rwxr-xr-x tests/run-tests.sh 57
2 files changed, 286 insertions, 0 deletions
diff --git a/tests/fuzz.sh b/tests/fuzz.sh
new file mode 100755
index 0000000..4145d42
--- /dev/null
+++ b/tests/fuzz.sh
@@ -0,0 +1,229 @@
+#!/bin/sh
+# fuzz.sh - throw bogus input at the tools and watch for misbehavior
+#
+# usage: tests/fuzz.sh [iterations] [seed-offset]
+# (default 100 per generator; the offset lets parallel runs
+# explore different deterministic mutation seeds)
+#
+# Four generators feed every entry-consuming tool:
+# random - raw bytes from /dev/urandom
+# mutated - a valid bibtex file with random structural damage
+# (deleted/duplicated/inserted braces, quotes, @, #, =)
+# soup - random streams of bibtex syntax tokens
+# format - malformed aux files for bib-extract and refer records
+# for bib-convert
+#
+# A case fails if a tool hangs (5s timeout), dies with an awk runtime
+# error, exits above 2, or breaks the canonicalization fixed-point
+# property (canon(canon(x)) must equal canon(x)).
+#
+# The mutated and soup inputs are additionally fired at bib-add
+# against a known database, which must afterwards still parse and
+# still contain every original entry (the survival invariant).
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+PATH=$ROOT:$PATH
+
+# byte semantics: random bytes are rarely valid UTF-8, and gawk's
+# locale warnings about that are not parser failures
+LC_ALL=C
+export LC_ALL
+N=${1:-100}
+OFF=${2:-0}
+tmpd=$(mktemp -d) || exit 1
+trap 'rm -rf "$tmpd"' EXIT INT TERM
+
+fails=0
+cases=0
+
+# canonicalizing filter (used for the fixed-point property)
+# stdin -> stdout; chains the parse, canon and select awk libraries with
+# an empty key list and invert=1
+canon() {
+  # positional parameters are private to the function, so they make a
+  # convenient scratch list for the three library paths
+  set -- "$ROOT/lib/bib-parse.awk" "$ROOT/lib/bib-canon.awk" \
+    "$ROOT/lib/bib-select.awk"
+  awk -f "$1" -f "$2" -f "$3" -v keys= -v invert=1
+}
+
+# run one tool on one input file; report any sign of misbehavior
+probe() { # probe <case-name> <input> <cmd...>
+ name=$1
+ input=$2
+ shift 2
+ cases=$((cases + 1))
+ timeout 5 "$@" < "$input" > "$tmpd/out" 2> "$tmpd/err"
+ rc=$?
+ if [ "$rc" -eq 124 ]; then
+ fails=$((fails + 1))
+ printf 'HANG %s: %s\n' "$name" "$*"
+ cp "$input" "$tmpd/hang.$fails"
+ elif [ "$rc" -gt 2 ]; then
+ fails=$((fails + 1))
+ printf 'CRASH %s: %s (exit %d)\n' "$name" "$*" "$rc"
+ cp "$input" "$tmpd/crash.$fails"
+ elif grep -Eq 'awk:.*(fatal|error)|[Ss]egmentation' "$tmpd/err"; then
+ fails=$((fails + 1))
+ printf 'AWKERR %s: %s: %s\n' "$name" "$*" "$(head -1 "$tmpd/err")"
+ cp "$input" "$tmpd/awkerr.$fails"
+ fi
+}
+
+# the canonicalization of any input must be a fixed point
+probe_fixedpoint() { # probe_fixedpoint <case-name> <input>
+ cases=$((cases + 1))
+ timeout 5 canon < "$2" > "$tmpd/c1" 2> /dev/null
+ timeout 5 canon < "$tmpd/c1" > "$tmpd/c2" 2> /dev/null
+ if ! cmp -s "$tmpd/c1" "$tmpd/c2"; then
+ fails=$((fails + 1))
+ printf 'NOTFIX %s: canon not idempotent\n' "$1"
+ cp "$2" "$tmpd/notfix.$fails"
+ fi
+}
+
+# print the known-good bibtex file that the "mutated" generator damages
+#
+# The two entries plus one @string macro cover several value syntaxes:
+# brace-delimited and quote-delimited strings, a bare number, a macro
+# reference (journal = cj), nested braces, and "#" concatenation.
+# The heredoc delimiter is quoted, so the text is emitted literally.
+seed_bib() {
+ cat <<'EOF'
+@string{cj = {The Computer Journal}}
+@article{knuth1984literate,
+ author = {Donald E. Knuth},
+ title = {Literate {P}rogramming},
+ journal = cj,
+ year = 1984,
+ pages = "97--111",
+ note = "vol. " # 27,
+}
+@inproceedings{lamport1978time,
+ author = {Leslie Lamport},
+ title = {Time, Clocks, and the Ordering of Events},
+ booktitle = {Communications of the ACM},
+ year = {1978},
+}
+EOF
+}
+
+# damage a file at a random spot: delete, duplicate, or insert a
+# structural character (awk does the randomness; seeded per case)
+#
+# stdin is slurped into one buffer with a newline appended to every
+# record, so input lacking a final newline gains one.  srand(seed) makes
+# the damage repeatable for a given seed and input (with the same awk
+# implementation).
+#
+# Details worth knowing before touching the awk program:
+#  - the loop bound contains rand(), so it is re-drawn before every pass;
+#    there is always at least one mutation and never more than eight
+#  - every pass draws pos, op and c in that order, even when the chosen
+#    operation does not use c; reordering the draws changes what each
+#    seed produces
+#  - op 0 removes the character at pos, op 1 inserts c before pos
+#  - op 2 keeps the text up to and including pos, appends 1-20 characters
+#    starting at pos, then appends the remainder starting at pos again,
+#    so the character at pos is repeated in addition to the slice
+mutate() { # mutate <seed> < in > out
+ awk -v seed="$1" '
+ BEGIN { srand(seed) }
+ { buf = buf $0 "\n" }
+ END {
+ n = length(buf)
+ chars = "{}\"@#=,()\\%"
+ for (m = 0; m < 1 + int(rand() * 8); m++) {
+ pos = 1 + int(rand() * n)
+ op = int(rand() * 3)
+ c = substr(chars, 1 + int(rand() * length(chars)), 1)
+ if (op == 0) # delete a character
+ buf = substr(buf, 1, pos - 1) substr(buf, pos + 1)
+ else if (op == 1) # insert a structural character
+ buf = substr(buf, 1, pos - 1) c substr(buf, pos)
+ else # duplicate a slice
+ buf = substr(buf, 1, pos) substr(buf, pos, 1 + int(rand() * 20)) substr(buf, pos)
+ n = length(buf)
+ }
+ printf "%s", buf
+ }'
+}
+
+# a stream of plausible bibtex syntax fragments in random order
+#
+# Emits between 200 and 999 fragments drawn uniformly from a fixed table
+# of 21 tokens; the stream is repeatable for a given seed.
+soup() { # soup <seed> > out
+  awk -v seed="$1" '
+    # append one fragment to the token table
+    function tok(s) { T[++n] = s }
+
+    BEGIN {
+      srand(seed)
+      n = 0
+      # single structural characters
+      tok("@"); tok("{"); tok("}"); tok("\"")
+      tok("#"); tok("="); tok(","); tok("(")
+      tok(")"); tok("%"); tok("\\"); tok(" ")
+      # whitespace, words and larger fragments
+      tok("\n"); tok("word"); tok("1984")
+      tok("@article{k,"); tok("t = {v}"); tok("@string")
+      tok("@comment"); tok(" and "); tok("--")
+
+      left = 200 + int(rand() * 800)
+      while (left-- > 0)
+        printf "%s", T[1 + int(rand() * n)]
+    }'
+}
+
+# feed one input to every entry-consuming tool, then check the
+# canonicalization fixed point on the same input
+#
+# bib-add writes to a scratch database that is thrown away afterwards,
+# together with its backup and lock file.  The lock is removed as well
+# because a bib-add killed by the probe timeout may leave
+# <database>.lock behind, and the next case should start from a clean
+# slate rather than depend on stale-lock recovery.
+run_entry_tools() { # run_entry_tools <case-name> <input>
+  probe "$1" "$2" bib-key
+  probe "$1" "$2" bib-ls -l
+  probe "$1" "$2" bib-check
+  probe "$1" "$2" bib-convert -r
+  probe "$1" "$2" bib-add "$tmpd/scratch.bib"
+  rm -f "$tmpd/scratch.bib" "$tmpd/scratch.bib.bak" "$tmpd/scratch.bib.lock"
+  probe_fixedpoint "$1" "$2"
+}
+
+# fire input at bib-add (with and without -f) against a known database;
+# afterwards the database must still parse and still contain every
+# original entry
+#
+# The database is rewritten from scratch for every case.  The exit status
+# of the two bib-add runs is ignored on purpose: only the state of the
+# database afterwards matters.  A violation is reported as WRECK and the
+# offending input is kept in $tmpd.  Backup and lock files are removed at
+# the end; a bib-add killed by the timeout may leave <database>.lock
+# behind, which should not leak into the next case.
+probe_survival() { # probe_survival <case-name> <input>
+  cases=$((cases + 1))
+  cat > "$tmpd/inv.bib" <<'EOF'
+@string{js = {Journal of Survival}}
+@article{orig1990one, author = {A. Original}, title = {One}, year = 1990}
+@article{orig1991two, author = {B. Original}, title = {Two}, journal = js, year = 1991}
+@misc{orig1992three, title = {Three}, note = "v. " # 3}
+EOF
+  timeout 5 bib-add "$tmpd/inv.bib" < "$2" > /dev/null 2>&1
+  timeout 5 bib-add -f "$tmpd/inv.bib" < "$2" > /dev/null 2>&1
+  # list the keys now in the database, then look for the three originals
+  if ! awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-lskeys.awk" \
+    "$tmpd/inv.bib" > "$tmpd/invkeys" 2> /dev/null \
+    || ! grep -q '^orig1990one$' "$tmpd/invkeys" \
+    || ! grep -q '^orig1991two$' "$tmpd/invkeys" \
+    || ! grep -q '^orig1992three$' "$tmpd/invkeys"; then
+    fails=$((fails + 1))
+    printf 'WRECK %s: database lost entries or no longer parses\n' "$1"
+    cp "$2" "$tmpd/wreck.$fails"
+  fi
+  rm -f "$tmpd/inv.bib.bak" "$tmpd/inv.bib.lock"
+}
+
+echo "=== random bytes (x$N) ==="
+i=0
+while [ "$i" -lt "$N" ]; do
+ i=$((i + 1))
+ head -c 512 /dev/urandom > "$tmpd/in"
+ run_entry_tools "random/$i" "$tmpd/in"
+done
+
+echo "=== mutated bibtex (x$N) ==="
+seed_bib > "$tmpd/seed"
+i=0
+while [ "$i" -lt "$N" ]; do
+ i=$((i + 1))
+ mutate "$((i + OFF))" < "$tmpd/seed" > "$tmpd/in"
+ run_entry_tools "mutated/$i" "$tmpd/in"
+ probe_survival "mutated/$i" "$tmpd/in"
+done
+
+echo "=== syntax soup (x$N) ==="
+i=0
+while [ "$i" -lt "$N" ]; do
+ i=$((i + 1))
+ soup "$((i + OFF))" > "$tmpd/in"
+ run_entry_tools "soup/$i" "$tmpd/in"
+ probe_survival "soup/$i" "$tmpd/in"
+done
+
+# generator 4: a small aux/refer sample with random damage, given to
+# bib-extract as an aux file and to bib-convert -b on stdin
+echo "=== malformed aux and refer (x$N) ==="
+printf '@article{k, author={A}, title={T}, year=1}\n' > "$tmpd/db.bib"
+i=0
+while [ "$i" -lt "$N" ]; do
+  i=$((i + 1))
+  printf '\\citation{k}\n\\citation{a,b,c}\n\\abx@aux@cite{0}{k}\n%%A Some One\n%%T Title\n' \
+    | mutate "$((i + OFF))" > "$tmpd/in"
+  cases=$((cases + 1))
+  # bib-extract takes its input as file arguments, so it is run here
+  # rather than through probe (which feeds stdin).  The status has to be
+  # captured right after the command: inside "if ! cmd; then" $? holds
+  # the status of the negation, which is always 0.
+  timeout 5 bib-extract "$tmpd/in" "$tmpd/db.bib" > /dev/null 2> "$tmpd/err"
+  rc=$?
+  if [ "$rc" -eq 124 ]; then
+    fails=$((fails + 1))
+    printf 'HANG aux/%d: bib-extract\n' "$i"
+    cp "$tmpd/in" "$tmpd/hang.$fails"
+  elif [ "$rc" -gt 2 ]; then
+    fails=$((fails + 1))
+    printf 'CRASH aux/%d: bib-extract (exit %d)\n' "$i" "$rc"
+    cp "$tmpd/in" "$tmpd/crash.$fails"
+  elif grep -Eq 'awk:.*(fatal|error)|[Ss]egmentation' "$tmpd/err"; then
+    # same stderr check that probe applies to the other tools; it must
+    # happen before the probe call below overwrites $tmpd/err
+    fails=$((fails + 1))
+    printf 'AWKERR aux/%d: bib-extract: %s\n' "$i" "$(head -1 "$tmpd/err")"
+    cp "$tmpd/in" "$tmpd/awkerr.$fails"
+  fi
+  probe "ref/$i" "$tmpd/in" bib-convert -b
+done
+
+printf '\n%d cases, %d failures' "$cases" "$fails"
+if [ "$fails" -gt 0 ]; then
+ printf ' (failing inputs preserved in %s)\n' "$tmpd"
+ trap - EXIT
+ exit 1
+fi
+printf '\n'
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 8a9f49a..653f838 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -74,6 +74,63 @@ check "bib-add -f replaces entry" grep -q ' year = 1985,' "$db"
n=$(grep -c '^@article{junk-key,' "$db")
[ "$n" = 1 ] && ok "bib-add -f leaves one copy" || not_ok "bib-add -f leaves one copy"
+# ---- bib-add hardening --------------------------------------------------
+# Uses $db, $tmpd and the check / ok / not_ok helpers from earlier in this
+# file.  Pass/fail pairs are written as if/else so that exactly one of
+# ok / not_ok runs whatever status the helpers return.
+
+# a non-empty backup must exist after the modifications above
+check "bib-add writes a backup on modify" test -s "$db.bak"
+
+# replacement must not disturb other bytes (comments, formatting)
+cat > "$tmpd/pres.bib" <<'EOF'
+% Encoding: UTF-8
+% hand-maintained; do not reformat
+
+@ARTICLE{ keep , AUTHOR = "Stays Verbatim", YEAR = 1111 }
+
+@article{swap2000old, author = {Old One}, title = {Swap}, year = 2000}
+EOF
+printf '@article{swap2000old, author = {New One}, title = {Swap}, year = 2000}\n' \
+  | bib-add -f "$tmpd/pres.bib"
+check "bib-add -f preserves comments" grep -q '^% Encoding: UTF-8$' "$tmpd/pres.bib"
+check "bib-add -f preserves untouched entries verbatim" \
+  grep -q 'AUTHOR = "Stays Verbatim"' "$tmpd/pres.bib"
+check "bib-add -f swapped the entry" grep -q '{New One}' "$tmpd/pres.bib"
+n=$(grep -c 'swap2000old' "$tmpd/pres.bib")
+if [ "$n" = 1 ]; then
+  ok "bib-add -f removed the old version"
+else
+  not_ok "bib-add -f removed the old version"
+fi
+
+# bogus input must never modify the database
+cp "$db" "$tmpd/before"
+if printf '@article{, author = {No Key}, year = 1}\n' | bib-add "$db" 2> /dev/null; then
+  not_ok "bib-add rejects empty keys"
+else
+  ok "bib-add rejects empty keys"
+fi
+if printf '@misc{same2, title={A}}\n@misc{same2, title={B}}\n' \
+  | bib-add "$db" 2> /dev/null; then
+  not_ok "bib-add rejects dup keys within input"
+else
+  ok "bib-add rejects dup keys within input"
+fi
+check "database untouched after rejected input" cmp -s "$db" "$tmpd/before"
+
+# concurrent writers serialize; no entries lost, lock released
+i=0
+while [ "$i" -lt 10 ]; do
+  i=$((i + 1))
+  printf '@misc{lock%d, title = {L %d}}\n' "$i" "$i" \
+    | bib-add "$tmpd/lock.bib" 2> /dev/null &
+done
+wait
+n=$(bib-ls "$tmpd/lock.bib" | wc -l)
+if [ "$n" -eq 10 ]; then
+  ok "concurrent bib-add loses no entries"
+else
+  not_ok "concurrent bib-add loses no entries (got $n)"
+fi
+if [ -e "$tmpd/lock.bib.lock" ]; then
+  not_ok "lock released after use"
+else
+  ok "lock released after use"
+fi
+
+# a stale lock from a dead process is reaped
+# Use the PID of a child that has already been waited for: it is known to
+# be dead, whereas a hard-coded number may belong to a live process on
+# systems that allow large PIDs.
+sh -c 'exit 0' &
+stale_pid=$!
+wait "$stale_pid"
+echo "$stale_pid" > "$tmpd/lock.bib.lock"
+printf '@misc{lock11, title = {L 11}}\n' | bib-add "$tmpd/lock.bib" 2> /dev/null
+check "stale lock reaped" grep -q 'lock11' "$tmpd/lock.bib"
+
+# the database argument must be a regular file, not a directory
+mkdir "$tmpd/adir"
+if printf '@misc{k, title={T}}\n' | bib-add "$tmpd/adir" 2> /dev/null; then
+  not_ok "bib-add refuses non-regular files"
+else
+  ok "bib-add refuses non-regular files"
+fi
+
# ---- bib-extract -------------------------------------------------------
cat > "$tmpd/all.bib" <<'EOF'
@article{alpha2020one, author = {A. Alpha}, title = {One}, year = 2020}