#!/bin/sh
# fuzz.sh - throw bogus input at the tools and watch for misbehavior
#
# usage: tests/fuzz.sh [iterations] [seed-offset]
#        (default 100 per generator; the offset lets parallel runs
#        explore different deterministic mutation seeds)
#
# Four generators feed every entry-consuming tool:
#   random  - raw bytes from /dev/urandom
#   mutated - a valid bibtex file with random structural damage
#             (deleted/duplicated/inserted braces, quotes, @, #, =)
#   soup    - random streams of bibtex syntax tokens
#   format  - malformed aux files for bib-extract and refer records
#             for bib-convert
#
# A case fails if a tool hangs (5s timeout), dies with an awk runtime
# error, exits above 2, or breaks the canonicalization fixed-point
# property (canon(canon(x)) must equal canon(x)).
#
# The mutated and soup inputs are additionally fired at bib-add
# against a known database, which must afterwards still parse and
# still contain every original entry (the survival invariant).

ROOT=$(cd "$(dirname "$0")/.." && pwd)
PATH=$ROOT:$PATH

# byte semantics: random bytes are rarely valid UTF-8, and gawk's
# locale warnings about that are not parser failures
LC_ALL=C
export LC_ALL

N=${1:-100}
OFF=${2:-0}

tmpd=$(mktemp -d) || exit 1

# Clean up on every exit path.  A signal trap that only removes the
# directory would let the script keep running against a deleted work
# area, so INT/TERM exit explicitly and let the EXIT trap do the cleanup.
cleanup() {
  rm -rf "$tmpd"
}
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

fails=0
cases=0

# canonicalizing filter (used for the fixed-point property)
# usage: canon < in > out
# The 5s timeout lives here, around awk itself: timeout(1) can only
# execute external commands, so "timeout 5 canon" would never reach
# this shell function.  Exit status 124 means the filter hung.
canon() {
  timeout 5 awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-canon.awk" \
    -f "$ROOT/lib/bib-select.awk" -v keys= -v invert=1
}

# run one tool on one input file; report any sign of misbehavior
# usage: probe NAME INPUT CMD [ARG...]
#   NAME  - label printed in failure reports
#   INPUT - file fed to the command on stdin
#   CMD   - command line to run under a 5s timeout
# Updates the global counters `cases` and `fails`; a failing input is
# copied into $tmpd as hang.N / crash.N / awkerr.N.
probe() {
  name=$1
  input=$2
  shift 2
  cases=$((cases + 1))

  timeout 5 "$@" < "$input" > "$tmpd/out" 2> "$tmpd/err"
  rc=$?

  if [ "$rc" -eq 124 ]; then
    # timeout(1) reports an expired time limit as 124
    fails=$((fails + 1))
    printf 'HANG %s: %s\n' "$name" "$*"
    cp "$input" "$tmpd/hang.$fails"
  elif [ "$rc" -gt 2 ]; then
    fails=$((fails + 1))
    printf 'CRASH %s: %s (exit %d)\n' "$name" "$*" "$rc"
    cp "$input" "$tmpd/crash.$fails"
  elif grep -Eq 'awk:.*(fatal|error)|[Ss]egmentation' "$tmpd/err"; then
    fails=$((fails + 1))
    printf 'AWKERR %s: %s: %s\n' "$name" "$*" "$(head -1 "$tmpd/err")"
    cp "$input" "$tmpd/awkerr.$fails"
  fi
}

# the canonicalization of any input must be a fixed point
# usage: probe_fixedpoint NAME INPUT
# Runs INPUT through canon twice and compares the two outputs.  A hang
# in either pass is reported as HANG instead of comparing truncated
# output; a difference is reported as NOTFIX.
probe_fixedpoint() {
  cases=$((cases + 1))

  canon < "$2" > "$tmpd/c1" 2> /dev/null
  rc1=$?
  canon < "$tmpd/c1" > "$tmpd/c2" 2> /dev/null
  rc2=$?

  if [ "$rc1" -eq 124 ] || [ "$rc2" -eq 124 ]; then
    fails=$((fails + 1))
    printf 'HANG %s: %s\n' "$1" canon
    cp "$2" "$tmpd/hang.$fails"
  elif ! cmp -s "$tmpd/c1" "$tmpd/c2"; then
    fails=$((fails + 1))
    printf 'NOTFIX %s: canon not idempotent\n' "$1"
    cp "$2" "$tmpd/notfix.$fails"
  fi
}

# print the valid bibtex file that the mutated generator damages
# usage: seed_bib > out
seed_bib() {
  cat <<'EOF'
@string{cj = {The Computer Journal}}

@article{knuth1984literate,
  author = {Donald E. Knuth},
  title = {Literate {P}rogramming},
  journal = cj,
  year = 1984,
  pages = "97--111",
  note = "vol. " # 27,
}

@inproceedings{lamport1978time,
  author = {Leslie Lamport},
  title = {Time, Clocks, and the Ordering of Events},
  booktitle = {Communications of the ACM},
  year = {1978},
}
EOF
}

# damage a file at a random spot: delete, duplicate, or insert a
# structural character (awk does the randomness; seeded per case)
# usage: mutate SEED < in > out
mutate() {
  awk -v seed="$1" '
    BEGIN { srand(seed) }
    { buf = buf $0 "\n" }
    END {
      n = length(buf)
      chars = "{}\"@#=,()\\%"
      # NOTE: the loop bound calls rand() again on every test, so the
      # number of mutations is not drawn once up front.  Kept as is so
      # that existing seeds keep producing the same damage.
      for (m = 0; m < 1 + int(rand() * 8); m++) {
        pos = 1 + int(rand() * n)
        op = int(rand() * 3)
        c = substr(chars, 1 + int(rand() * length(chars)), 1)
        if (op == 0) {
          # delete a character
          buf = substr(buf, 1, pos - 1) substr(buf, pos + 1)
        } else if (op == 1) {
          # insert a structural character
          buf = substr(buf, 1, pos - 1) c substr(buf, pos)
        } else {
          # duplicate a slice
          buf = substr(buf, 1, pos) substr(buf, pos, 1 + int(rand() * 20)) substr(buf, pos)
        }
        n = length(buf)
      }
      printf "%s", buf
    }'
}

# a stream of plausible bibtex syntax fragments in random order
# usage: soup SEED > out
# Emits between 200 and 999 tokens drawn uniformly from the table T.
soup() {
  awk -v seed="$1" '
    BEGIN {
      srand(seed)
      n = 0
      T[++n] = "@"; T[++n] = "{"; T[++n] = "}"; T[++n] = "\""
      T[++n] = "#"; T[++n] = "="; T[++n] = ","; T[++n] = "("
      T[++n] = ")"; T[++n] = "%"; T[++n] = "\\"; T[++n] = " "
      T[++n] = "\n"; T[++n] = "word"; T[++n] = "1984"
      T[++n] = "@article{k,"; T[++n] = "t = {v}"; T[++n] = "@string"
      T[++n] = "@comment"; T[++n] = " and "; T[++n] = "--"
      len = 200 + int(rand() * 800)
      for (i = 0; i < len; i++)
        printf "%s", T[1 + int(rand() * n)]
    }'
}

# feed one input to every entry-consuming tool, then check the
# canonicalization fixed point
# usage: run_entry_tools NAME INPUT
run_entry_tools() {
  probe "$1" "$2" bib-key
  probe "$1" "$2" bib-ls -l
  probe "$1" "$2" bib-check
  probe "$1" "$2" bib-convert -r
  probe "$1" "$2" bib-add "$tmpd/scratch.bib"
  # start every bib-add probe from a fresh scratch database
  rm -f "$tmpd/scratch.bib" "$tmpd/scratch.bib.bak"
  probe_fixedpoint "$1" "$2"
}

# fire input at bib-add (with and without -f) against a known database;
# afterwards the database must still parse and still contain every
# original entry
# usage: probe_survival NAME INPUT
probe_survival() {
  cases=$((cases + 1))

  cat > "$tmpd/inv.bib" <<'EOF'
@string{js = {Journal of Survival}}
@article{orig1990one, author = {A. Original}, title = {One}, year = 1990}
@article{orig1991two, author = {B. Original}, title = {Two}, journal = js, year = 1991}
@misc{orig1992three, title = {Three}, note = "v. " # 3}
EOF

  # the exit status of bib-add is deliberately ignored here: only the
  # state of the database afterwards matters for this invariant
  timeout 5 bib-add "$tmpd/inv.bib" < "$2" > /dev/null 2>&1
  timeout 5 bib-add -f "$tmpd/inv.bib" < "$2" > /dev/null 2>&1

  if ! awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-lskeys.awk" \
      "$tmpd/inv.bib" > "$tmpd/invkeys" 2> /dev/null \
    || ! grep -q '^orig1990one$' "$tmpd/invkeys" \
    || ! grep -q '^orig1991two$' "$tmpd/invkeys" \
    || ! grep -q '^orig1992three$' "$tmpd/invkeys"; then
    fails=$((fails + 1))
    printf 'WRECK %s: database lost entries or no longer parses\n' "$1"
    cp "$2" "$tmpd/wreck.$fails"
  fi
  rm -f "$tmpd/inv.bib.bak"
}

echo "=== random bytes (x$N) ==="
i=0
while [ "$i" -lt "$N" ]; do
  i=$((i + 1))
  head -c 512 /dev/urandom > "$tmpd/in"
  run_entry_tools "random/$i" "$tmpd/in"
done

echo "=== mutated bibtex (x$N) ==="
seed_bib > "$tmpd/seed"
i=0
while [ "$i" -lt "$N" ]; do
  i=$((i + 1))
  mutate "$((i + OFF))" < "$tmpd/seed" > "$tmpd/in"
  run_entry_tools "mutated/$i" "$tmpd/in"
  probe_survival "mutated/$i" "$tmpd/in"
done

echo "=== syntax soup (x$N) ==="
i=0
while [ "$i" -lt "$N" ]; do
  i=$((i + 1))
  soup "$((i + OFF))" > "$tmpd/in"
  run_entry_tools "soup/$i" "$tmpd/in"
  probe_survival "soup/$i" "$tmpd/in"
done

echo "=== malformed aux and refer (x$N) ==="
printf '@article{k, author={A}, title={T}, year=1}\n' > "$tmpd/db.bib"
i=0
while [ "$i" -lt "$N" ]; do
  i=$((i + 1))
  printf '\\citation{k}\n\\citation{a,b,c}\n\\abx@aux@cite{0}{k}\n%%A Some One\n%%T Title\n' \
    | mutate "$((i + OFF))" > "$tmpd/in"

  # bib-extract takes its input as file arguments, so it is run directly
  # rather than through probe.  The status is captured right after the
  # command: inside "if ! cmd; then", $? is the negated status (always 0).
  cases=$((cases + 1))
  timeout 5 bib-extract "$tmpd/in" "$tmpd/db.bib" > /dev/null 2> "$tmpd/err"
  rc=$?
  if [ "$rc" -eq 124 ]; then
    fails=$((fails + 1))
    printf 'HANG aux/%d: bib-extract\n' "$i"
    cp "$tmpd/in" "$tmpd/hang.$fails"
  elif [ "$rc" -gt 2 ]; then
    fails=$((fails + 1))
    printf 'CRASH aux/%d: bib-extract (exit %d)\n' "$i" "$rc"
    cp "$tmpd/in" "$tmpd/crash.$fails"
  fi

  probe "ref/$i" "$tmpd/in" bib-convert -b
done

printf '\n%d cases, %d failures' "$cases" "$fails"
if [ "$fails" -gt 0 ]; then
  printf ' (failing inputs preserved in %s)\n' "$tmpd"
  # keep the work directory so the failing inputs can be inspected
  trap - EXIT
  exit 1
fi
printf '\n'