diff options
| -rw-r--r-- | Makefile | 9 | ||||
| -rw-r--r-- | README.md | 8 | ||||
| -rwxr-xr-x | bib-add | 121 | ||||
| -rwxr-xr-x | bib-extract | 13 | ||||
| -rw-r--r-- | lib/bib-key.awk | 4 | ||||
| -rw-r--r-- | lib/bib-parse.awk | 5 | ||||
| -rw-r--r-- | lib/bib-select.awk | 30 | ||||
| -rw-r--r-- | lib/bib-strip.awk | 49 | ||||
| -rw-r--r-- | man/bib-add.1 | 33 | ||||
| -rwxr-xr-x | tests/fuzz.sh | 229 | ||||
| -rwxr-xr-x | tests/run-tests.sh | 57 |
11 files changed, 506 insertions, 52 deletions
@@ -6,8 +6,8 @@ MANDIR = $(PREFIX)/share/man/man1 SCRIPTS = bib-util bib-add bib-check bib-convert bib-extract bib-fetch \ bib-gen bib-key bib-ls LIBS = lib/bib-parse.awk lib/bib-canon.awk lib/bib-select.awk \ - lib/bib-lskeys.awk lib/bib-key.awk lib/bib-ls.awk \ - lib/bib-check.awk lib/bib2ref.awk lib/ref2bib.awk + lib/bib-strip.awk lib/bib-lskeys.awk lib/bib-key.awk \ + lib/bib-ls.awk lib/bib-check.awk lib/bib2ref.awk lib/ref2bib.awk MANPAGES = man/bib-util.1 man/bib-add.1 man/bib-check.1 man/bib-convert.1 \ man/bib-extract.1 man/bib-fetch.1 man/bib-gen.1 man/bib-key.1 \ man/bib-ls.1 @@ -19,6 +19,9 @@ test: tests/run-tests.sh tests/integration.sh +fuzz: + tests/fuzz.sh + install: -mkdir -p $(BINDIR) $(LIBDIR) $(MANDIR) cp $(SCRIPTS) $(BINDIR) @@ -31,4 +34,4 @@ uninstall: bib-extract.1 bib-fetch.1 bib-gen.1 bib-key.1 bib-ls.1 rm -rf $(LIBDIR) -.PHONY: all test install uninstall +.PHONY: all test fuzz install uninstall @@ -10,7 +10,10 @@ smaller provided scripts for convenience's sake. ## bib-add A script for inserting a new entry into a bibtex database file. It will accept a fully formatted entry on standard input and add it to a database -file presented as an argument. +file presented as an argument. The database is never modified in place: +the new version is built in a temporary file, verified, and only then +swapped in, with the previous contents saved in db.bib.bak. Replacing an +entry (-f) preserves every other byte of the file. ## bib-gen A script which generates a bibtex entry based on input. By default it will @@ -72,4 +75,5 @@ POSIX shell and awk only, with two exceptions: bib-fetch requires curl, plus pdftotext (poppler) for DOI extraction from pdfs. # Tests - make test + make test # unit + integration suites + make fuzz # robustness fuzzing against bogus input @@ -3,12 +3,23 @@ # # usage: bib-add [-f] db.bib < entry # -f replace existing entries with the same key +# +# The database is never modified in place: the complete new version is +# built in a temporary file, verified by re-parsing, and only then +# moved over the original, with the previous contents saved in +# db.bib.bak. Replacement with -f splices entries out by their exact +# source spans, so the rest of the file is preserved byte-for-byte. usage() { printf 'usage: bib-add [-f] db.bib < entry\n' >&2 exit 2 } +die() { + printf 'bib-add: %s\n' "$1" >&2 + exit 1 +} + if [ -n "$BIBUTILS_LIB" ]; then LIB=$BIBUTILS_LIB elif [ -d "$(dirname "$0")/lib" ]; then @@ -17,6 +28,10 @@ else LIB=/usr/local/share/bibutils fi +lskeys() { + awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@" +} + force=0 while getopts f opt; do case $opt in @@ -27,41 +42,95 @@ done shift $((OPTIND - 1)) [ $# -eq 1 ] || usage db=$1 +[ -e "$db" ] && [ ! -f "$db" ] && die "$db is not a regular file" -tmp=$(mktemp) && tmpkeys=$(mktemp) && tmpdb=$(mktemp) || exit 1 -trap 'rm -f "$tmp" "$tmpkeys" "$tmpdb"' EXIT INT TERM +# serialize writers: set -C (noclobber) makes creating db.lock with +# our pid inside a single atomic step, so whoever creates it owns the +# database until they remove it; a lock whose owner has died is reaped +lock=$db.lock +tries=0 +while ! (set -C; echo $$ > "$lock") 2> /dev/null; do + owner=$(cat "$lock" 2> /dev/null) + if [ -n "$owner" ] && ! kill -0 "$owner" 2> /dev/null; then + # reap, but only if it is still that dead process's lock + printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2 + [ "$(cat "$lock" 2> /dev/null)" = "$owner" ] && rm -f "$lock" + continue + fi + tries=$((tries + 1)) + [ "$tries" -ge 30 ] && die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)" + sleep 1 +done -# canonicalize the incoming entries +# release only a lock that is still ours +unlock() { + [ "$(cat "$lock" 2> /dev/null)" = "$$" ] && rm -f "$lock" +} + +tmp=$(mktemp) && tmpkeys=$(mktemp) || { unlock; exit 1; } +trap 'rm -f "$tmp" "$tmpkeys" "$new"; unlock' EXIT INT TERM + +# canonicalize and validate the incoming entries awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \ -v keys= -v invert=1 > "$tmp" +[ -s "$tmp" ] || die "no entries on stdin" -if [ ! -s "$tmp" ]; then - printf 'bib-add: no entries on stdin\n' >&2 - exit 1 +lskeys "$tmp" > "$tmpkeys" +grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key" +indups=$(sort "$tmpkeys" | uniq -d) +[ -n "$indups" ] && die "duplicate keys within input: $indups" + +# check the incoming keys against the database +dups= +oldcount=0 +if [ -s "$db" ]; then + lskeys "$db" > "$tmp.old" || die "cannot parse $db" + oldcount=$(wc -l < "$tmp.old") + dups=$(grep -Fxf "$tmpkeys" "$tmp.old") + rm -f "$tmp.old" + if [ -n "$dups" ] && [ "$force" -ne 1 ]; then + printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2 + printf '%s\n' "$dups" >&2 + exit 1 + fi fi -awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$tmp" > "$tmpkeys" +# build the complete new database next to the original (same +# filesystem, so the final move cannot be interrupted halfway) +new=$(mktemp "$db.XXXXXX") || exit 1 -if [ -f "$db" ]; then - dups=$(awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$db" \ - | grep -Fxf "$tmpkeys") +if [ -s "$db" ]; then if [ -n "$dups" ]; then - if [ "$force" -eq 1 ]; then - # rewrite the database without the entries being replaced - keys=$(printf '%s\n' "$dups" | paste -sd, -) - awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \ - -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=1 \ - "$db" > "$tmpdb" || exit 1 - cp "$tmpdb" "$db" - else - printf 'bib-add: duplicate keys in %s:\n' "$db" >&2 - printf '%s\n' "$dups" >&2 - exit 1 - fi + # splice out the entries being replaced; all other bytes survive + printf '%s\n' "$dups" > "$tmp.dups" + awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \ + -v keyfile="$tmp.dups" "$db" > "$new" || die "failed to rewrite $db" + rm -f "$tmp.dups" + else + cat "$db" > "$new" || die "failed to copy $db" fi + # ensure exactly one blank line before the appended entries + [ -n "$(tail -c 1 "$new")" ] && echo >> "$new" + echo >> "$new" fi +cat "$tmp" >> "$new" + +# verify the result before touching the original: every old key minus +# the replaced ones, plus every new key, must parse back out +ndups=$(printf '%s' "$dups" | grep -c '^' || true) +nnew=$(wc -l < "$tmpkeys") +expect=$((oldcount - ndups + nnew)) +actual=$(lskeys "$new" | wc -l) +[ "$actual" -eq "$expect" ] || \ + die "verification failed ($actual entries, expected $expect); $db left untouched" -{ - [ -s "$db" ] && echo "" - cat "$tmp" -} >> "$db" +if [ -s "$db" ]; then + # back up first, then write through the original name so that its + # permissions, ownership and any symlink are preserved + cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched" + cat "$new" > "$db" || die "write to $db failed; original is in $db.bak" + rm -f "$new" +else + mv "$new" "$db" || die "cannot write $db" + chmod 644 "$db" 2> /dev/null +fi diff --git a/bib-extract b/bib-extract index 297588a..ac0363d 100755 --- a/bib-extract +++ b/bib-extract @@ -23,7 +23,10 @@ aux=$1 shift [ -r "$aux" ] || { printf 'bib-extract: cannot read %s\n' "$aux" >&2; exit 1; } -keys=$(awk ' +keyfile=$(mktemp) || exit 1 +trap 'rm -f "$keyfile"' EXIT INT TERM + +awk ' # classic bibtex: \citation{key,key,...} { line = $0 @@ -46,10 +49,10 @@ keys=$(awk ' print s line = substr(line, RSTART + RLENGTH) } - }' "$aux" | sort -u | paste -sd, -) + }' "$aux" | sort -u > "$keyfile" -[ -n "$keys" ] || exit 0 +[ -s "$keyfile" ] || exit 0 # a key of "*" (from \nocite{*}) selects the whole database -exec awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \ - -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=0 "$@" +awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \ + -f "$LIB/bib-select.awk" -v keyfile="$keyfile" -v invert=0 "$@" diff --git a/lib/bib-key.awk b/lib/bib-key.awk index 4223155..3f4117f 100644 --- a/lib/bib-key.awk +++ b/lib/bib-key.awk @@ -38,7 +38,7 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { surname = (n > 0) ? parts[n] : "" } gsub(/[^A-Za-z0-9]/, "", surname) - surname = tolower(surname) + surname = tolower(substr(surname, 1, 30)) if (surname == "") surname = "anon" @@ -62,5 +62,5 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { break } - return surname y word + return surname y substr(word, 1, 30) } diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk index e5bf9fa..e83cb07 100644 --- a/lib/bib-parse.awk +++ b/lib/bib-parse.awk @@ -4,7 +4,8 @@ # bib_entry(type, key) - called once per regular entry. The fields are # available in BIB_N, BIB_NAME[], BIB_VAL[] and # BIB_KIND[]; the raw source text of the entry -# is in BIB_RAW. +# is in BIB_RAW, and its position in the input +# buffer bib_buf is BIB_START..BIB_END-1. # bib_pass(raw) - called for @string and @preamble blocks with # their raw source text. # @@ -211,6 +212,8 @@ function bib_entry_at(s, i, at, type, opener, closer, key, name, c) { BIB_KIND[BIB_N] = BIB_VKIND } BIB_RAW = bib_trim(substr(s, at, i - at)) + BIB_START = at + BIB_END = i bib_entry(type, key) return i } diff --git a/lib/bib-select.awk b/lib/bib-select.awk index 9aa5a37..3ebd16f 100644 --- a/lib/bib-select.awk +++ b/lib/bib-select.awk @@ -1,21 +1,31 @@ # bib-select.awk - emit entries selected by key, canonically # # Requires bib-parse.awk and bib-canon.awk. Variables (set with -v): -# keys - comma-separated list of entry keys; a key of "*" selects -# every entry (as produced by \nocite{*}) -# invert - 0: emit entries whose key is in the list -# 1: emit entries whose key is NOT in the list +# keys - comma-separated list of entry keys; a key of "*" selects +# every entry (as produced by \nocite{*}) +# keyfile - file with one key per line, for key lists too large to +# pass on the command line; merged with keys +# invert - 0: emit entries whose key is in the list +# 1: emit entries whose key is NOT in the list # -# With keys="" and invert=1 this acts as a canonicalizing filter for +# With no keys and invert=1 this acts as a canonicalizing filter for # everything. @string and @preamble blocks always pass through. +function bib_sel_add(k) { + if (k == "*") + BIB_SEL_ALL = 1 + else + BIB_SEL[k] = 1 +} + BEGIN { bib_sel_n = split(keys, bib_sel_k, ",") - for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) { - if (bib_sel_k[bib_sel_i] == "*") - BIB_SEL_ALL = 1 - else - BIB_SEL[bib_sel_k[bib_sel_i]] = 1 + for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) + bib_sel_add(bib_sel_k[bib_sel_i]) + if (keyfile != "") { + while ((getline bib_sel_line < keyfile) > 0) + bib_sel_add(bib_sel_line) + close(keyfile) } } diff --git a/lib/bib-strip.awk b/lib/bib-strip.awk new file mode 100644 index 0000000..cecca3e --- /dev/null +++ b/lib/bib-strip.awk @@ -0,0 +1,49 @@ +# bib-strip.awk - remove entries by key, preserving all other bytes +# +# Requires bib-parse.awk. Variables (set with -v): +# keys - comma-separated list of entry keys to remove +# keyfile - file with one key per line, for key lists too large to +# pass on the command line; merged with keys +# +# Unlike bib-select.awk, which re-emits entries canonically, this +# splices the matched entries' source spans out of the input and +# leaves everything else - comments, formatting, @string blocks - +# byte-for-byte intact. Used by bib-add -f so that replacing one +# entry never rewrites the rest of the database. +# +# This END block runs after bib-parse.awk's (END blocks execute in +# the order their files are given to awk), so the spans recorded by +# the hooks below are complete by the time output happens. + +BEGIN { + bib_strip_n = split(keys, bib_strip_k, ",") + for (bib_strip_i = 1; bib_strip_i <= bib_strip_n; bib_strip_i++) + BIB_DROP[bib_strip_k[bib_strip_i]] = 1 + if (keyfile != "") { + while ((getline bib_strip_line < keyfile) > 0) + BIB_DROP[bib_strip_line] = 1 + close(keyfile) + } +} + +function bib_pass(raw) { } + +function bib_entry(type, key) { + if (key in BIB_DROP) { + BIB_NSPAN++ + BIB_SPAN_S[BIB_NSPAN] = BIB_START + BIB_SPAN_E[BIB_NSPAN] = BIB_END + } +} + +END { + i = 1 + for (j = 1; j <= BIB_NSPAN; j++) { + printf "%s", substr(bib_buf, i, BIB_SPAN_S[j] - i) + i = BIB_SPAN_E[j] + # swallow the whitespace that followed the removed entry + while (i <= length(bib_buf) && substr(bib_buf, i, 1) ~ /[ \t\r\n]/) + i++ + } + printf "%s", substr(bib_buf, i) +} diff --git a/man/bib-add.1 b/man/bib-add.1 index 5c7a674..32582c5 100644 --- a/man/bib-add.1 +++ b/man/bib-add.1 @@ -21,14 +21,41 @@ If an incoming entry's key already exists in the database, the entry is rejected and the duplicate keys are reported on standard error, unless .B \-f is given. +Input with an empty key, or with the same key appearing twice, is +always rejected. +.SH SAFETY +The database is never modified in place. +The complete new version is built in a temporary file alongside the +original, verified by re-parsing it and checking that exactly the +expected entries are present, and only then written over the database +\(em with the previous contents first saved in +.IB db.bib .bak\fR. +If anything fails along the way, the original file is left untouched. +.PP +Concurrent invocations are serialized through a lock file, +.IB db.bib .lock\fR, +created atomically with the owner's pid inside. +A waiter retries for 30 seconds before giving up with an error; +a lock whose owning process has died is reaped automatically. .SH OPTIONS .TP .B \-f Replace existing entries that share a key with an incoming entry. -The database is rewritten canonically in the process. +The replaced entries are spliced out by their exact source spans, so +comments and the formatting of every other entry are preserved +byte-for-byte. .SH EXIT STATUS -0 on success, 1 if no entries were read or a duplicate key was -rejected, 2 on usage error. +0 on success, 1 if the input was rejected or the database could not +be safely rewritten, 2 on usage error. +.SH FILES +.TP +.IB db.bib .bak +The previous contents of the database, written before each +modification. +.TP +.IB db.bib .lock +Write lock held for the duration of an invocation; contains the +owner's pid. .SH ENVIRONMENT .TP .B BIBUTILS_LIB diff --git a/tests/fuzz.sh b/tests/fuzz.sh new file mode 100755 index 0000000..4145d42 --- /dev/null +++ b/tests/fuzz.sh @@ -0,0 +1,229 @@ +#!/bin/sh +# fuzz.sh - throw bogus input at the tools and watch for misbehavior +# +# usage: tests/fuzz.sh [iterations] [seed-offset] +# (default 100 per generator; the offset lets parallel runs +# explore different deterministic mutation seeds) +# +# Four generators feed every entry-consuming tool: +# random - raw bytes from /dev/urandom +# mutated - a valid bibtex file with random structural damage +# (deleted/duplicated/inserted braces, quotes, @, #, =) +# soup - random streams of bibtex syntax tokens +# format - malformed aux files for bib-extract and refer records +# for bib-convert +# +# A case fails if a tool hangs (5s timeout), dies with an awk runtime +# error, exits above 2, or breaks the canonicalization fixed-point +# property (canon(canon(x)) must equal canon(x)). +# +# The mutated and soup inputs are additionally fired at bib-add +# against a known database, which must afterwards still parse and +# still contain every original entry (the survival invariant). + +ROOT=$(cd "$(dirname "$0")/.." && pwd) +PATH=$ROOT:$PATH + +# byte semantics: random bytes are rarely valid UTF-8, and gawk's +# locale warnings about that are not parser failures +LC_ALL=C +export LC_ALL +N=${1:-100} +OFF=${2:-0} +tmpd=$(mktemp -d) || exit 1 +trap 'rm -rf "$tmpd"' EXIT INT TERM + +fails=0 +cases=0 + +# canonicalizing filter (used for the fixed-point property) +canon() { + awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-canon.awk" \ + -f "$ROOT/lib/bib-select.awk" -v keys= -v invert=1 +} + +# run one tool on one input file; report any sign of misbehavior +probe() { # probe <case-name> <input> <cmd...> + name=$1 + input=$2 + shift 2 + cases=$((cases + 1)) + timeout 5 "$@" < "$input" > "$tmpd/out" 2> "$tmpd/err" + rc=$? + if [ "$rc" -eq 124 ]; then + fails=$((fails + 1)) + printf 'HANG %s: %s\n' "$name" "$*" + cp "$input" "$tmpd/hang.$fails" + elif [ "$rc" -gt 2 ]; then + fails=$((fails + 1)) + printf 'CRASH %s: %s (exit %d)\n' "$name" "$*" "$rc" + cp "$input" "$tmpd/crash.$fails" + elif grep -Eq 'awk:.*(fatal|error)|[Ss]egmentation' "$tmpd/err"; then + fails=$((fails + 1)) + printf 'AWKERR %s: %s: %s\n' "$name" "$*" "$(head -1 "$tmpd/err")" + cp "$input" "$tmpd/awkerr.$fails" + fi +} + +# the canonicalization of any input must be a fixed point +probe_fixedpoint() { # probe_fixedpoint <case-name> <input> + cases=$((cases + 1)) + timeout 5 canon < "$2" > "$tmpd/c1" 2> /dev/null + timeout 5 canon < "$tmpd/c1" > "$tmpd/c2" 2> /dev/null + if ! cmp -s "$tmpd/c1" "$tmpd/c2"; then + fails=$((fails + 1)) + printf 'NOTFIX %s: canon not idempotent\n' "$1" + cp "$2" "$tmpd/notfix.$fails" + fi +} + +seed_bib() { + cat <<'EOF' +@string{cj = {The Computer Journal}} +@article{knuth1984literate, + author = {Donald E. Knuth}, + title = {Literate {P}rogramming}, + journal = cj, + year = 1984, + pages = "97--111", + note = "vol. " # 27, +} +@inproceedings{lamport1978time, + author = {Leslie Lamport}, + title = {Time, Clocks, and the Ordering of Events}, + booktitle = {Communications of the ACM}, + year = {1978}, +} +EOF +} + +# damage a file at a random spot: delete, duplicate, or insert a +# structural character (awk does the randomness; seeded per case) +mutate() { # mutate <seed> < in > out + awk -v seed="$1" ' + BEGIN { srand(seed) } + { buf = buf $0 "\n" } + END { + n = length(buf) + chars = "{}\"@#=,()\\%" + for (m = 0; m < 1 + int(rand() * 8); m++) { + pos = 1 + int(rand() * n) + op = int(rand() * 3) + c = substr(chars, 1 + int(rand() * length(chars)), 1) + if (op == 0) # delete a character + buf = substr(buf, 1, pos - 1) substr(buf, pos + 1) + else if (op == 1) # insert a structural character + buf = substr(buf, 1, pos - 1) c substr(buf, pos) + else # duplicate a slice + buf = substr(buf, 1, pos) substr(buf, pos, 1 + int(rand() * 20)) substr(buf, pos) + n = length(buf) + } + printf "%s", buf + }' +} + +# a stream of plausible bibtex syntax fragments in random order +soup() { # soup <seed> > out + awk -v seed="$1" ' + BEGIN { + srand(seed) + n = 0 + T[++n] = "@"; T[++n] = "{"; T[++n] = "}"; T[++n] = "\"" + T[++n] = "#"; T[++n] = "="; T[++n] = ","; T[++n] = "(" + T[++n] = ")"; T[++n] = "%"; T[++n] = "\\"; T[++n] = " " + T[++n] = "\n"; T[++n] = "word"; T[++n] = "1984" + T[++n] = "@article{k,"; T[++n] = "t = {v}"; T[++n] = "@string" + T[++n] = "@comment"; T[++n] = " and "; T[++n] = "--" + len = 200 + int(rand() * 800) + for (i = 0; i < len; i++) + printf "%s", T[1 + int(rand() * n)] + }' +} + +run_entry_tools() { # run_entry_tools <case-name> <input> + probe "$1" "$2" bib-key + probe "$1" "$2" bib-ls -l + probe "$1" "$2" bib-check + probe "$1" "$2" bib-convert -r + probe "$1" "$2" bib-add "$tmpd/scratch.bib" + rm -f "$tmpd/scratch.bib" "$tmpd/scratch.bib.bak" + probe_fixedpoint "$1" "$2" +} + +# fire input at bib-add (with and without -f) against a known database; +# afterwards the database must still parse and still contain every +# original entry +probe_survival() { # probe_survival <case-name> <input> + cases=$((cases + 1)) + cat > "$tmpd/inv.bib" <<'EOF' +@string{js = {Journal of Survival}} +@article{orig1990one, author = {A. Original}, title = {One}, year = 1990} +@article{orig1991two, author = {B. Original}, title = {Two}, journal = js, year = 1991} +@misc{orig1992three, title = {Three}, note = "v. " # 3} +EOF + timeout 5 bib-add "$tmpd/inv.bib" < "$2" > /dev/null 2>&1 + timeout 5 bib-add -f "$tmpd/inv.bib" < "$2" > /dev/null 2>&1 + if ! awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-lskeys.awk" \ + "$tmpd/inv.bib" > "$tmpd/invkeys" 2> /dev/null \ + || ! grep -q '^orig1990one$' "$tmpd/invkeys" \ + || ! grep -q '^orig1991two$' "$tmpd/invkeys" \ + || ! grep -q '^orig1992three$' "$tmpd/invkeys"; then + fails=$((fails + 1)) + printf 'WRECK %s: database lost entries or no longer parses\n' "$1" + cp "$2" "$tmpd/wreck.$fails" + fi + rm -f "$tmpd/inv.bib.bak" +} + +echo "=== random bytes (x$N) ===" +i=0 +while [ "$i" -lt "$N" ]; do + i=$((i + 1)) + head -c 512 /dev/urandom > "$tmpd/in" + run_entry_tools "random/$i" "$tmpd/in" +done + +echo "=== mutated bibtex (x$N) ===" +seed_bib > "$tmpd/seed" +i=0 +while [ "$i" -lt "$N" ]; do + i=$((i + 1)) + mutate "$((i + OFF))" < "$tmpd/seed" > "$tmpd/in" + run_entry_tools "mutated/$i" "$tmpd/in" + probe_survival "mutated/$i" "$tmpd/in" +done + +echo "=== syntax soup (x$N) ===" +i=0 +while [ "$i" -lt "$N" ]; do + i=$((i + 1)) + soup "$((i + OFF))" > "$tmpd/in" + run_entry_tools "soup/$i" "$tmpd/in" + probe_survival "soup/$i" "$tmpd/in" +done + +echo "=== malformed aux and refer (x$N) ===" +printf '@article{k, author={A}, title={T}, year=1}\n' > "$tmpd/db.bib" +i=0 +while [ "$i" -lt "$N" ]; do + i=$((i + 1)) + printf '\\citation{k}\n\\citation{a,b,c}\n\\abx@aux@cite{0}{k}\n%%A Some One\n%%T Title\n' \ + | mutate "$((i + OFF))" > "$tmpd/in" + cases=$((cases + 1)) + if ! timeout 5 bib-extract "$tmpd/in" "$tmpd/db.bib" > /dev/null 2> "$tmpd/err"; then + rc=$? + if [ "$rc" -gt 2 ]; then + fails=$((fails + 1)) + printf 'CRASH aux/%d: bib-extract (exit %d)\n' "$i" "$rc" + fi + fi + probe "ref/$i" "$tmpd/in" bib-convert -b +done + +printf '\n%d cases, %d failures' "$cases" "$fails" +if [ "$fails" -gt 0 ]; then + printf ' (failing inputs preserved in %s)\n' "$tmpd" + trap - EXIT + exit 1 +fi +printf '\n' diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 8a9f49a..653f838 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -74,6 +74,63 @@ check "bib-add -f replaces entry" grep -q ' year = 1985,' "$db" n=$(grep -c '^@article{junk-key,' "$db") [ "$n" = 1 ] && ok "bib-add -f leaves one copy" || not_ok "bib-add -f leaves one copy" +# ---- bib-add hardening -------------------------------------------------- +check "bib-add writes a backup on modify" \ + sh -c "cmp -s '$db.bak' /dev/null; [ -s '$db.bak' ]" + +# replacement must not disturb other bytes (comments, formatting) +cat > "$tmpd/pres.bib" <<'EOF' +% Encoding: UTF-8 +% hand-maintained; do not reformat + +@ARTICLE{ keep , AUTHOR = "Stays Verbatim", YEAR = 1111 } + +@article{swap2000old, author = {Old One}, title = {Swap}, year = 2000} +EOF +printf '@article{swap2000old, author = {New One}, title = {Swap}, year = 2000}\n' \ + | bib-add -f "$tmpd/pres.bib" +check "bib-add -f preserves comments" grep -q '^% Encoding: UTF-8$' "$tmpd/pres.bib" +check "bib-add -f preserves untouched entries verbatim" \ + grep -q 'AUTHOR = "Stays Verbatim"' "$tmpd/pres.bib" +check "bib-add -f swapped the entry" grep -q '{New One}' "$tmpd/pres.bib" +n=$(grep -c 'swap2000old' "$tmpd/pres.bib") +[ "$n" = 1 ] && ok "bib-add -f removed the old version" \ + || not_ok "bib-add -f removed the old version" + +# bogus input must never modify the database +cp "$db" "$tmpd/before" +printf '@article{, author = {No Key}, year = 1}\n' | bib-add "$db" 2> /dev/null \ + && not_ok "bib-add rejects empty keys" || ok "bib-add rejects empty keys" +printf '@misc{same2, title={A}}\n@misc{same2, title={B}}\n' \ + | bib-add "$db" 2> /dev/null \ + && not_ok "bib-add rejects dup keys within input" \ + || ok "bib-add rejects dup keys within input" +check "database untouched after rejected input" cmp -s "$db" "$tmpd/before" + +# concurrent writers serialize; no entries lost, lock released +i=0 +while [ "$i" -lt 10 ]; do + i=$((i + 1)) + printf '@misc{lock%d, title = {L %d}}\n' "$i" "$i" \ + | bib-add "$tmpd/lock.bib" 2> /dev/null & +done +wait +n=$(bib-ls "$tmpd/lock.bib" | wc -l) +[ "$n" -eq 10 ] && ok "concurrent bib-add loses no entries" \ + || not_ok "concurrent bib-add loses no entries (got $n)" +[ -e "$tmpd/lock.bib.lock" ] && not_ok "lock released after use" \ + || ok "lock released after use" + +# a stale lock from a dead process is reaped +echo 999999 > "$tmpd/lock.bib.lock" +printf '@misc{lock11, title = {L 11}}\n' | bib-add "$tmpd/lock.bib" 2> /dev/null +check "stale lock reaped" grep -q 'lock11' "$tmpd/lock.bib" + +mkdir "$tmpd/adir" +printf '@misc{k, title={T}}\n' | bib-add "$tmpd/adir" 2> /dev/null \ + && not_ok "bib-add refuses non-regular files" \ + || ok "bib-add refuses non-regular files" + # ---- bib-extract ------------------------------------------------------- cat > "$tmpd/all.bib" <<'EOF' @article{alpha2020one, author = {A. Alpha}, title = {One}, year = 2020} |