11 files changed, 506 insertions, 52 deletions
diff --git a/Makefile b/Makefile
index a7df72c..1c48121 100644
--- a/Makefile
+++ b/Makefile
@@ -6,8 +6,8 @@ MANDIR = $(PREFIX)/share/man/man1
 SCRIPTS = bib-util bib-add bib-check bib-convert bib-extract bib-fetch \
           bib-gen bib-key bib-ls
 LIBS = lib/bib-parse.awk lib/bib-canon.awk lib/bib-select.awk \
-       lib/bib-lskeys.awk lib/bib-key.awk lib/bib-ls.awk \
-       lib/bib-check.awk lib/bib2ref.awk lib/ref2bib.awk
+       lib/bib-strip.awk lib/bib-lskeys.awk lib/bib-key.awk \
+       lib/bib-ls.awk lib/bib-check.awk lib/bib2ref.awk lib/ref2bib.awk
 MANPAGES = man/bib-util.1 man/bib-add.1 man/bib-check.1 man/bib-convert.1 \
            man/bib-extract.1 man/bib-fetch.1 man/bib-gen.1 man/bib-key.1 \
            man/bib-ls.1
@@ -19,6 +19,9 @@ test:
 	tests/run-tests.sh
 	tests/integration.sh
 
+fuzz:
+	tests/fuzz.sh
+
 install:
 	-mkdir -p $(BINDIR) $(LIBDIR) $(MANDIR)
 	cp $(SCRIPTS) $(BINDIR)
@@ -31,4 +34,4 @@ uninstall:
 	    bib-extract.1 bib-fetch.1 bib-gen.1 bib-key.1 bib-ls.1
 	rm -rf $(LIBDIR)
 
-.PHONY: all test install uninstall
+.PHONY: all test fuzz install uninstall
diff --git a/README.md b/README.md
index 11cda7f..434a98a 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,10 @@ smaller provided scripts for convenience's sake.
 ## bib-add
 A script for inserting a new entry into a bibtex database file. It will
 accept a fully formatted entry on standard input and add it to a database
-file presented as an argument.
+file presented as an argument. The new version of the database is built
+in a temporary file and verified before the original is touched, and the
+previous contents are saved in db.bib.bak before it is overwritten.
+Replacing an entry (-f) preserves every other byte of the file.
 
 ## bib-gen
 A script which generates a bibtex entry based on input. By default it will
@@ -72,4 +75,5 @@ POSIX shell and awk only, with two exceptions: bib-fetch requires curl,
 plus pdftotext (poppler) for DOI extraction from pdfs.
 
 # Tests
-    make test
+    make test               # unit + integration suites
+    make fuzz               # robustness fuzzing against bogus input
diff --git a/bib-add b/bib-add
index 02a079e..8e3846c 100755
--- a/bib-add
+++ b/bib-add
@@ -3,12 +3,23 @@
 #
 # usage: bib-add [-f] db.bib < entry
 #   -f  replace existing entries with the same key
+#
+# The database is not touched until its replacement is ready: the
+# complete new version is built in a temporary file, verified by
+# re-parsing, and only then written over the original, with the previous
+# contents saved in db.bib.bak. Replacement with -f splices entries out by
+# their exact source spans, so the rest of the file is preserved byte-for-byte.
 
 usage() {
   printf 'usage: bib-add [-f] db.bib < entry\n' >&2
   exit 2
 }
 
+die() {
+  printf 'bib-add: %s\n' "$1" >&2
+  exit 1
+}
+
 if [ -n "$BIBUTILS_LIB" ]; then
   LIB=$BIBUTILS_LIB
 elif [ -d "$(dirname "$0")/lib" ]; then
@@ -17,6 +28,10 @@ else
   LIB=/usr/local/share/bibutils
 fi
 
+lskeys() {
+  awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@"
+}
+
 force=0
 while getopts f opt; do
   case $opt in
@@ -27,41 +42,95 @@ done
 shift $((OPTIND - 1))
 [ $# -eq 1 ] || usage
 db=$1
+[ -e "$db" ] && [ ! -f "$db" ] && die "$db is not a regular file"
 
-tmp=$(mktemp) && tmpkeys=$(mktemp) && tmpdb=$(mktemp) || exit 1
-trap 'rm -f "$tmp" "$tmpkeys" "$tmpdb"' EXIT INT TERM
+# serialize writers: set -C (noclobber) makes creating db.lock with
+# our pid inside a single atomic step, so whoever creates it owns the
+# database until they remove it; a lock whose owner has died is reaped
+lock=$db.lock
+tries=0
+while ! (set -C; echo $$ > "$lock") 2> /dev/null; do
+  owner=$(cat "$lock" 2> /dev/null)   # empty when the lock cannot be read
+  if [ -n "$owner" ] && ! kill -0 "$owner" 2> /dev/null; then
+    # reap only if it is still that pid's lock; NOTE(review): kill -0 may also fail for a live pid owned by another user - confirm
+    printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2
+    [ "$(cat "$lock" 2> /dev/null)" = "$owner" ] && rm -f "$lock"
+    continue   # retry at once; this pass does not count against the limit
+  fi
+  tries=$((tries + 1))
+  [ "$tries" -ge 30 ] && die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)"   # give up on the 30th failed attempt
+  sleep 1
+done
 
-# canonicalize the incoming entries
+# release only a lock that is still ours
+unlock() { [ "$(cat "$lock" 2> /dev/null)" = "$$" ] && rm -f "$lock"; }
+
+# cleanup lives in the EXIT trap; a signal only exits, so it runs exactly once
+tmp=$(mktemp) && tmpkeys=$(mktemp) || { rm -f "$tmp"; unlock; exit 1; }
+trap 'rm -f "$tmp" "$tmp.old" "$tmp.dups" "$tmpkeys" "$new"; unlock' EXIT
+trap 'exit 130' INT; trap 'exit 143' TERM
+
+# canonicalize and validate the incoming entries
 awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \
     -v keys= -v invert=1 > "$tmp"
+[ -s "$tmp" ] || die "no entries on stdin"
 
-if [ ! -s "$tmp" ]; then
-  printf 'bib-add: no entries on stdin\n' >&2
-  exit 1
+lskeys "$tmp" > "$tmpkeys"
+grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key"
+indups=$(sort "$tmpkeys" | uniq -d)
+[ -n "$indups" ] && die "duplicate keys within input: $indups"
+
+# check the incoming keys against the database
+dups=
+oldcount=0
+if [ -s "$db" ]; then
+  lskeys "$db" > "$tmp.old" || die "cannot parse $db"
+  oldcount=$(wc -l < "$tmp.old")
+  dups=$(grep -Fxf "$tmpkeys" "$tmp.old")
+  rm -f "$tmp.old"
+  if [ -n "$dups" ] && [ "$force" -ne 1 ]; then
+    printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2
+    printf '%s\n' "$dups" >&2
+    exit 1
+  fi
 fi
 
-awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$tmp" > "$tmpkeys"
+# build the complete new database next to the original (same
+# filesystem, so a brand-new database can be renamed into place)
+new=$(mktemp "$db.XXXXXX") || die "cannot create a temporary file next to $db"
 
-if [ -f "$db" ]; then
-  dups=$(awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$db" \
-      | grep -Fxf "$tmpkeys")
+if [ -s "$db" ]; then
   if [ -n "$dups" ]; then
-    if [ "$force" -eq 1 ]; then
-      # rewrite the database without the entries being replaced
-      keys=$(printf '%s\n' "$dups" | paste -sd, -)
-      awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \
-          -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=1 \
-          "$db" > "$tmpdb" || exit 1
-      cp "$tmpdb" "$db"
-    else
-      printf 'bib-add: duplicate keys in %s:\n' "$db" >&2
-      printf '%s\n' "$dups" >&2
-      exit 1
-    fi
+    # splice out the entries being replaced; all other bytes survive
+    printf '%s\n' "$dups" > "$tmp.dups"
+    awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \
+        -v keyfile="$tmp.dups" "$db" > "$new" || die "failed to rewrite $db"
+    rm -f "$tmp.dups"
+  else
+    cat "$db" > "$new" || die "failed to copy $db"
   fi
+  # end the kept text with a newline, then a blank line - unless nothing is left
+  [ -n "$(tail -c 1 "$new")" ] && echo >> "$new"
+  [ -s "$new" ] && echo >> "$new"
 fi
+cat "$tmp" >> "$new"
+
+# verify the result before touching the original: every old key minus
+# the replaced ones, plus every new key, must parse back out
+ndups=$(printf '%s' "$dups" | grep -c '^' || true)
+nnew=$(wc -l < "$tmpkeys")
+expect=$((oldcount - ndups + nnew))
+actual=$(lskeys "$new" | wc -l)
+[ "$actual" -eq "$expect" ] || \
+  die "verification failed ($actual entries, expected $expect); $db left untouched"
 
-{
-  [ -s "$db" ] && echo ""
-  cat "$tmp"
-} >> "$db"
+if [ -s "$db" ]; then
+  # back up first, then overwrite through the original name (not an
+  # atomic rename) so its permissions, ownership and any symlink survive
+  cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched"
+  cat "$new" > "$db" || die "write to $db failed; original is in $db.bak"
+  rm -f "$new"
+else
+  mv "$new" "$db" || die "cannot write $db"   # no existing contents: plain rename
+  chmod 644 "$db" 2> /dev/null   # best effort; a failure here is ignored
+fi
diff --git a/bib-extract b/bib-extract
index 297588a..ac0363d 100755
--- a/bib-extract
+++ b/bib-extract
@@ -23,7 +23,10 @@ aux=$1
 shift
 [ -r "$aux" ] || { printf 'bib-extract: cannot read %s\n' "$aux" >&2; exit 1; }
 
-keys=$(awk '
+keyfile=$(mktemp) || exit 1   # signals below only exit; EXIT does the cleanup
+trap 'rm -f "$keyfile"' EXIT; trap 'exit 130' INT; trap 'exit 143' TERM
+
+awk '
   # classic bibtex: \citation{key,key,...}
   {
     line = $0
@@ -46,10 +49,10 @@ keys=$(awk '
         print s
       line = substr(line, RSTART + RLENGTH)
     }
-  }' "$aux" | sort -u | paste -sd, -)
+  }' "$aux" | sort -u > "$keyfile"
 
-[ -n "$keys" ] || exit 0
+[ -s "$keyfile" ] || exit 0
 
 # a key of "*" (from \nocite{*}) selects the whole database
-exec awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \
-         -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=0 "$@"
+awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \
+    -f "$LIB/bib-select.awk" -v keyfile="$keyfile" -v invert=0 "$@"
diff --git a/lib/bib-key.awk b/lib/bib-key.awk
index 4223155..3f4117f 100644
--- a/lib/bib-key.awk
+++ b/lib/bib-key.awk
@@ -38,7 +38,7 @@ function bib_mkkey(    a, y, t, surname, word, n, parts, i, w) {
     surname = (n > 0) ? parts[n] : ""
   }
   gsub(/[^A-Za-z0-9]/, "", surname)
-  surname = tolower(surname)
+  surname = tolower(substr(surname, 1, 30))
   if (surname == "")
     surname = "anon"
 
@@ -62,5 +62,5 @@ function bib_mkkey(    a, y, t, surname, word, n, parts, i, w) {
     break
   }
 
-  return surname y word
+  return surname y substr(word, 1, 30)
 }
diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk
index e5bf9fa..e83cb07 100644
--- a/lib/bib-parse.awk
+++ b/lib/bib-parse.awk
@@ -4,7 +4,8 @@
 #   bib_entry(type, key) - called once per regular entry. The fields are
 #                          available in BIB_N, BIB_NAME[], BIB_VAL[] and
 #                          BIB_KIND[]; the raw source text of the entry
-#                          is in BIB_RAW.
+#                          is in BIB_RAW, and its position in the input
+#                          buffer bib_buf is BIB_START..BIB_END-1.
 #   bib_pass(raw)        - called for @string and @preamble blocks with
 #                          their raw source text.
 #
@@ -211,6 +212,8 @@ function bib_entry_at(s, i,    at, type, opener, closer, key, name, c) {
     BIB_KIND[BIB_N] = BIB_VKIND
   }
   BIB_RAW = bib_trim(substr(s, at, i - at))
+  BIB_START = at
+  BIB_END = i
   bib_entry(type, key)
   return i
 }
diff --git a/lib/bib-select.awk b/lib/bib-select.awk
index 9aa5a37..3ebd16f 100644
--- a/lib/bib-select.awk
+++ b/lib/bib-select.awk
@@ -1,21 +1,31 @@
 # bib-select.awk - emit entries selected by key, canonically
 #
 # Requires bib-parse.awk and bib-canon.awk. Variables (set with -v):
-#   keys   - comma-separated list of entry keys; a key of "*" selects
-#            every entry (as produced by \nocite{*})
-#   invert - 0: emit entries whose key is in the list
-#            1: emit entries whose key is NOT in the list
+#   keys    - comma-separated list of entry keys; a key of "*" selects
+#             every entry (as produced by \nocite{*})
+#   keyfile - file with one key per line, for key lists too large to
+#             pass on the command line; merged with keys
+#   invert  - 0: emit entries whose key is in the list
+#             1: emit entries whose key is NOT in the list
 #
-# With keys="" and invert=1 this acts as a canonicalizing filter for
+# With no keys and invert=1 this acts as a canonicalizing filter for
 # everything. @string and @preamble blocks always pass through.
 
+function bib_sel_add(k) {
+  if (k == "*")
+    BIB_SEL_ALL = 1
+  else
+    BIB_SEL[k] = 1
+}
+
 BEGIN {
   bib_sel_n = split(keys, bib_sel_k, ",")
-  for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) {
-    if (bib_sel_k[bib_sel_i] == "*")
-      BIB_SEL_ALL = 1
-    else
-      BIB_SEL[bib_sel_k[bib_sel_i]] = 1
+  for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++)
+    bib_sel_add(bib_sel_k[bib_sel_i])
+  if (keyfile != "") {
+    while ((getline bib_sel_line < keyfile) > 0)
+      bib_sel_add(bib_sel_line)
+    close(keyfile)
   }
 }
 
diff --git a/lib/bib-strip.awk b/lib/bib-strip.awk
new file mode 100644
index 0000000..cecca3e
--- /dev/null
+++ b/lib/bib-strip.awk
@@ -0,0 +1,49 @@
+# bib-strip.awk - remove entries by key, preserving all other bytes
+#
+# Requires bib-parse.awk. Variables (set with -v):
+#   keys    - comma-separated list of entry keys to remove
+#   keyfile - file with one key per line, for key lists too large to
+#             pass on the command line; merged with keys
+#
+# Unlike bib-select.awk, which re-emits entries canonically, this
+# splices the matched entries' source spans out of the input and
+# leaves everything else - comments, formatting, @string blocks -
+# byte-for-byte intact. Used by bib-add -f so that replacing one
+# entry never rewrites the rest of the database.
+#
+# This END block runs after bib-parse.awk's (END blocks execute in
+# the order their files are given to awk), so the spans recorded by
+# the hooks below are complete by the time output happens.
+
+BEGIN {
+  bib_strip_n = split(keys, bib_strip_k, ",")
+  for (bib_strip_i = 1; bib_strip_i <= bib_strip_n; bib_strip_i++)
+    BIB_DROP[bib_strip_k[bib_strip_i]] = 1
+  if (keyfile != "") {
+    while ((getline bib_strip_line < keyfile) > 0)
+      BIB_DROP[bib_strip_line] = 1
+    close(keyfile)
+  }
+}
+
+function bib_pass(raw) { }
+
+function bib_entry(type, key) {
+  if (key in BIB_DROP) {
+    BIB_NSPAN++
+    BIB_SPAN_S[BIB_NSPAN] = BIB_START
+    BIB_SPAN_E[BIB_NSPAN] = BIB_END
+  }
+}
+
+END {
+  bib_strip_len = length(bib_buf)
+  bib_strip_pos = 1
+  for (bib_strip_i = 1; bib_strip_i <= BIB_NSPAN; bib_strip_i++) {
+    printf "%s", substr(bib_buf, bib_strip_pos, BIB_SPAN_S[bib_strip_i] - bib_strip_pos)
+    bib_strip_pos = BIB_SPAN_E[bib_strip_i]  # resume just past the dropped span
+    while (bib_strip_pos <= bib_strip_len && substr(bib_buf, bib_strip_pos, 1) ~ /[ \t\r\n]/)
+      bib_strip_pos++  # and past the whitespace that followed it
+  }
+  printf "%s", substr(bib_buf, bib_strip_pos)
+}
diff --git a/man/bib-add.1 b/man/bib-add.1
index 5c7a674..32582c5 100644
--- a/man/bib-add.1
+++ b/man/bib-add.1
@@ -21,14 +21,41 @@ If an incoming entry's key already exists in the database, the entry is
 rejected and the duplicate keys are reported on standard error, unless
 .B \-f
 is given.
+Input with an empty key, or with the same key appearing twice, is
+always rejected.
+.SH SAFETY
+The database is not touched until its replacement is complete.
+The complete new version is built in a temporary file alongside the
+original, verified by re-parsing it and checking that exactly the
+expected entries are present, and only then written over the database
+\(em with the previous contents first saved in
+.IB db.bib .bak\fR.
+If anything fails before that final write, the original file is left untouched.
+.PP
+Concurrent invocations are serialized through a lock file,
+.IB db.bib .lock\fR,
+created atomically with the owner's pid inside.
+A waiter retries for 30 seconds before giving up with an error;
+a lock whose owning process has died is reaped automatically.
 .SH OPTIONS
 .TP
 .B \-f
 Replace existing entries that share a key with an incoming entry.
-The database is rewritten canonically in the process.
+The replaced entries are spliced out by their exact source spans, so
+comments and the formatting of every other entry are preserved
+byte-for-byte.
 .SH EXIT STATUS
-0 on success, 1 if no entries were read or a duplicate key was
-rejected, 2 on usage error.
+0 on success, 1 if the input was rejected or the database could not
+be safely rewritten, 2 on usage error.
+.SH FILES
+.TP
+.IB db.bib .bak
+The previous contents of the database, written before each
+modification.
+.TP
+.IB db.bib .lock
+Write lock held for the duration of an invocation; contains the
+owner's pid.
 .SH ENVIRONMENT
 .TP
 .B BIBUTILS_LIB
diff --git a/tests/fuzz.sh b/tests/fuzz.sh
new file mode 100755
index 0000000..4145d42
--- /dev/null
+++ b/tests/fuzz.sh
@@ -0,0 +1,229 @@
+#!/bin/sh
+# fuzz.sh - throw bogus input at the tools and watch for misbehavior
+#
+# usage: tests/fuzz.sh [iterations] [seed-offset]
+#        (default 100 per generator; the offset lets parallel runs
+#         explore different deterministic mutation seeds)
+#
+# Four generators feed every entry-consuming tool:
+#   random  - raw bytes from /dev/urandom
+#   mutated - a valid bibtex file with random structural damage
+#             (deleted/duplicated/inserted braces, quotes, @, #, =)
+#   soup    - random streams of bibtex syntax tokens
+#   format  - malformed aux files for bib-extract and refer records
+#             for bib-convert
+#
+# A case fails if a tool hangs (5s timeout), dies with an awk runtime
+# error, exits above 2, or breaks the canonicalization fixed-point
+# property (canon(canon(x)) must equal canon(x)).
+#
+# The mutated and soup inputs are additionally fired at bib-add
+# against a known database, which must afterwards still parse and
+# still contain every original entry (the survival invariant).
+
+ROOT=$(cd "$(dirname "$0")/.." && pwd)
+PATH=$ROOT:$PATH
+
+# byte semantics: random bytes are rarely valid UTF-8, and gawk's
+# locale warnings about that are not parser failures
+LC_ALL=C
+export LC_ALL
+N=${1:-100}
+OFF=${2:-0}
+tmpd=$(mktemp -d) || exit 1   # signals below only exit; EXIT does the cleanup
+trap 'rm -rf "$tmpd"' EXIT; trap 'exit 130' INT; trap 'exit 143' TERM
+
+fails=0
+cases=0
+
+# canonicalizing filter (used for the fixed-point property)
+canon() {
+  awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-canon.awk" \
+      -f "$ROOT/lib/bib-select.awk" -v keys= -v invert=1
+}
+
+# run one tool on one input file; report any sign of misbehavior
+probe() { # probe <case-name> <input> <cmd...>
+  name=$1
+  input=$2
+  shift 2
+  cases=$((cases + 1))
+  timeout 5 "$@" < "$input" > "$tmpd/out" 2> "$tmpd/err"
+  rc=$?
+  if [ "$rc" -eq 124 ]; then
+    fails=$((fails + 1))
+    printf 'HANG  %s: %s\n' "$name" "$*"
+    cp "$input" "$tmpd/hang.$fails"
+  elif [ "$rc" -gt 2 ]; then
+    fails=$((fails + 1))
+    printf 'CRASH %s: %s (exit %d)\n' "$name" "$*" "$rc"
+    cp "$input" "$tmpd/crash.$fails"
+  elif grep -Eq 'awk:.*(fatal|error)|[Ss]egmentation' "$tmpd/err"; then
+    fails=$((fails + 1))
+    printf 'AWKERR %s: %s: %s\n' "$name" "$*" "$(head -1 "$tmpd/err")"
+    cp "$input" "$tmpd/awkerr.$fails"
+  fi
+}
+
+# the canonicalization of any input must be a fixed point
+probe_fixedpoint() { # probe_fixedpoint <case-name> <input>
+  cases=$((cases + 1))
+  # timeout(1) execs a program, so it cannot run the canon shell function
+  timeout 5 awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-canon.awk" -f "$ROOT/lib/bib-select.awk" -v keys= -v invert=1 < "$2" > "$tmpd/c1" 2> /dev/null
+  timeout 5 awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-canon.awk" -f "$ROOT/lib/bib-select.awk" -v keys= -v invert=1 < "$tmpd/c1" > "$tmpd/c2" 2> /dev/null
+  cmp -s "$tmpd/c1" "$tmpd/c2" && return 0
+  fails=$((fails + 1))
+  printf 'NOTFIX %s: canon not idempotent\n' "$1"
+  cp "$2" "$tmpd/notfix.$fails"
+}
+
+seed_bib() {
+  cat <<'EOF'
+@string{cj = {The Computer Journal}}
+@article{knuth1984literate,
+  author = {Donald E. Knuth},
+  title = {Literate {P}rogramming},
+  journal = cj,
+  year = 1984,
+  pages = "97--111",
+  note = "vol. " # 27,
+}
+@inproceedings{lamport1978time,
+  author = {Leslie Lamport},
+  title = {Time, Clocks, and the Ordering of Events},
+  booktitle = {Communications of the ACM},
+  year = {1978},
+}
+EOF
+}
+
+# damage a file at a random spot: delete, duplicate, or insert a
+# structural character (awk does the randomness; seeded per case)
+mutate() { # mutate <seed> < in > out
+  awk -v seed="$1" '
+    BEGIN { srand(seed) }
+    { buf = buf $0 "\n" }
+    END {
+      n = length(buf)
+      chars = "{}\"@#=,()\\%"
+      for (m = 0; m < 1 + int(rand() * 8); m++) {
+        pos = 1 + int(rand() * n)
+        op = int(rand() * 3)
+        c = substr(chars, 1 + int(rand() * length(chars)), 1)
+        if (op == 0)        # delete a character
+          buf = substr(buf, 1, pos - 1) substr(buf, pos + 1)
+        else if (op == 1)   # insert a structural character
+          buf = substr(buf, 1, pos - 1) c substr(buf, pos)
+        else                # duplicate a slice
+          buf = substr(buf, 1, pos) substr(buf, pos, 1 + int(rand() * 20)) substr(buf, pos)
+        n = length(buf)
+      }
+      printf "%s", buf
+    }'
+}
+
+# a stream of plausible bibtex syntax fragments in random order
+soup() { # soup <seed> > out
+  awk -v seed="$1" '
+    BEGIN {
+      srand(seed)
+      n = 0
+      T[++n] = "@";  T[++n] = "{";    T[++n] = "}";  T[++n] = "\""
+      T[++n] = "#";  T[++n] = "=";    T[++n] = ",";  T[++n] = "("
+      T[++n] = ")";  T[++n] = "%";    T[++n] = "\\"; T[++n] = " "
+      T[++n] = "\n"; T[++n] = "word"; T[++n] = "1984"
+      T[++n] = "@article{k,"; T[++n] = "t = {v}";   T[++n] = "@string"
+      T[++n] = "@comment";    T[++n] = " and ";     T[++n] = "--"
+      len = 200 + int(rand() * 800)
+      for (i = 0; i < len; i++)
+        printf "%s", T[1 + int(rand() * n)]
+    }'
+}
+
+run_entry_tools() { # run_entry_tools <case-name> <input>
+  probe "$1" "$2" bib-key
+  probe "$1" "$2" bib-ls -l
+  probe "$1" "$2" bib-check
+  probe "$1" "$2" bib-convert -r
+  probe "$1" "$2" bib-add "$tmpd/scratch.bib"
+  rm -f "$tmpd/scratch.bib" "$tmpd/scratch.bib.bak"
+  probe_fixedpoint "$1" "$2"
+}
+
+# fire input at bib-add (with and without -f) against a known database;
+# afterwards the database must still parse and still contain every
+# original entry
+probe_survival() { # probe_survival <case-name> <input>
+  cases=$((cases + 1))
+  cat > "$tmpd/inv.bib" <<'EOF'
+@string{js = {Journal of Survival}}
+@article{orig1990one, author = {A. Original}, title = {One}, year = 1990}
+@article{orig1991two, author = {B. Original}, title = {Two}, journal = js, year = 1991}
+@misc{orig1992three, title = {Three}, note = "v. " # 3}
+EOF
+  timeout 5 bib-add "$tmpd/inv.bib" < "$2" > /dev/null 2>&1
+  timeout 5 bib-add -f "$tmpd/inv.bib" < "$2" > /dev/null 2>&1
+  if ! awk -f "$ROOT/lib/bib-parse.awk" -f "$ROOT/lib/bib-lskeys.awk" \
+       "$tmpd/inv.bib" > "$tmpd/invkeys" 2> /dev/null \
+     || ! grep -q '^orig1990one$' "$tmpd/invkeys" \
+     || ! grep -q '^orig1991two$' "$tmpd/invkeys" \
+     || ! grep -q '^orig1992three$' "$tmpd/invkeys"; then
+    fails=$((fails + 1))
+    printf 'WRECK %s: database lost entries or no longer parses\n' "$1"
+    cp "$2" "$tmpd/wreck.$fails"
+  fi
+  rm -f "$tmpd/inv.bib.bak"
+}
+
+echo "=== random bytes (x$N) ==="
+i=0
+while [ "$i" -lt "$N" ]; do
+  i=$((i + 1))
+  head -c 512 /dev/urandom > "$tmpd/in"
+  run_entry_tools "random/$i" "$tmpd/in"
+done
+
+echo "=== mutated bibtex (x$N) ==="
+seed_bib > "$tmpd/seed"
+i=0
+while [ "$i" -lt "$N" ]; do
+  i=$((i + 1))
+  mutate "$((i + OFF))" < "$tmpd/seed" > "$tmpd/in"
+  run_entry_tools "mutated/$i" "$tmpd/in"
+  probe_survival "mutated/$i" "$tmpd/in"
+done
+
+echo "=== syntax soup (x$N) ==="
+i=0
+while [ "$i" -lt "$N" ]; do
+  i=$((i + 1))
+  soup "$((i + OFF))" > "$tmpd/in"
+  run_entry_tools "soup/$i" "$tmpd/in"
+  probe_survival "soup/$i" "$tmpd/in"
+done
+
+echo "=== malformed aux and refer (x$N) ==="
+printf '@article{k, author={A}, title={T}, year=1}\n' > "$tmpd/db.bib"
+i=0
+while [ "$i" -lt "$N" ]; do
+  i=$((i + 1))
+  printf '\\citation{k}\n\\citation{a,b,c}\n\\abx@aux@cite{0}{k}\n%%A Some One\n%%T Title\n' \
+    | mutate "$((i + OFF))" > "$tmpd/in"
+  cases=$((cases + 1))
+  # capture the status directly: inside "if ! cmd" $? is that of the negation
+  timeout 5 bib-extract "$tmpd/in" "$tmpd/db.bib" > /dev/null 2> "$tmpd/err"
+  rc=$?
+  if [ "$rc" -gt 2 ]; then
+    fails=$((fails + 1))
+    printf 'CRASH aux/%d: bib-extract (exit %d)\n' "$i" "$rc"
+  fi
+  probe "ref/$i" "$tmpd/in" bib-convert -b
+done
+
+printf '\n%d cases, %d failures' "$cases" "$fails"
+if [ "$fails" -gt 0 ]; then
+  printf ' (failing inputs preserved in %s)\n' "$tmpd"
+  trap - EXIT
+  exit 1
+fi
+printf '\n'
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 8a9f49a..653f838 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -74,6 +74,63 @@ check "bib-add -f replaces entry" grep -q '  year = 1985,' "$db"
 n=$(grep -c '^@article{junk-key,' "$db")
 [ "$n" = 1 ] && ok "bib-add -f leaves one copy" || not_ok "bib-add -f leaves one copy"
 
+# ---- bib-add hardening --------------------------------------------------
+check "bib-add writes a backup on modify" \
+  test -s "$db.bak"
+
+# replacement must not disturb other bytes (comments, formatting)
+cat > "$tmpd/pres.bib" <<'EOF'
+% Encoding: UTF-8
+% hand-maintained; do not reformat
+
+@ARTICLE{ keep ,  AUTHOR = "Stays  Verbatim", YEAR = 1111 }
+
+@article{swap2000old, author = {Old One}, title = {Swap}, year = 2000}
+EOF
+printf '@article{swap2000old, author = {New One}, title = {Swap}, year = 2000}\n' \
+  | bib-add -f "$tmpd/pres.bib"
+check "bib-add -f preserves comments" grep -q '^% Encoding: UTF-8$' "$tmpd/pres.bib"
+check "bib-add -f preserves untouched entries verbatim" \
+  grep -q 'AUTHOR = "Stays  Verbatim"' "$tmpd/pres.bib"
+check "bib-add -f swapped the entry" grep -q '{New One}' "$tmpd/pres.bib"
+n=$(grep -c 'swap2000old' "$tmpd/pres.bib")
+[ "$n" = 1 ] && ok "bib-add -f removed the old version" \
+             || not_ok "bib-add -f removed the old version"
+
+# bogus input must never modify the database
+cp "$db" "$tmpd/before"
+printf '@article{, author = {No Key}, year = 1}\n' | bib-add "$db" 2> /dev/null \
+  && not_ok "bib-add rejects empty keys" || ok "bib-add rejects empty keys"
+printf '@misc{same2, title={A}}\n@misc{same2, title={B}}\n' \
+  | bib-add "$db" 2> /dev/null \
+  && not_ok "bib-add rejects dup keys within input" \
+  || ok "bib-add rejects dup keys within input"
+check "database untouched after rejected input" cmp -s "$db" "$tmpd/before"
+
+# concurrent writers serialize; no entries lost, lock released
+i=0
+while [ "$i" -lt 10 ]; do
+  i=$((i + 1))
+  printf '@misc{lock%d, title = {L %d}}\n' "$i" "$i" \
+    | bib-add "$tmpd/lock.bib" 2> /dev/null &
+done
+wait
+n=$(bib-ls "$tmpd/lock.bib" | wc -l)
+[ "$n" -eq 10 ] && ok "concurrent bib-add loses no entries" \
+                || not_ok "concurrent bib-add loses no entries (got $n)"
+[ -e "$tmpd/lock.bib.lock" ] && not_ok "lock released after use" \
+                             || ok "lock released after use"
+
+# a stale lock from a dead process is reaped (that sh has exited by now)
+sh -c 'echo $$' > "$tmpd/lock.bib.lock"
+printf '@misc{lock11, title = {L 11}}\n' | bib-add "$tmpd/lock.bib" 2> /dev/null
+check "stale lock reaped" grep -q 'lock11' "$tmpd/lock.bib"
+
+mkdir "$tmpd/adir"
+printf '@misc{k, title={T}}\n' | bib-add "$tmpd/adir" 2> /dev/null \
+  && not_ok "bib-add refuses non-regular files" \
+  || ok "bib-add refuses non-regular files"
+
 # ---- bib-extract -------------------------------------------------------
 cat > "$tmpd/all.bib" <<'EOF'
 @article{alpha2020one, author = {A. Alpha}, title = {One}, year = 2020}