aboutsummaryrefslogtreecommitdiffstats
path: root/bib-add
diff options
context:
space:
mode:
author Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2026-06-06 13:44:00 -0400
committer Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2026-06-06 13:44:00 -0400
commit c102ab995f9a86a77e40b9a952b2b23c0bd7de74 (patch)
tree d51b9a8f1a55f7f6e6e5afb89d524b9baa350f45 /bib-add
parent b56c273d8198ae6cee69bbc9fe5a6a61da4074e4 (diff)
download bibutils-c102ab995f9a86a77e40b9a952b2b23c0bd7de74.tar.gz
Fuzzing with associated fixes
Diffstat (limited to 'bib-add')
-rwxr-xr-xbib-add121
1 files changed, 95 insertions, 26 deletions
diff --git a/bib-add b/bib-add
index 02a079e..8e3846c 100755
--- a/bib-add
+++ b/bib-add
@@ -3,12 +3,23 @@
#
# usage: bib-add [-f] db.bib < entry
# -f replace existing entries with the same key
+#
+# The complete new version of the database is built in a temporary
+# file and verified by re-parsing before the original is touched. An
+# existing database is then overwritten through its own name, after
+# its previous contents have been saved in db.bib.bak; a database that
+# does not exist yet is created by moving the temporary file into
+# place. Replacement with -f splices entries out by their exact
+# source spans, so the rest of the file is preserved byte-for-byte.
usage() {
printf 'usage: bib-add [-f] db.bib < entry\n' >&2
exit 2
}
+# die MESSAGE
+# print MESSAGE on stderr, prefixed with the program name, and exit
+# with status 1
+die() {
+ printf 'bib-add: %s\n' "$1" >&2
+ exit 1
+}
+
if [ -n "$BIBUTILS_LIB" ]; then
LIB=$BIBUTILS_LIB
elif [ -d "$(dirname "$0")/lib" ]; then
@@ -17,6 +28,10 @@ else
LIB=/usr/local/share/bibutils
fi
+# lskeys FILE...
+# run the bib-parse.awk and bib-lskeys.awk programs from $LIB over the
+# given files; the callers below treat the output as one entry key
+# per line
+lskeys() {
+ awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@"
+}
+
force=0
while getopts f opt; do
case $opt in
@@ -27,41 +42,95 @@ done
shift $((OPTIND - 1))
[ $# -eq 1 ] || usage
db=$1
+[ -e "$db" ] && [ ! -f "$db" ] && die "$db is not a regular file"
-tmp=$(mktemp) && tmpkeys=$(mktemp) && tmpdb=$(mktemp) || exit 1
-trap 'rm -f "$tmp" "$tmpkeys" "$tmpdb"' EXIT INT TERM
+# serialize writers: with set -C (noclobber) the redirection fails if
+# db.lock already exists, so creating it is an atomic test-and-set and
+# whoever creates it owns the database until they remove it; a lock
+# whose owner has died is reaped
+lock=$db.lock
+tries=0
+
+# pid_alive PID
+# succeed if PID names a running process. kill -0 alone is not enough:
+# it also fails (EPERM) for a live process that belongs to another
+# user, which would get a held lock reaped, so ps is asked as well.
+pid_alive() {
+    kill -0 "$1" 2> /dev/null || ps -p "$1" > /dev/null 2>&1
+}
+
+while ! (set -C; echo $$ > "$lock") 2> /dev/null; do
+    # an empty owner means the lock exists but its pid is not written
+    # yet (or it vanished meanwhile); that case just waits and retries
+    owner=$(cat "$lock" 2> /dev/null)
+    if [ -n "$owner" ] && ! pid_alive "$owner"; then
+        # reap, but only if it is still that dead process's lock
+        printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2
+        if [ "$(cat "$lock" 2> /dev/null)" = "$owner" ]; then
+            # a stale lock that cannot be removed would otherwise make
+            # this loop spin forever without sleeping
+            rm -f "$lock" || die "cannot remove stale lock $lock"
+        fi
+        continue
+    fi
+    tries=$((tries + 1))
+    [ "$tries" -ge 30 ] && die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)"
+    sleep 1
+done
-# canonicalize the incoming entries
+# release only a lock that is still ours
+# (db.lock is removed only while it contains this shell's pid; if it
+# holds any other pid, or cannot be read, it is left alone)
+unlock() {
+ [ "$(cat "$lock" 2> /dev/null)" = "$$" ] && rm -f "$lock"
+}
+
+# scratch files; if the second mktemp fails the first file is removed
+# again before giving up
+tmp=$(mktemp) && tmpkeys=$(mktemp) || { rm -f "$tmp"; unlock; exit 1; }
+
+# cleanup
+# remove every scratch file this script may have created (including
+# the $tmp.old and $tmp.dups side files) and release the lock
+cleanup() {
+    rm -f "$tmp" "$tmpkeys" "$tmp.old" "$tmp.dups" "$new"
+    unlock
+}
+trap cleanup EXIT
+# a trapped signal does not terminate the script by itself: without an
+# explicit exit the handler would return and the script would carry on
+# with its temp files gone and the lock released. Exiting here runs
+# the EXIT trap, so cleanup still happens exactly once.
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+# canonicalize and validate the incoming entries
awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \
-v keys= -v invert=1 > "$tmp"
+[ -s "$tmp" ] || die "no entries on stdin"
-if [ ! -s "$tmp" ]; then
- printf 'bib-add: no entries on stdin\n' >&2
- exit 1
+# list the incoming keys, one per line. A failure here must stop the
+# run: an empty or partial key list would make the duplicate checks
+# below pass for entries that were never examined.
+lskeys "$tmp" > "$tmpkeys" || die "cannot parse the entries on stdin"
+grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key"
+# keys that occur more than once within the input itself
+indups=$(sort "$tmpkeys" | uniq -d)
+[ -n "$indups" ] && die "duplicate keys within input: $indups"
+
+# check the incoming keys against the database
+# dups: keys present in both the input and the database
+# oldcount: number of keys currently in the database; used further
+# down to verify the rebuilt file
+dups=
+oldcount=0
+if [ -s "$db" ]; then
+ lskeys "$db" > "$tmp.old" || die "cannot parse $db"
+ oldcount=$(wc -l < "$tmp.old")
+ # -F -x: compare keys as fixed strings against whole lines
+ dups=$(grep -Fxf "$tmpkeys" "$tmp.old")
+ rm -f "$tmp.old"
+ # without -f an already existing key is an error
+ if [ -n "$dups" ] && [ "$force" -ne 1 ]; then
+ printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2
+ printf '%s\n' "$dups" >&2
+ exit 1
+ fi
fi
-awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$tmp" > "$tmpkeys"
+# build the complete new database next to the original, i.e. in the
+# same directory and hence on the same filesystem: when no database
+# exists yet the result is put in place with mv, which can then be a
+# plain rename instead of a copy
+new=$(mktemp "$db.XXXXXX") || exit 1
-if [ -f "$db" ]; then
- dups=$(awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$db" \
- | grep -Fxf "$tmpkeys")
+if [ -s "$db" ]; then
if [ -n "$dups" ]; then
- if [ "$force" -eq 1 ]; then
- # rewrite the database without the entries being replaced
- keys=$(printf '%s\n' "$dups" | paste -sd, -)
- awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \
- -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=1 \
- "$db" > "$tmpdb" || exit 1
- cp "$tmpdb" "$db"
- else
- printf 'bib-add: duplicate keys in %s:\n' "$db" >&2
- printf '%s\n' "$dups" >&2
- exit 1
- fi
+ # splice out the entries being replaced; all other bytes survive
+ printf '%s\n' "$dups" > "$tmp.dups"
+ awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \
+ -v keyfile="$tmp.dups" "$db" > "$new" || die "failed to rewrite $db"
+ rm -f "$tmp.dups"
+ else
+ cat "$db" > "$new" || die "failed to copy $db"
fi
+ # terminate the old content with a newline if it lacks one, then
+ # add one blank line before the appended entries
+ [ -n "$(tail -c 1 "$new")" ] && echo >> "$new"
+ echo >> "$new"
fi
+cat "$tmp" >> "$new"
+
+# verify the result before touching the original: every old key minus
+# the replaced ones, plus every new key, must parse back out
+# (grep -c exits non-zero when it counts no lines, hence the || true;
+# it still prints 0 in that case)
+ndups=$(printf '%s' "$dups" | grep -c '^' || true)
+nnew=$(wc -l < "$tmpkeys")
+expect=$((oldcount - ndups + nnew))
+actual=$(lskeys "$new" | wc -l)
+[ "$actual" -eq "$expect" ] || \
+ die "verification failed ($actual entries, expected $expect); $db left untouched"
-{
- [ -s "$db" ] && echo ""
- cat "$tmp"
-} >> "$db"
+if [ -s "$db" ]; then
+ # back up first, then write through the original name so that its
+ # permissions, ownership and any symlink are preserved
+ # NOTE(review): this overwrite truncates and refills $db instead of
+ # renaming over it, so an interruption during the cat leaves a
+ # partial $db; the previous contents are then only in $db.bak
+ cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched"
+ cat "$new" > "$db" || die "write to $db failed; original is in $db.bak"
+ rm -f "$new"
+else
+ # no database yet, or an empty one: move the new file into place;
+ # the chmod is presumably there because mktemp creates its file
+ # readable by the owner only
+ mv "$new" "$db" || die "cannot write $db"
+ chmod 644 "$db" 2> /dev/null
+fi