diff options
| author | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2026-06-06 13:44:00 -0400 |
|---|---|---|
| committer | Douglas B. Rumbaugh <doug@douglasrumbaugh.com> | 2026-06-06 13:44:00 -0400 |
| commit | c102ab995f9a86a77e40b9a952b2b23c0bd7de74 (patch) | |
| tree | d51b9a8f1a55f7f6e6e5afb89d524b9baa350f45 /bib-add | |
| parent | b56c273d8198ae6cee69bbc9fe5a6a61da4074e4 (diff) | |
| download | bibutils-c102ab995f9a86a77e40b9a952b2b23c0bd7de74.tar.gz | |
Fuzzing with associated fixes
Diffstat (limited to 'bib-add')
| -rwxr-xr-x | bib-add | 121 |
1 files changed, 95 insertions, 26 deletions
```diff
@@ -3,12 +3,23 @@
 #
 # usage: bib-add [-f] db.bib < entry
 # -f replace existing entries with the same key
+#
+# The database is never modified in place: the complete new version is
+# built in a temporary file, verified by re-parsing, and only then
+# moved over the original, with the previous contents saved in
+# db.bib.bak. Replacement with -f splices entries out by their exact
+# source spans, so the rest of the file is preserved byte-for-byte.
 
 usage() {
     printf 'usage: bib-add [-f] db.bib < entry\n' >&2
     exit 2
 }
 
+die() {
+    printf 'bib-add: %s\n' "$1" >&2
+    exit 1
+}
+
 if [ -n "$BIBUTILS_LIB" ]; then
     LIB=$BIBUTILS_LIB
 elif [ -d "$(dirname "$0")/lib" ]; then
@@ -17,6 +28,10 @@ else
     LIB=/usr/local/share/bibutils
 fi
 
+lskeys() {
+    awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@"
+}
+
 force=0
 while getopts f opt; do
     case $opt in
@@ -27,41 +42,95 @@ done
 shift $((OPTIND - 1))
 [ $# -eq 1 ] || usage
 db=$1
+[ -e "$db" ] && [ ! -f "$db" ] && die "$db is not a regular file"
 
-tmp=$(mktemp) && tmpkeys=$(mktemp) && tmpdb=$(mktemp) || exit 1
-trap 'rm -f "$tmp" "$tmpkeys" "$tmpdb"' EXIT INT TERM
+# serialize writers: set -C (noclobber) makes creating db.lock with
+# our pid inside a single atomic step, so whoever creates it owns the
+# database until they remove it; a lock whose owner has died is reaped
+lock=$db.lock
+tries=0
+while ! (set -C; echo $$ > "$lock") 2> /dev/null; do
+    owner=$(cat "$lock" 2> /dev/null)
+    if [ -n "$owner" ] && ! kill -0 "$owner" 2> /dev/null; then
+        # reap, but only if it is still that dead process's lock
+        printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2
+        [ "$(cat "$lock" 2> /dev/null)" = "$owner" ] && rm -f "$lock"
+        continue
+    fi
+    tries=$((tries + 1))
+    [ "$tries" -ge 30 ] && die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)"
+    sleep 1
+done
 
-# canonicalize the incoming entries
+# release only a lock that is still ours
+unlock() {
+    [ "$(cat "$lock" 2> /dev/null)" = "$$" ] && rm -f "$lock"
+}
+
+tmp=$(mktemp) && tmpkeys=$(mktemp) || { unlock; exit 1; }
+trap 'rm -f "$tmp" "$tmpkeys" "$new"; unlock' EXIT INT TERM
+
+# canonicalize and validate the incoming entries
 awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \
     -v keys= -v invert=1 > "$tmp"
+[ -s "$tmp" ] || die "no entries on stdin"
 
-if [ ! -s "$tmp" ]; then
-    printf 'bib-add: no entries on stdin\n' >&2
-    exit 1
+lskeys "$tmp" > "$tmpkeys"
+grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key"
+indups=$(sort "$tmpkeys" | uniq -d)
+[ -n "$indups" ] && die "duplicate keys within input: $indups"
+
+# check the incoming keys against the database
+dups=
+oldcount=0
+if [ -s "$db" ]; then
+    lskeys "$db" > "$tmp.old" || die "cannot parse $db"
+    oldcount=$(wc -l < "$tmp.old")
+    dups=$(grep -Fxf "$tmpkeys" "$tmp.old")
+    rm -f "$tmp.old"
+    if [ -n "$dups" ] && [ "$force" -ne 1 ]; then
+        printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2
+        printf '%s\n' "$dups" >&2
+        exit 1
+    fi
 fi
 
-awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$tmp" > "$tmpkeys"
+# build the complete new database next to the original (same
+# filesystem, so the final move cannot be interrupted halfway)
+new=$(mktemp "$db.XXXXXX") || exit 1
 
-if [ -f "$db" ]; then
-    dups=$(awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$db" \
-        | grep -Fxf "$tmpkeys")
+if [ -s "$db" ]; then
     if [ -n "$dups" ]; then
-        if [ "$force" -eq 1 ]; then
-            # rewrite the database without the entries being replaced
-            keys=$(printf '%s\n' "$dups" | paste -sd, -)
-            awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \
-                -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=1 \
-                "$db" > "$tmpdb" || exit 1
-            cp "$tmpdb" "$db"
-        else
-            printf 'bib-add: duplicate keys in %s:\n' "$db" >&2
-            printf '%s\n' "$dups" >&2
-            exit 1
-        fi
+        # splice out the entries being replaced; all other bytes survive
+        printf '%s\n' "$dups" > "$tmp.dups"
+        awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \
+            -v keyfile="$tmp.dups" "$db" > "$new" || die "failed to rewrite $db"
+        rm -f "$tmp.dups"
+    else
+        cat "$db" > "$new" || die "failed to copy $db"
     fi
+    # ensure exactly one blank line before the appended entries
+    [ -n "$(tail -c 1 "$new")" ] && echo >> "$new"
+    echo >> "$new"
 fi
+cat "$tmp" >> "$new"
+
+# verify the result before touching the original: every old key minus
+# the replaced ones, plus every new key, must parse back out
+ndups=$(printf '%s' "$dups" | grep -c '^' || true)
+nnew=$(wc -l < "$tmpkeys")
+expect=$((oldcount - ndups + nnew))
+actual=$(lskeys "$new" | wc -l)
+[ "$actual" -eq "$expect" ] || \
+    die "verification failed ($actual entries, expected $expect); $db left untouched"
 
-{
-    [ -s "$db" ] && echo ""
-    cat "$tmp"
-} >> "$db"
+if [ -s "$db" ]; then
+    # back up first, then write through the original name so that its
+    # permissions, ownership and any symlink are preserved
+    cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched"
+    cat "$new" > "$db" || die "write to $db failed; original is in $db.bak"
+    rm -f "$new"
+else
+    mv "$new" "$db" || die "cannot write $db"
+    chmod 644 "$db" 2> /dev/null
+fi
```