#!/bin/sh
# bib-add - insert bibtex entries from stdin into a database file
#
# usage: bib-add [-f] db.bib < entry
#   -f  replace existing entries with the same key
#
# The complete new version of the database is built in a temporary
# file and verified by re-parsing before the original is touched. An
# existing database is then copied to db.bib.bak and overwritten
# through its own name (which keeps its permissions, ownership and any
# symlink); a database that does not exist yet is moved into place.
# Replacement with -f splices entries out by their exact source spans,
# so the rest of the file is preserved byte-for-byte.

# usage - write the one-line synopsis to stderr and exit with status 2.
usage() {
  printf '%s\n' 'usage: bib-add [-f] db.bib < entry' 1>&2
  exit 2
}

# die MESSAGE - report MESSAGE on stderr, prefixed with the program
# name, and terminate the script with status 1.
die() {
  printf '%s: %s\n' bib-add "$1" 1>&2
  exit 1
}

# Locate the directory holding the awk scripts: a non-empty
# $BIBUTILS_LIB wins, then a lib/ directory next to this script, then
# the system-wide default.
LIB=${BIBUTILS_LIB:-}
if [ -z "$LIB" ]; then
  LIB=$(dirname "$0")/lib
  [ -d "$LIB" ] || LIB=/usr/local/share/bibutils
fi

# lskeys [FILE...] - run the parser and the key lister from $LIB over
# the given files (awk reads stdin when none are given).
lskeys() {
  set -- -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@"
  awk "$@"
}

# Options: -f switches replacement on; anything else is a usage error.
force=0
while getopts f opt; do
  if [ "$opt" = f ]; then
    force=1
  else
    usage
  fi
done
shift "$((OPTIND - 1))"

# exactly one operand is accepted: the database path
if [ "$#" -ne 1 ]; then
  usage
fi
db=$1

# the database may be missing, but whatever exists under that name has
# to be a regular file
if [ -e "$db" ] && [ ! -f "$db" ]; then
  die "$db is not a regular file"
fi

# Serialize writers with a lock file. Under set -C (noclobber) the
# redirection refuses to open a file that already exists, so only one
# process can create db.lock; it then writes its pid into the file and
# owns the database until it removes it. A lock whose owner has died is
# reaped. An empty lock (just created, pid not written yet) counts as
# held.
lock=$db.lock
tries=0
while ! (set -C; echo $$ > "$lock") 2> /dev/null; do
  owner=$(cat "$lock" 2> /dev/null)
  # kill -0 also fails for a live process we are not allowed to signal
  # (one belonging to another user), so ask ps as well before declaring
  # the owner dead
  if [ -n "$owner" ] && ! kill -0 "$owner" 2> /dev/null &&
     ! ps -p "$owner" > /dev/null 2>&1; then
    # the lock changed hands while we were looking: simply try again
    [ "$(cat "$lock" 2> /dev/null)" = "$owner" ] || continue
    if rm -f "$lock" 2> /dev/null; then
      printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2
      continue
    fi
    # the stale lock could not be removed: fall through, so that the
    # retry limit below applies instead of spinning here forever
  fi
  tries=$((tries + 1))
  [ "$tries" -ge 30 ] && die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)"
  sleep 1
done

# unlock - remove the lock file, but only while it still records our
# own pid; otherwise leave it alone and return non-zero.
unlock() {
  if [ "$(cat "$lock" 2> /dev/null)" != "$$" ]; then
    return 1
  fi
  rm -f "$lock"
}

# scratch files: $tmp receives the canonicalized input, $tmpkeys its keys
tmp=$(mktemp) && tmpkeys=$(mktemp) || {
  # do not leave the first file behind when the second mktemp fails
  [ -n "$tmp" ] && rm -f "$tmp"
  unlock
  exit 1
}

# cleanup - delete every scratch file this run may have created
# ($tmp.old and $tmp.dups are short-lived helpers, $new is assigned
# later in the script) and release the lock.
cleanup() {
  rm -f "$tmp" "$tmp.old" "$tmp.dups" "$tmpkeys"
  [ -n "$new" ] && rm -f "$new"
  unlock
}

# cleanup runs once, on exit. A trapped signal only runs its handler
# and then resumes the script, so the signal handlers must exit
# explicitly (status 128 + signal number); otherwise the script would
# carry on without its lock and scratch files.
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# canonicalize and validate the incoming entries; a failing parser must
# stop us here, because whatever it wrote before failing would
# otherwise be taken for a complete, valid set of entries
awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \
    -v keys= -v invert=1 > "$tmp" || die "cannot parse entries on stdin"
[ -s "$tmp" ] || die "no entries on stdin"

# $tmpkeys: the keys of the incoming entries, one per line
lskeys "$tmp" > "$tmpkeys" || die "cannot list the keys of the incoming entries"
grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key"
# a key may occur only once within the input itself
indups=$(sort "$tmpkeys" | uniq -d)
[ -n "$indups" ] && die "duplicate keys within input: $indups"

# check the incoming keys against the database
#   oldcount - number of keys currently in the database
#   dups     - database keys that also occur in the input
dups=
oldcount=0
if [ -s "$db" ]; then
  # $tmp.old: the keys currently in the database, one per line
  if ! lskeys "$db" > "$tmp.old"; then
    rm -f "$tmp.old"   # do not leave the scratch file behind on failure
    die "cannot parse $db"
  fi
  oldcount=$(wc -l < "$tmp.old")
  # whole-line, literal matches of the incoming keys
  dups=$(grep -Fxf "$tmpkeys" "$tmp.old")
  rm -f "$tmp.old"
  if [ -n "$dups" ] && [ "$force" -ne 1 ]; then
    printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2
    printf '%s\n' "$dups" >&2
    exit 1
  fi
fi

# build the complete new database in a temporary file next to the
# original, i.e. on the same filesystem as its destination
new=$(mktemp "$db.XXXXXX") || exit 1

if [ -s "$db" ]; then
  if [ -n "$dups" ]; then
    # splice out the entries being replaced; all other bytes survive.
    # $tmp.dups hands their keys to the strip script, one per line.
    printf '%s\n' "$dups" > "$tmp.dups" || die "cannot write $tmp.dups"
    awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \
        -v keyfile="$tmp.dups" "$db" > "$new"
    strip_rc=$?
    rm -f "$tmp.dups"   # remove it on failure as well as on success
    [ "$strip_rc" -eq 0 ] || die "failed to rewrite $db"
  else
    cat "$db" > "$new" || die "failed to copy $db"
  fi
  # terminate an unterminated last line, then add one blank line to
  # separate the existing contents from the appended entries (blank
  # lines already at the end of the file are kept as they are)
  if [ -n "$(tail -c 1 "$new")" ]; then
    echo >> "$new" || die "failed to write $new; $db left untouched"
  fi
  echo >> "$new" || die "failed to write $new; $db left untouched"
fi
cat "$tmp" >> "$new" || die "failed to append the new entries; $db left untouched"

# Verify the result before touching the original: the number of keys
# that parse back out of the new file has to equal the old count, minus
# the replaced entries, plus the incoming ones.
if [ -n "$dups" ]; then
  ndups=$(printf '%s\n' "$dups" | grep -c '^')
else
  ndups=0
fi
nnew=$(wc -l < "$tmpkeys")
actual=$(lskeys "$new" | wc -l)
expect=$((oldcount - ndups + nnew))
if ! [ "$actual" -eq "$expect" ]; then
  die "verification failed ($actual entries, expected $expect); $db left untouched"
fi

# Install the verified file under the database's name.
if [ -s "$db" ]; then
  # back up first, then write through the original name so that its
  # permissions, ownership and any symlink are preserved
  cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched"
  cat "$new" > "$db" || die "write to $db failed; original is in $db.bak"
  rm -f "$new"
elif [ -e "$db" ] || [ -L "$db" ]; then
  # an empty database, or a symlink whose target does not exist yet:
  # there is nothing to back up, but still write through the existing
  # name instead of replacing it, for the same reason as above
  cat "$new" > "$db" || die "cannot write $db"
  rm -f "$new"
else
  # brand-new database: move the finished file into place and give it
  # ordinary file permissions
  mv "$new" "$db" || die "cannot write $db"
  chmod 644 "$db" 2> /dev/null
fi
