aboutsummaryrefslogtreecommitdiffstats
path: root/bib-add
blob: 8e3846c3792fb881dfcb771009bdaef05aff5a33 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/sh
# bib-add - insert bibtex entries from stdin into a database file
#
# usage: bib-add [-f] db.bib < entry
#   -f  replace existing entries with the same key
#
# The original is left untouched until the complete new version has
# been built in a temporary file and verified by re-parsing. A
# database that already has contents is then copied to db.bib.bak and
# overwritten through its own name, which keeps its permissions,
# ownership and any symlink; a database that does not exist yet is
# created by moving the new file into place. Replacement with -f
# splices entries out by their exact source spans, so the rest of the
# file is preserved byte-for-byte.

# Print the one-line synopsis on stderr and terminate with status 2.
usage() {
  printf '%s\n' 'usage: bib-add [-f] db.bib < entry' 1>&2
  exit 2
}

# Report a fatal error and stop.
#   $1  message; printed on stderr as "bib-add: <message>"
# Exits with status 1. Further arguments are ignored.
die() {
  printf >&2 'bib-add: %s\n' "$1"
  exit 1
}

# Locate the awk library. Order of preference: $BIBUTILS_LIB when it
# is set and non-empty, then a lib/ directory next to this script,
# then the system-wide default.
LIB=$BIBUTILS_LIB
if [ -z "$LIB" ]; then
  LIB=$(dirname "$0")/lib
  [ -d "$LIB" ] || LIB=/usr/local/share/bibutils
fi

# Run the bib-parse and bib-lskeys awk programs from $LIB over the
# given files (awk reads stdin when no file is named). The callers
# treat the output as one entry key per line.
lskeys() {
  set -- -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$@"
  awk "$@"
}

# Parse the command line: an optional -f, then exactly one database
# path. Anything else prints the synopsis and exits (see usage).
force=0
while getopts f opt; do
  case $opt in
    f) force=1 ;;
    *) usage ;;
  esac
done
shift $((OPTIND - 1))
[ $# -eq 1 ] || usage
# An empty path would turn the lock, the temporary copy and the backup
# into ".lock", ".XXXXXX" and ".bak" in the current directory.
[ -n "$1" ] || usage
db=$1
# A path that begins with "-" (only reachable after "--") would be
# read as an option by the commands it is handed to later on.
case $db in
  -*) db=./$db ;;
esac
[ -e "$db" ] && [ ! -f "$db" ] && die "$db is not a regular file"

# Serialize writers. With set -C (noclobber) the redirection creates
# db.lock only if it does not exist yet, so exactly one process wins
# and owns the database until it removes the file again. The pid is
# written right after the file is created, so a contender can briefly
# see an empty lock; that counts as "held" and is waited for.
#
# A lock is reaped only when it names a process that is gone. kill -0
# alone cannot tell: it also fails for a live process that belongs to
# another user, so ps is consulted before the owner is declared dead.
# Lock content that is not a plain pid is never reaped automatically.
# Gives up after 30 attempts, one second apart.
lock=$db.lock
tries=0
while ! (set -C; echo $$ > "$lock") 2> /dev/null; do
  owner=$(cat "$lock" 2> /dev/null)
  case $owner in
    '' | *[!0-9]*)
      dead=0
      ;;
    *)
      if kill -0 "$owner" 2> /dev/null || ps -p "$owner" > /dev/null 2>&1; then
        dead=0
      else
        dead=1
      fi
      ;;
  esac
  if [ "$dead" -eq 1 ]; then
    printf 'bib-add: reaping stale lock from dead pid %s\n' "$owner" >&2
    # Re-read before removing so that a lock somebody else took in the
    # meantime is normally left alone. The check and the rm are still
    # two separate steps, so a small window remains.
    if [ "$(cat "$lock" 2> /dev/null)" = "$owner" ]; then
      # without this check a lock that cannot be removed would be
      # retried forever, without sleeping and without counting
      rm -f "$lock" || die "cannot remove stale lock $lock"
    fi
    continue
  fi
  tries=$((tries + 1))
  if [ "$tries" -ge 30 ]; then
    # no lock file at all means creating it is what keeps failing
    [ -e "$lock" ] || die "cannot create $lock"
    die "$db is locked by pid ${owner:-unknown} (remove $lock if wrong)"
  fi
  sleep 1
done

# Remove the lock file, but only while it still holds our own pid.
# Returns non-zero, leaving the file alone, when the lock is missing,
# unreadable or names a different process.
unlock() {
  case $(cat "$lock" 2> /dev/null) in
    "$$") rm -f "$lock" ;;
    *) return 1 ;;
  esac
}

# Scratch files and cleanup.
#
# The EXIT trap removes every scratch file (including the names that
# are derived from $tmp) and releases the lock. The signals get traps
# of their own that only call exit: a trap action that returns lets
# the script carry on after the signal - by then without its scratch
# files and without the lock - whereas exit ends the run and still
# triggers the EXIT trap. The traps are installed before the files are
# created so that a failing second mktemp does not leak the first.
tmp= tmpkeys= new=
cleanup() {
  [ -n "$tmp" ] && rm -f "$tmp" "$tmp.old" "$tmp.dups"
  [ -n "$tmpkeys" ] && rm -f "$tmpkeys"
  [ -n "$new" ] && rm -f "$new"
  unlock
}
trap cleanup EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
tmp=$(mktemp) && tmpkeys=$(mktemp) || exit 1

# Canonicalize and validate the incoming entries. Stdin is run through
# the parse, canon and select awk programs into $tmp (keys= with
# invert=1 presumably selects every entry - confirm in
# bib-select.awk), and the keys found are listed into $tmpkeys.
# A failing awk run is fatal here, just as it is for the database
# itself, instead of surfacing as "no entries" or going unnoticed.
awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" -f "$LIB/bib-select.awk" \
    -v keys= -v invert=1 > "$tmp" || die "cannot parse the entries on stdin"
[ -s "$tmp" ] || die "no entries on stdin"

lskeys "$tmp" > "$tmpkeys" || die "cannot list the keys of the entries on stdin"
# an empty line in the key list is an entry without a key
grep -q '^$' "$tmpkeys" && die "refusing to add an entry with an empty key"
# uniq -d prints each key that occurs more than once
indups=$(sort "$tmpkeys" | uniq -d)
[ -n "$indups" ] && die "duplicate keys within input: $indups"

# Check the incoming keys against the database.
# Sets oldcount to the number of keys currently in the database and
# dups to the existing keys that also occur in the input (one per
# line). Without -f any such key is fatal.
dups=
oldcount=0
if [ -s "$db" ]; then
  # a scratch file of its own from mktemp: a name derived from another
  # temp file would be predictable inside a shared temp directory
  oldkeys=$(mktemp) || exit 1
  if ! lskeys "$db" > "$oldkeys"; then
    rm -f "$oldkeys"
    die "cannot parse $db"
  fi
  oldcount=$(wc -l < "$oldkeys")
  # whole-line, fixed-string match of every input key against the old keys
  dups=$(grep -Fxf "$tmpkeys" "$oldkeys")
  rm -f "$oldkeys"
  if [ -n "$dups" ] && [ "$force" -ne 1 ]; then
    printf 'bib-add: duplicate keys in %s (use -f to replace):\n' "$db" >&2
    printf '%s\n' "$dups" >&2
    exit 1
  fi
fi

# Build the complete new database next to the original. Being in the
# same directory, a brand-new database can later be put in place with
# a plain rename.
new=$(mktemp "$db.XXXXXX") || exit 1

if [ -s "$db" ]; then
  if [ -n "$dups" ]; then
    # splice out the entries being replaced; all other bytes survive.
    # The key list goes into a scratch file of its own from mktemp
    # rather than a predictable name, and is removed on failure too.
    dupfile=$(mktemp) || exit 1
    if ! { printf '%s\n' "$dups" > "$dupfile" &&
           awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-strip.awk" \
               -v keyfile="$dupfile" "$db" > "$new"; }; then
      rm -f "$dupfile"
      die "failed to rewrite $db"
    fi
    rm -f "$dupfile"
  else
    cat "$db" > "$new" || die "failed to copy $db"
  fi
  # Separate the appended entries from what is already there by a
  # blank line, judged by the last two bytes: add nothing when the
  # file is empty or already ends in a blank line, one newline when it
  # ends in a single newline, two when the last line is unterminated.
  # Adding a blank line unconditionally would pile them up each time a
  # trailing entry is replaced.
  case $(tail -c 2 "$new" | od -An -tx1 | tr -d '[:space:]') in
    '' | 0a0a)
      ;;
    *0a)
      echo >> "$new" || die "failed to write $new"
      ;;
    *)
      { echo; echo; } >> "$new" || die "failed to write $new"
      ;;
  esac
fi
cat "$tmp" >> "$new" || die "failed to write $new"

# Sanity-check the candidate before the original is touched. Listing
# the keys of the new file must yield as many lines as there were old
# keys, less the replaced ones, plus the keys that came in on stdin.
if [ -n "$dups" ]; then
  replaced=$(printf '%s' "$dups" | grep -c '^')
else
  replaced=0
fi
added=$(wc -l < "$tmpkeys")
want=$((oldcount - replaced + added))
got=$(lskeys "$new" | wc -l)
if ! [ "$got" -eq "$want" ]; then
  die "verification failed ($got entries, expected $want); $db left untouched"
fi

# Put the verified result in place.
if [ -e "$db" ]; then
  # The database exists, possibly empty: write through the original
  # name so that its permissions, ownership and any symlink are
  # preserved. Deciding this on size alone would replace an empty
  # file, or a symlink to one, and reset its mode.
  saved=
  if [ -s "$db" ]; then
    # back up first; an empty original has nothing worth saving and
    # must not overwrite an earlier backup
    cp "$db" "$db.bak" || die "cannot write backup $db.bak; $db left untouched"
    saved="; original is in $db.bak"
  fi
  cat "$new" > "$db" || die "write to $db failed$saved"
  rm -f "$new"
else
  # nothing there yet: rename the finished file into place
  mv "$new" "$db" || die "cannot write $db"
  # mktemp creates the file private to its owner; opening it up is
  # best effort and must not become the script's exit status
  chmod 644 "$db" 2> /dev/null || :
fi