#!/bin/sh
# bib-extract - emit only the database entries cited in an aux file
#
# usage: bib-extract file.aux [db.bib]   (db on stdin if omitted)
#
# roff/refer citation sources are planned but not yet supported.

# usage
#   Write the one-line synopsis to stderr and terminate the script with
#   status 2.  Never returns to the caller.
usage() {
  {
    printf 'usage: bib-extract file.aux [db.bib]\n'
  } >&2
  exit 2
}

# Locate the directory holding the awk library scripts.  Precedence:
#   1. $BIBUTILS_LIB, when set and non-empty
#   2. a lib/ directory next to this script
#   3. /usr/local/share/bibutils
LIB=${BIBUTILS_LIB:-}
if [ -z "$LIB" ]; then
  LIB=$(dirname "$0")/lib
  [ -d "$LIB" ] || LIB=/usr/local/share/bibutils
fi

# Accept exactly one aux file, optionally followed by one database file;
# any other argument count is a usage error.
case $# in
  1|2) ;;
  *) usage ;;
esac

# Take the aux file off the argument list; what remains in "$@" is the
# optional database file.
aux=$1
shift

if [ ! -r "$aux" ]; then
  printf 'bib-extract: cannot read %s\n' "$aux" >&2
  exit 1
fi

# Scratch file that receives the sorted, de-duplicated citation keys.
keyfile=$(mktemp) || exit 1

# Cleanup lives on EXIT only.  A trapped signal merely runs its handler and
# then resumes the script where it left off, so a handler that only removes
# the file would leave the run going (with its key file gone) after Ctrl-C.
# The signal handlers therefore exit explicitly with 128 + signal number;
# that exit fires the EXIT trap, which removes the file.
trap 'rm -f "$keyfile"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# extract_keys FILE
#   Write every citation key found in aux file FILE to stdout, one per line,
#   unsorted and possibly with duplicates.  Two record styles are recognised:
#     \citation{key,key,...}         classic bibtex
#     \abx@aux@cite{segment}{key}    biblatex/biber (older: one argument)
#   FILE is attached to awk as standard input instead of being passed as an
#   operand: awk treats an operand of the form name=value as a variable
#   assignment and "-" as standard input, so a file with such a name would
#   otherwise never be read.
extract_keys() {
  awk '
  # classic bibtex: \citation{key,key,...}
  {
    line = $0
    while (match(line, /\\citation\{[^}]*\}/)) {
      # "\citation{" is 10 characters long; skip it and the closing brace
      n = split(substr(line, RSTART + 10, RLENGTH - 11), a, ",")
      for (i = 1; i <= n; i++)
        if (a[i] != "")
          print a[i]
      # continue scanning after this match; a line may hold several records
      line = substr(line, RSTART + RLENGTH)
    }
  }
  # biblatex/biber: \abx@aux@cite{segment}{key} (older: one argument)
  {
    line = $0
    while (match(line, /\\abx@aux@cite(\{[0-9]*\})?\{[^}]*\}/)) {
      s = substr(line, RSTART, RLENGTH)
      # keep only the contents of the last brace group, which is the key
      sub(/\}$/, "", s)
      sub(/^.*\{/, "", s)
      if (s != "")
        print s
      line = substr(line, RSTART + RLENGTH)
    }
  }' < "$1"
}

# Sort in the C locale so keys are compared byte-wise: under locale collation
# sort -u may treat distinct keys that collate as equal as duplicates and
# drop all but one of them.
extract_keys "$aux" | LC_ALL=C sort -u > "$keyfile"

# An empty key file means the aux file cited nothing: there is nothing to
# extract, so finish successfully without running the selection stage.
if [ ! -s "$keyfile" ]; then
  exit 0
fi

# Run the library scripts over the database.  "$@" holds the optional db
# file left after the aux argument was shifted off; with no operand awk
# reads the database from stdin.
# a key of "*" (from \nocite{*}) selects the whole database
# NOTE(review): the "*" handling and the meaning of invert=0 live in
# bib-select.awk, which is not visible here; confirm there.
awk -v keyfile="$keyfile" -v invert=0 \
    -f "$LIB/bib-parse.awk" \
    -f "$LIB/bib-canon.awk" \
    -f "$LIB/bib-select.awk" \
    "$@"
