1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
#!/bin/sh
# bib-fetch - fetch a bibtex entry for a pdf from its DOI or arXiv id
#
# usage: bib-fetch [-d doi | -a arxiv-id] [file.pdf]
#
# Unless given with -d or -a, an identifier is extracted from the first
# two pages of the pdf (requires pdftotext): a DOI if one is found,
# falling back to an arXiv id. DOIs are resolved through doi.org content
# negotiation (crossref et al.), arXiv ids through arxiv.org. The entry
# is piped through bib-key and emitted canonically on stdout with a
# generated key.
# usage - print the one-line synopsis on stderr and exit with status 2
usage() {
  printf '%s\n' 'usage: bib-fetch [-d doi | -a arxiv-id] [file.pdf]' >&2
  exit 2
}
# Command-line parsing: -d carries a DOI, -a an arXiv id; any option that
# getopts rejects ends in usage. After the shift only operands remain
# in "$@".
arxiv=
doi=
while getopts d:a: flag; do
  case $flag in
    a) arxiv=$OPTARG ;;
    d) doi=$OPTARG ;;
    *) usage ;;
  esac
done
shift $((OPTIND - 1))
# The two identifier options exclude each other.
if [ -n "$doi" ] && [ -n "$arxiv" ]; then
  usage
fi
# Locate the bib-key helper: use the copy next to this script when it is
# executable, otherwise leave the lookup to PATH.
bibkey=$(dirname "$0")/bib-key
if [ ! -x "$bibkey" ]; then
  bibkey=bib-key
fi
# Both the DOI and the arXiv lookup are done with curl, so give up early
# when it is not installed.
if ! command -v curl > /dev/null 2>&1; then
  printf 'bib-fetch: curl is required\n' >&2
  exit 1
fi
# No identifier was given on the command line: extract one from the pdf.
# Exactly one operand (the pdf) is required on this path. pdftotext renders
# the first two pages as text and awk picks out the first DOI-looking
# string and the first arXiv stamp; on exit from this block at least one of
# $doi / $arxiv is non-empty, otherwise the script has terminated.
if [ -z "$doi" ] && [ -z "$arxiv" ]; then
  [ $# -eq 1 ] || usage
  pdf=$1
  [ -r "$pdf" ] || { printf 'bib-fetch: cannot read %s\n' "$pdf" >&2; exit 1; }
  command -v pdftotext > /dev/null 2>&1 || {
    printf 'bib-fetch: pdftotext is required to extract an identifier\n' >&2
    exit 1
  }
  # A file name starting with "-" would be taken for an option by pdftotext.
  case $pdf in
    -*) pdfarg=./$pdf ;;
    *) pdfarg=$pdf ;;
  esac
  # awk reports both identifiers on one line as "<doi> TAB <arxiv-id>";
  # either side may be empty. The separator is kept in a variable so the
  # split below uses exactly the character awk emits (neither regex can
  # match a tab, so it never occurs inside an identifier).
  tab=$(printf '\t')
  # pdftotext diagnostics are discarded on purpose: a pdf that yields no
  # text simply ends in the "no DOI or arXiv id found" error below.
  ids=$(pdftotext -l 2 "$pdfarg" - 2> /dev/null | awk '
    # Keep the first DOI-looking string and the first arXiv stamp found on
    # any line; the fetch step prefers the DOI when both are present.
    doi == "" {
      if (match($0, /10\.[0-9][0-9][0-9][0-9][0-9]*\/[^ \t"<>]+/)) {
        doi = substr($0, RSTART, RLENGTH)
        # drop sentence punctuation glued to the end of the match
        sub(/[.,;)\]]+$/, "", doi)
      }
    }
    arxiv == "" {
      # modern ids (arXiv:2104.01234v2) and old style (arXiv:cs/0101001);
      # the 6-character "arXiv:" prefix is cut off the stored id
      if (match($0, /[aA][rR][xX][iI][vV]:[0-9][0-9][0-9][0-9]\.[0-9][0-9][0-9][0-9][0-9]?(v[0-9]+)?/) ||
          match($0, /[aA][rR][xX][iI][vV]:[a-z-]+(\.[A-Z][A-Z])?\/[0-9][0-9][0-9][0-9][0-9][0-9][0-9](v[0-9]+)?/))
        arxiv = substr($0, RSTART + 6, RLENGTH - 6)
    }
    END { printf "%s\t%s\n", doi, arxiv }')
  doi=${ids%%"$tab"*}
  arxiv=${ids#*"$tab"}
  if [ -z "$doi" ] && [ -z "$arxiv" ]; then
    printf 'bib-fetch: no DOI or arXiv id found in %s\n' "$pdf" >&2
    exit 1
  fi
fi
# Fetch the bibtex entry and hand it to bib-key. A DOI takes precedence
# over an arXiv id. curl runs with -g so that brackets or braces inside an
# identifier are sent literally instead of being expanded as URL globs,
# with -f so an HTTP error status fails the fetch, and with -L to follow
# the doi.org redirect.
if [ -n "$doi" ]; then
  # Accept the common "doi:" and resolver-URL spellings of a DOI; a bare
  # DOI starts with "10." and is left untouched.
  case $doi in
    [dD][oO][iI]:*)
      doi=${doi#????} ;;
    http://doi.org/* | https://doi.org/* | http://dx.doi.org/* | https://dx.doi.org/*)
      doi=${doi#*doi.org/} ;;
  esac
  entry=$(curl -g -sSfL -H 'Accept: application/x-bibtex' \
    "https://doi.org/$doi") || {
    printf 'bib-fetch: failed to fetch entry for doi %s\n' "$doi" >&2
    exit 1
  }
else
  # Strip an "arXiv:" prefix in any letter case (ids taken from the pdf
  # arrive without it, ids passed with -a may carry it).
  case $arxiv in
    [aA][rR][xX][iI][vV]:*) arxiv=${arxiv#??????} ;;
  esac
  entry=$(curl -g -sSfL "https://arxiv.org/bibtex/$arxiv") || {
    printf 'bib-fetch: failed to fetch entry for arXiv id %s\n' "$arxiv" >&2
    exit 1
  }
fi
# A successful transfer with an empty body is not an entry.
if [ -z "$entry" ]; then
  printf 'bib-fetch: empty response from server\n' >&2
  exit 1
fi
printf '%s\n' "$entry" | "$bibkey"
|