1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# bib-check.awk - lint a bibtex database
#
# Requires bib-parse.awk and bib-canon.awk. Reports, one problem per
# line on stdout:
# - missing fields required by the entry type
# - duplicate keys
# - entries whose titles normalize to the same string (likely dups)
# - empty field values
# Exits 1 if any problem was found.
# Required-field table, keyed by lowercase entry type.
# Each value is a space-separated list of requirements; a requirement
# written "a|b" is satisfied when at least one of the fields a, b is
# present. Entry types that do not appear here get no required-field
# check at all.
BEGIN {
    REQ["article"]       = "author title journal year"
    REQ["book"]          = "author|editor title publisher year"
    REQ["booklet"]       = "title"
    # standard BibTeX also requires a chapter and/or a page range for inbook
    REQ["inbook"]        = "author|editor title chapter|pages publisher year"
    REQ["incollection"]  = "author title booktitle publisher year"
    REQ["inproceedings"] = "author title booktitle year"
    REQ["conference"]    = "author title booktitle year"
    REQ["manual"]        = "title"
    REQ["mastersthesis"] = "author title school year"
    REQ["phdthesis"]     = "author title school year"
    REQ["proceedings"]   = "title year"
    REQ["techreport"]    = "author title institution year"
    REQ["unpublished"]   = "author title note"
}
# bib_pass(raw) - intentionally does nothing; the checker ignores
# whatever text it is handed here.
# NOTE(review): presumably a hook the parser calls for text outside of
# entries - confirm against bib-parse.awk.
function bib_pass(raw) {
    return
}
# problem(key, msg) - report one finding.
#
# Writes "key: msg" as a single line on stdout and sets the global
# BIB_BAD flag to 1, which the END rule uses as the exit status.
function problem(key, msg) {
    BIB_BAD = 1
    printf("%s: %s\n", key, msg)
}
# bib_entry(type, key) - lint one parsed entry.
#
#   type  entry type; matched case-insensitively against the REQ table
#   key   citation key; used as the prefix of every report line
#
# Field data for the current entry is read through bib_get(),
# bib_trim() and the globals BIB_N / BIB_NAME / BIB_VAL, all of which
# are defined outside this file (presumably by bib-parse.awk - confirm).
# Keys and normalized titles are remembered in BIB_KEYS_SEEN and
# BIB_TITLES_SEEN so later entries can be compared with earlier ones.
# Every finding is reported through problem().
# The parameters after "key" are locals.
function bib_entry(type, key,    n, req, i, alts, na, j, found, t) {
    if (key in BIB_KEYS_SEEN)
        problem(key, "duplicate key")
    BIB_KEYS_SEEN[key] = 1

    # required fields ("a|b" means at least one of a, b)
    # BibTeX entry types are case-insensitive, so fold the type before
    # looking it up; otherwise "@Article" would skip this check silently.
    type = tolower(type)
    if (type in REQ) {
        n = split(REQ[type], req, " ")
        for (i = 1; i <= n; i++) {
            na = split(req[i], alts, "|")
            found = 0
            # stop at the first alternative that has a value
            for (j = 1; j <= na && !found; j++)
                if (bib_get(alts[j]) != "")
                    found = 1
            if (!found)
                problem(key, "missing required field: " req[i])
        }
    }

    # empty values
    for (i = 1; i <= BIB_N; i++)
        if (bib_trim(BIB_VAL[i]) == "")
            problem(key, "empty field: " BIB_NAME[i])

    # likely duplicate entries: same normalized title
    # (lowercased, then everything except a-z and 0-9 removed)
    t = tolower(bib_get("title"))
    gsub(/[^a-z0-9]/, "", t)
    if (t != "") {
        if (t in BIB_TITLES_SEEN)
            problem(key, "title duplicates " BIB_TITLES_SEEN[t])
        else
            BIB_TITLES_SEEN[t] = key    # first key seen with this title
    }
}
# Exit status: 1 when problem() was called at least once (it sets
# BIB_BAD to 1), 0 when BIB_BAD was never set.
END {
    exit BIB_BAD + 0
}
|