diff options
Diffstat (limited to 'lib/bib-check.awk')
| -rw-r--r-- | lib/bib-check.awk | 69 |
1 files changed, 69 insertions, 0 deletions
# bib-check.awk - lint a bibtex database
#
# Requires bib-parse.awk and bib-canon.awk.  Reports, one problem per
# line on stdout:
#   - missing fields required by the entry type
#   - duplicate keys
#   - entries whose titles normalize to the same string (likely dups)
#   - empty field values
# Exits 1 if any problem was found.

BEGIN {
    # Required fields per entry type, separated by spaces.  "a|b" means
    # at least one of a, b must be non-empty.  Entry types that are not
    # listed here are not checked for required fields.
    REQ["article"]       = "author title journal year"
    REQ["book"]          = "author|editor title publisher year"
    REQ["booklet"]       = "title"
    # Standard BibTeX also demands "chapter and/or pages" for inbook.
    REQ["inbook"]        = "author|editor title chapter|pages publisher year"
    REQ["incollection"]  = "author title booktitle publisher year"
    REQ["inproceedings"] = "author title booktitle year"
    REQ["conference"]    = "author title booktitle year"
    REQ["manual"]        = "title"
    REQ["mastersthesis"] = "author title school year"
    REQ["phdthesis"]     = "author title school year"
    REQ["proceedings"]   = "title year"
    REQ["techreport"]    = "author title institution year"
    REQ["unpublished"]   = "author title note"
}

# No-op: the checker ignores whatever is handed to it here.
# NOTE(review): presumably a hook that bib-parse.awk calls for text
# outside of entries - confirm against the parser.
function bib_pass(raw) { }

# Report one problem on stdout as "key: msg" and record that the run
# found at least one problem, so that END exits non-zero.
function problem(key, msg) {
    printf "%s: %s\n", key, msg
    BIB_BAD = 1
}

# Lint a single entry.
#   type  entry type (matched against REQ case-insensitively)
#   key   citation key; used as the prefix of every report
# Field data is read through bib_get(), bib_trim() and the BIB_N /
# BIB_NAME / BIB_VAL globals, none of which are defined in this file
# (presumably provided by bib-parse.awk / bib-canon.awk).
# The remaining parameters are awk-style local variables.
function bib_entry(type, key,    n, req, i, alts, na, j, found, t) {
    if (key in BIB_KEYS_SEEN)
        problem(key, "duplicate key")
    BIB_KEYS_SEEN[key] = 1

    # required fields ("a|b" means at least one of a, b)
    # BibTeX entry types are case-insensitive, so "@Article" must hit
    # the same REQ row as "@article".
    type = tolower(type)
    if (type in REQ) {
        n = split(REQ[type], req, " ")
        for (i = 1; i <= n; i++) {
            na = split(req[i], alts, "|")
            found = 0
            # stop at the first alternative that has a value
            for (j = 1; j <= na && !found; j++)
                if (bib_get(alts[j]) != "")
                    found = 1
            if (!found)
                problem(key, "missing required field: " req[i])
        }
    }

    # empty values
    for (i = 1; i <= BIB_N; i++)
        if (bib_trim(BIB_VAL[i]) == "")
            problem(key, "empty field: " BIB_NAME[i])

    # likely duplicate entries: same normalized title
    # (lower-cased, everything except a-z and 0-9 removed)
    t = tolower(bib_get("title"))
    gsub(/[^a-z0-9]/, "", t)
    if (t != "") {
        if (t in BIB_TITLES_SEEN)
            problem(key, "title duplicates " BIB_TITLES_SEEN[t])
        else
            BIB_TITLES_SEEN[t] = key    # remember the first key with this title
    }
}

# BIB_BAD is only ever set by problem(); when it was never set it is
# uninitialized and the exit status is 0.
END { exit BIB_BAD }