aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bib-check.awk
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bib-check.awk')
-rw-r--r--lib/bib-check.awk69
1 files changed, 69 insertions, 0 deletions
diff --git a/lib/bib-check.awk b/lib/bib-check.awk
new file mode 100644
index 0000000..4411a55
--- /dev/null
+++ b/lib/bib-check.awk
@@ -0,0 +1,69 @@
+# bib-check.awk - lint a bibtex database
+#
+# Requires bib-parse.awk and bib-canon.awk. Reports, one problem per
+# line on stdout:
+# - missing fields required by the entry type
+# - duplicate keys
+# - entries whose titles normalize to the same string (likely dups)
+# - empty field values
+# Exits 1 if any problem was found.
+
+# Required fields for each standard BibTeX entry type.
+#
+# Each value is a space-separated list of requirements; a requirement
+# written "a|b" is satisfied when at least one of a, b is present.
+# Types that do not appear here (e.g. misc) are not checked for
+# required fields.
+BEGIN {
+    REQ["article"]       = "author title journal year"
+    REQ["book"]          = "author|editor title publisher year"
+    REQ["booklet"]       = "title"
+    # inbook also needs a chapter and/or a page range
+    REQ["inbook"]        = "author|editor title chapter|pages publisher year"
+    REQ["incollection"]  = "author title booktitle publisher year"
+    REQ["inproceedings"] = "author title booktitle year"
+    REQ["conference"]    = "author title booktitle year"
+    REQ["manual"]        = "title"
+    REQ["mastersthesis"] = "author title school year"
+    REQ["phdthesis"]     = "author title school year"
+    REQ["proceedings"]   = "title year"
+    REQ["techreport"]    = "author title institution year"
+    REQ["unpublished"]   = "author title note"
+}
+
+# bib_pass(raw) - deliberately a no-op: the linter does nothing with "raw".
+# NOTE(review): presumably a hook the parser calls for text it passes
+# through unparsed -- confirm against bib-parse.awk.
+function bib_pass(raw) {
+}
+
+# problem(key, msg) - report one finding.
+#
+# Writes "key: msg" as a single line on stdout and sets BIB_BAD, which
+# the END rule uses as the exit status.
+function problem(key, msg) {
+    BIB_BAD = 1
+    printf("%s: %s\n", key, msg)
+}
+
+# bib_entry(type, key) - lint one entry.
+#
+# Reports, through problem():
+#   - a key that was already seen on an earlier entry
+#   - requirements from REQ[type] for which no alternative has a
+#     non-empty value
+#   - every field whose value is empty after bib_trim()
+#   - a title that normalizes (lower-cased, everything except a-z and
+#     0-9 removed) to the same string as an earlier entry's title
+#
+# Field data is read through bib_get() and the BIB_N / BIB_NAME /
+# BIB_VAL globals, all defined outside this file (presumably set up by
+# bib-parse.awk for the current entry -- confirm).
+#
+# Everything after "key" in the parameter list is an awk-style local;
+# callers are expected to pass only type and key.
+function bib_entry(type, key,    n, req, i, alts, na, j, found, t, lt) {
+    if (key in BIB_KEYS_SEEN)
+        problem(key, "duplicate key")
+    BIB_KEYS_SEEN[key] = 1
+
+    # required fields ("a|b" means at least one of a, b).
+    # BibTeX entry types are case-insensitive and REQ is keyed in lower
+    # case, so fold the type first; otherwise "Article" would silently
+    # skip this check if the type arrives as written in the database.
+    lt = tolower(type)
+    if (lt in REQ) {
+        n = split(REQ[lt], req, " ")
+        for (i = 1; i <= n; i++) {
+            na = split(req[i], alts, "|")
+            found = 0
+            # stop at the first alternative that has a value
+            for (j = 1; j <= na && !found; j++)
+                if (bib_get(alts[j]) != "")
+                    found = 1
+            if (!found)
+                problem(key, "missing required field: " req[i])
+        }
+    }
+
+    # empty values
+    for (i = 1; i <= BIB_N; i++)
+        if (bib_trim(BIB_VAL[i]) == "")
+            problem(key, "empty field: " BIB_NAME[i])
+
+    # likely duplicate entries: same normalized title
+    t = tolower(bib_get("title"))
+    gsub(/[^a-z0-9]/, "", t)
+    if (t != "") {
+        if (!(t in BIB_TITLES_SEEN))
+            BIB_TITLES_SEEN[t] = key
+        else if (BIB_TITLES_SEEN[t] != key)
+            # A repeated key with the same title was already reported as
+            # "duplicate key" above; do not also say it duplicates itself.
+            problem(key, "title duplicates " BIB_TITLES_SEEN[t])
+    }
+}
+
+# Exit status: BIB_BAD is set to 1 by problem() and is otherwise unset,
+# which evaluates to 0 here.
+END {
+    exit (BIB_BAD + 0)
+}