1 files changed, 69 insertions, 0 deletions
diff --git a/lib/bib-check.awk b/lib/bib-check.awk
new file mode 100644
index 0000000..4411a55
--- /dev/null
+++ b/lib/bib-check.awk
@@ -0,0 +1,69 @@
+# bib-check.awk - lint a bibtex database
+#
+# Requires bib-parse.awk and bib-canon.awk. Reports, one problem per
+# line on stdout:
+#   - missing fields required by the entry type
+#   - duplicate keys
+#   - entries whose titles normalize to the same string (likely dups)
+#   - empty field values
+# Exits 1 if any problem was found.
+
+BEGIN {  # REQ[type]: space-separated required fields; "a|b" = at least one of a, b
+  REQ["article"] = "author title journal year"
+  REQ["book"] = "author|editor title publisher year"
+  REQ["booklet"] = "title"
+  REQ["inbook"] = "author|editor title chapter|pages publisher year"  # BibTeX: chapter and/or pages
+  REQ["incollection"] = "author title booktitle publisher year"
+  REQ["inproceedings"] = "author title booktitle year"
+  REQ["conference"] = "author title booktitle year"
+  REQ["manual"] = "title"
+  REQ["mastersthesis"] = "author title school year"
+  REQ["phdthesis"] = "author title school year"
+  REQ["proceedings"] = "title year"
+  REQ["techreport"] = "author title institution year"
+  REQ["unpublished"] = "author title note"  # types absent from REQ are not checked
+}
+
+function bib_pass(raw) { return }  # deliberate no-op; presumably bib-parse.awk's hook for non-entry text - confirm
+
+function problem(key, msg) {  # record one finding: emit "key: msg" as a line on stdout
+  BIB_BAD = 1                  # remembered so the END rule exits non-zero
+  printf("%s: %s\n", key, msg)
+}
+
+# Lint one entry (presumably called by bib-parse.awk per parsed entry - confirm):
+# type is the entry type, key the cite key; names after the gap are locals.
+function bib_entry(type, key,    n, req, i, alts, na, j, found, t) {
+  if (key in BIB_KEYS_SEEN)
+    problem(key, "duplicate key")
+  BIB_KEYS_SEEN[key] = 1
+
+  # required fields ("a|b" means at least one of a, b); BibTeX entry types are
+  # case-insensitive, so fold case before consulting the lowercase REQ table
+  type = tolower(type)
+  if (type in REQ) {
+    n = split(REQ[type], req, " ")
+    for (i = 1; i <= n; i++) {
+      na = split(req[i], alts, "|")
+      found = 0
+      for (j = 1; j <= na && !found; j++)  # stop at the first alternative present
+        found = (bib_get(alts[j]) != "")
+      if (!found)
+        problem(key, "missing required field: " req[i])
+    }
+  }
+
+  # empty values (judged after bib_trim)
+  for (i = 1; i <= BIB_N; i++)
+    if (bib_trim(BIB_VAL[i]) == "")
+      problem(key, "empty field: " BIB_NAME[i])
+
+  # likely duplicate entries: same title once lowercased and cut down to [a-z0-9]
+  t = tolower(bib_get("title"))
+  gsub(/[^a-z0-9]/, "", t)
+  if (t == "") return  # nothing comparable left (e.g. no title)
+  if (t in BIB_TITLES_SEEN) problem(key, "title duplicates " BIB_TITLES_SEEN[t])
+  else BIB_TITLES_SEEN[t] = key  # first key seen with this title
+}
+
+END { exit BIB_BAD + 0 }  # status 1 once problem() has set BIB_BAD, otherwise 0