From eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Sat, 6 Jun 2026 12:02:41 -0400 Subject: Initial implementation (only a few years later!) This is pure Claude. I'd written out the plan for this suite of scripts eons ago, but never found the time to actual do it. Remembered it this morning, pointed Claude at the README, and had something that appears to work in minutes. caveat emptor: the design is mine, but the code is purely LLM generated at this point. --- lib/bib-check.awk | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 lib/bib-check.awk (limited to 'lib/bib-check.awk') diff --git a/lib/bib-check.awk b/lib/bib-check.awk new file mode 100644 index 0000000..4411a55 --- /dev/null +++ b/lib/bib-check.awk @@ -0,0 +1,69 @@ +# bib-check.awk - lint a bibtex database +# +# Requires bib-parse.awk and bib-canon.awk. Reports, one problem per +# line on stdout: +# - missing fields required by the entry type +# - duplicate keys +# - entries whose titles normalize to the same string (likely dups) +# - empty field values +# Exits 1 if any problem was found. + +BEGIN { + REQ["article"] = "author title journal year" + REQ["book"] = "author|editor title publisher year" + REQ["booklet"] = "title" + REQ["inbook"] = "author|editor title publisher year" + REQ["incollection"] = "author title booktitle publisher year" + REQ["inproceedings"] = "author title booktitle year" + REQ["conference"] = "author title booktitle year" + REQ["manual"] = "title" + REQ["mastersthesis"] = "author title school year" + REQ["phdthesis"] = "author title school year" + REQ["proceedings"] = "title year" + REQ["techreport"] = "author title institution year" + REQ["unpublished"] = "author title note" +} + +function bib_pass(raw) { } + +function problem(key, msg) { + printf "%s: %s\n", key, msg + BIB_BAD = 1 +} + +function bib_entry(type, key, n, req, i, alts, na, j, found, t, k) { + if (key in BIB_KEYS_SEEN) + problem(key, "duplicate key") + BIB_KEYS_SEEN[key] = 1 + + # required fields ("a|b" means at least one of a, b) + if (type in REQ) { + n = split(REQ[type], req, " ") + for (i = 1; i <= n; i++) { + na = split(req[i], alts, "|") + found = 0 + for (j = 1; j <= na; j++) + if (bib_get(alts[j]) != "") + found = 1 + if (!found) + problem(key, "missing required field: " req[i]) + } + } + + # empty values + for (i = 1; i <= BIB_N; i++) + if (bib_trim(BIB_VAL[i]) == "") + problem(key, "empty field: " BIB_NAME[i]) + + # likely duplicate entries: same normalized title + t = tolower(bib_get("title")) + gsub(/[^a-z0-9]/, "", t) + if (t != "") { + if (t in BIB_TITLES_SEEN) + problem(key, "title duplicates " BIB_TITLES_SEEN[t]) + else + BIB_TITLES_SEEN[t] = key + } +} + +END { exit BIB_BAD } -- cgit v1.2.3