# bib-check.awk - lint a bibtex database
#
# Requires bib-parse.awk and bib-canon.awk.  Reports, one problem per
# line on stdout:
#   - missing fields required by the entry type
#   - duplicate keys
#   - entries whose titles normalize to the same string (likely dups)
#   - empty field values
# Exits 1 if any problem was found.

# REQ maps a lower-case entry type to a space-separated list of required
# fields.  An item of the form "a|b" is satisfied when at least one of the
# alternatives is present.  Entry types that are not listed here (for
# example "misc") have no required fields and are not checked.
BEGIN {
    REQ["article"]       = "author title journal year"
    REQ["book"]          = "author|editor title publisher year"
    REQ["booklet"]       = "title"
    # Standard BibTeX also requires "chapter and/or pages" for @inbook.
    REQ["inbook"]        = "author|editor title chapter|pages publisher year"
    REQ["incollection"]  = "author title booktitle publisher year"
    REQ["inproceedings"] = "author title booktitle year"
    REQ["conference"]    = "author title booktitle year"
    REQ["manual"]        = "title"
    REQ["mastersthesis"] = "author title school year"
    REQ["phdthesis"]     = "author title school year"
    REQ["proceedings"]   = "title year"
    REQ["techreport"]    = "author title institution year"
    REQ["unpublished"]   = "author title note"
}

# bib_pass(raw) - intentionally does nothing.
# NOTE(review): presumably a callback that bib-parse.awk invokes for input
# outside of entries; confirm against the parser before removing.
function bib_pass(raw) {
}

# problem(key, msg) - print one "key: msg" line on stdout and record that
# at least one problem was found, so the END rule exits with status 1.
function problem(key, msg) {
    printf "%s: %s\n", key, msg
    BIB_BAD = 1
}

# bib_entry(type, key) - run all checks on one entry.
#
#   type  entry type (e.g. "article"); compared case-insensitively
#   key   citation key, used as the prefix of every reported problem
#
# The remaining parameters (n, req, i, alts, na, j, found, t) are awk-style
# local variables and must not be passed by the caller.
#
# Reads the entry's fields through bib_get(), bib_trim() and the
# BIB_N / BIB_NAME / BIB_VAL arrays, all of which are defined outside this
# file (presumably by bib-parse.awk / bib-canon.awk - TODO confirm).
# Updates the global sets BIB_KEYS_SEEN and BIB_TITLES_SEEN.
function bib_entry(type, key,    n, req, i, alts, na, j, found, t) {
    if (key in BIB_KEYS_SEEN)
        problem(key, "duplicate key")
    BIB_KEYS_SEEN[key] = 1

    # BibTeX entry types are case-insensitive ("@Article" == "@article"),
    # while REQ is keyed in lower case.  Lower-casing here is a no-op if
    # the parser has already normalized the type.
    type = tolower(type)

    # required fields ("a|b" means at least one of a, b)
    if (type in REQ) {
        n = split(REQ[type], req, " ")
        for (i = 1; i <= n; i++) {
            na = split(req[i], alts, "|")
            found = 0
            for (j = 1; j <= na; j++)
                if (bib_get(alts[j]) != "")
                    found = 1
            if (!found)
                problem(key, "missing required field: " req[i])
        }
    }

    # empty values
    for (i = 1; i <= BIB_N; i++)
        if (bib_trim(BIB_VAL[i]) == "")
            problem(key, "empty field: " BIB_NAME[i])

    # likely duplicate entries: same normalized title.  Normalization
    # lower-cases the title and drops everything except a-z and 0-9, so
    # differences in case, spacing, punctuation and braces are ignored.
    t = tolower(bib_get("title"))
    gsub(/[^a-z0-9]/, "", t)
    if (t != "") {
        if (t in BIB_TITLES_SEEN)
            problem(key, "title duplicates " BIB_TITLES_SEEN[t])
        else
            # remember the first key seen with this normalized title
            BIB_TITLES_SEEN[t] = key
    }
}

# Exit status: 1 if problem() was ever called, 0 otherwise.  "+ 0" forces
# a numeric 0 when BIB_BAD was never assigned.
END {
    exit BIB_BAD + 0
}