From eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 Mon Sep 17 00:00:00 2001
From: "Douglas B. Rumbaugh"
Date: Sat, 6 Jun 2026 12:02:41 -0400
Subject: Initial implementation (only a few years later!)

This is pure Claude. I'd written out the plan for this suite of scripts
eons ago, but never found the time to actually do it. Remembered it this
morning, pointed Claude at the README, and had something that appears to
work in minutes.

caveat emptor: the design is mine, but the code is purely LLM generated
at this point.
---
 lib/bib-parse.awk | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)
 create mode 100644 lib/bib-parse.awk

(limited to 'lib/bib-parse.awk')

diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk
new file mode 100644
index 0000000..e5bf9fa
--- /dev/null
+++ b/lib/bib-parse.awk
@@ -0,0 +1,216 @@
+# bib-parse.awk - shared bibtex parsing library for bibutils
+#
+# Consumers must define two hook functions:
+#   bib_entry(type, key) - called once per regular entry. The fields are
+#                          available in BIB_N, BIB_NAME[], BIB_VAL[] and
+#                          BIB_KIND[]; the raw source text of the entry
+#                          is in BIB_RAW.
+#   bib_pass(raw)        - called for @string and @preamble blocks with
+#                          their raw source text.
+#
+# BIB_KIND[j] is "s" for ordinary string values (content stored without
+# delimiters; re-wrap in braces on output), "n" for bare numbers, and
+# "r" for raw values (macros, # concatenation) which should be emitted
+# verbatim.
+
+# Entries span several lines, so every input record is appended to
+# bib_buf (with its newline restored) and parsing happens once, at END.
+{ bib_buf = bib_buf $0 "\n" }
+
+END { bib_main(bib_buf) }
+
+# Walk the whole buffer s; every "@" found is handed to bib_entry_at(),
+# which returns the index at which scanning resumes.  All other
+# characters are skipped.
+function bib_main(s,    i, n) {
+    n = length(s)
+    i = 1
+    while (i <= n) {
+        if (substr(s, i, 1) == "@")
+            i = bib_entry_at(s, i)
+        else
+            i++
+    }
+}
+
+# Return the index of the first character at or after i that is not a
+# space, tab, CR or LF (length(s) + 1 if there is none).
+function bib_ws(s, i,    n) {
+    n = length(s)
+    while (i <= n && substr(s, i, 1) ~ /[ \t\r\n]/)
+        i++
+    return i
+}
+
+# Return t with leading and trailing spaces, tabs, CRs and LFs removed.
+function bib_trim(t) {
+    sub(/^[ \t\r\n]+/, "", t)
+    sub(/[ \t\r\n]+$/, "", t)
+    return t
+}
+
+# balanced {...} group starting at i (i must be at the opening brace);
+# inner content goes to BIB_PIECE, returns the index just past the
+# closing brace.  If the input ends before the group is closed,
+# BIB_PIECE is everything after the opening brace and the return value
+# is length(s) + 1.
+function bib_braced(s, i,    depth, start, c, n, closed) {
+    n = length(s)
+    start = i
+    depth = 0
+    closed = 0
+    while (i <= n) {
+        c = substr(s, i, 1)
+        i++
+        if (c == "{")
+            depth++
+        else if (c == "}") {
+            depth--
+            if (depth == 0) {
+                closed = 1
+                break
+            }
+        }
+    }
+    # i is one past the last character consumed.  Drop the opener, and
+    # the closer only when one was actually seen; otherwise the final
+    # character of an unterminated group would be lost.
+    BIB_PIECE = substr(s, start + 1, i - start - 1 - closed)
+    return i
+}
+
+# "..." group starting at i (i must be at the opening quote); braces
+# protect embedded quotes.  Inner content goes to BIB_PIECE, returns the
+# index just past the closing quote.  If the input ends before the
+# closing quote, BIB_PIECE is everything after the opening quote and the
+# return value is length(s) + 1.
+function bib_quoted(s, i,    depth, start, c, n, closed) {
+    n = length(s)
+    start = i
+    i++                 # step over the opening quote
+    depth = 0
+    closed = 0
+    while (i <= n) {
+        c = substr(s, i, 1)
+        i++
+        if (c == "{")
+            depth++
+        else if (c == "}") {
+            # A stray "}" must not push depth below zero, or the
+            # closing quote would never be recognised.
+            if (depth > 0)
+                depth--
+        } else if (c == "\"" && depth == 0) {
+            closed = 1
+            break
+        }
+    }
+    # Same length arithmetic as bib_braced(): only subtract the closing
+    # delimiter when it was found.
+    BIB_PIECE = substr(s, start + 1, i - start - 1 - closed)
+    return i
+}
+
+# skip a balanced op...cl group starting at i (i must be at op);
+# returns the index just past the matching cl, or length(s) + 1 if the
+# group is never closed.  Only op and cl are counted; quotes and other
+# bracket kinds inside the group are not interpreted.
+function bib_skip_group(s, i, op, cl,    depth, c, n) {
+    n = length(s)
+    depth = 0
+    while (i <= n) {
+        c = substr(s, i, 1)
+        i++
+        if (c == op)
+            depth++
+        else if (c == cl) {
+            depth--
+            if (depth == 0)
+                break
+        }
+    }
+    return i
+}
+
+# field value at i, handling # concatenation; sets BIB_VALUE and
+# BIB_VKIND, returns the index just past the value (and past any
+# whitespace that follows it).
+#
+# BIB_VKIND is
+#   "s"  a single {...} or "..." piece; BIB_VALUE is its inner content
+#   "n"  a single bare piece made only of digits
+#   "r"  anything else (a bare word, or several pieces joined with #);
+#        BIB_VALUE is then the trimmed source text of the whole value
+function bib_value(s, i,    start, c, piece, pieces, kind, n) {
+    n = length(s)
+    start = i
+    pieces = 0
+    kind = ""
+    BIB_VALUE = ""
+    while (1) {
+        c = substr(s, i, 1)
+        if (c == "{") {
+            i = bib_braced(s, i)
+            BIB_VALUE = BIB_VALUE BIB_PIECE
+            if (kind == "")
+                kind = "s"
+        } else if (c == "\"") {
+            i = bib_quoted(s, i)
+            BIB_VALUE = BIB_VALUE BIB_PIECE
+            if (kind == "")
+                kind = "s"
+        } else {
+            # bare piece: runs up to a comma, "#", either closing
+            # bracket, or whitespace
+            piece = ""
+            while (i <= n && substr(s, i, 1) !~ /[,#}) \t\r\n]/) {
+                piece = piece substr(s, i, 1)
+                i++
+            }
+            BIB_VALUE = BIB_VALUE piece
+            kind = (piece ~ /^[0-9]+$/) ? "n" : "r"
+        }
+        pieces++
+        i = bib_ws(s, i)
+        if (substr(s, i, 1) == "#")
+            i = bib_ws(s, i + 1)    # another piece follows
+        else
+            break
+    }
+    if (pieces > 1)
+        kind = "r"
+    if (kind == "r")
+        BIB_VALUE = bib_trim(substr(s, start, i - start))
+    BIB_VKIND = kind
+    return i
+}
+
+# parse the construct whose "@" is at i; returns the index past it
+#
+#   @comment        skipped as a balanced group, no hook is called
+#   @string,
+#   @preamble       bib_pass() is called with the trimmed source text
+#   anything else   parsed as "key, name = value, ..."; the fields are
+#                   stored in BIB_NAME[1..BIB_N] (lower-cased),
+#                   BIB_VAL[] and BIB_KIND[], the trimmed source text in
+#                   BIB_RAW, and bib_entry(type, key) is called
+#
+# type is lower-cased.  Both {...} and (...) delimiters are accepted.
+# An "@" that is not followed by letters and an opening delimiter is
+# treated as stray text: nothing is called and scanning resumes after it.
+function bib_entry_at(s, i,    at, type, opener, closer, key, name, c, n) {
+    n = length(s)
+    at = i
+    i++                 # step over "@"
+    type = ""
+    while (i <= n && substr(s, i, 1) ~ /[A-Za-z]/) {
+        type = type substr(s, i, 1)
+        i++
+    }
+    type = tolower(type)
+    i = bib_ws(s, i)
+    c = substr(s, i, 1)
+    if (c == "{") {
+        opener = "{"
+        closer = "}"
+    } else if (c == "(") {
+        opener = "("
+        closer = ")"
+    } else
+        return i        # stray @, not an entry
+
+    if (type == "comment")
+        return bib_skip_group(s, i, opener, closer)
+    if (type == "string" || type == "preamble") {
+        i = bib_skip_group(s, i, opener, closer)
+        bib_pass(bib_trim(substr(s, at, i - at)))
+        return i
+    }
+
+    i++                 # consume opener
+    i = bib_ws(s, i)
+    key = ""
+    while (i <= n && substr(s, i, 1) !~ /[, \t\r\n})]/) {
+        key = key substr(s, i, 1)
+        i++
+    }
+    i = bib_ws(s, i)
+    if (substr(s, i, 1) == ",")
+        i++
+
+    BIB_N = 0
+    while (1) {
+        i = bib_ws(s, i)
+        c = substr(s, i, 1)
+        if (c == "" || c == closer) {
+            if (c == closer)
+                i++
+            break
+        }
+        if (c == ",") {
+            i++
+            continue
+        }
+        # Field name.  It also stops at "{" and "\"" so that a field
+        # with a missing "=" (title{x}) leaves the value group intact
+        # for the resync below instead of gluing it onto the name.
+        name = ""
+        while (i <= n && substr(s, i, 1) !~ /[=, \t\r\n})"{]/) {
+            name = name substr(s, i, 1)
+            i++
+        }
+        i = bib_ws(s, i)
+        c = substr(s, i, 1)
+        if (c != "=") {
+            # Malformed field: drop it and resync.  A "," or the
+            # entry's closer must be left for the top of the loop --
+            # skipping the closer here would make the parser swallow
+            # the entries that follow.  A stray delimited group is
+            # skipped whole.  Otherwise a character is skipped only
+            # when the name was empty, which guarantees progress.
+            if (c == "{")
+                i = bib_braced(s, i)
+            else if (c == "\"")
+                i = bib_quoted(s, i)
+            else if (name == "")
+                i++
+            continue
+        }
+        i = bib_ws(s, i + 1)
+        i = bib_value(s, i)
+        BIB_N++
+        BIB_NAME[BIB_N] = tolower(name)
+        BIB_VAL[BIB_N] = BIB_VALUE
+        BIB_KIND[BIB_N] = BIB_VKIND
+    }
+    BIB_RAW = bib_trim(substr(s, at, i - at))
+    bib_entry(type, key)
+    return i
+}
-- 
cgit v1.2.3