Initial implementation (only a few years later!)

This is pure Claude. I'd written out the plan for this suite of scripts eons ago, but never found the time to actually do it. Remembered it this morning, pointed Claude at the README, and had something that appears to work in minutes. Caveat emptor: the design is mine, but the code is purely LLM-generated at this point.
author: Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2026-06-06 12:02:41 -0400
committer: Douglas B. Rumbaugh <doug@douglasrumbaugh.com> 2026-06-06 12:02:41 -0400
commit: eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 (patch)
tree: 626d64c3574cfbc7cc38eae6d142ef22b21cf59b /lib/bib-parse.awk
parent: 8351a1da3f56cde9939b934bc5533a95aff1c95e (diff)
download: bibutils-eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2.tar.gz
1 files changed, 216 insertions, 0 deletions
diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk
new file mode 100644
index 0000000..e5bf9fa
--- /dev/null
+++ b/lib/bib-parse.awk
@@ -0,0 +1,216 @@
+# bib-parse.awk - shared bibtex parsing library for bibutils
+#
+# Consumers must define two hook functions:
+#   bib_entry(type, key) - called once per regular entry. The fields are
+#                          available in BIB_N, BIB_NAME[], BIB_VAL[] and
+#                          BIB_KIND[]; the raw source text of the entry
+#                          is in BIB_RAW.
+#   bib_pass(raw)        - called for @string and @preamble blocks with
+#                          their raw source text.
+#
+# BIB_KIND[j] is "s" for ordinary string values (content stored without
+# delimiters; re-wrap in braces on output), "n" for bare numbers, and
+# "r" for raw values (macros, # concatenation) which should be emitted
+# verbatim.
+
+{ bib_buf = bib_buf $0 "\n" }   # pattern-less rule: append every input record to bib_buf, newline restored
+
+END { bib_main(bib_buf) }       # after the last record, parse the accumulated text in one pass
+
+# Walk the whole buffer: each "@" is handed to bib_entry_at(), which
+# returns where to resume; any other character is stepped over.
+function bib_main(s,    pos, total) {
+  total = length(s)
+  for (pos = 1; pos <= total; ) {
+    if (substr(s, pos, 1) != "@") { pos++; continue }
+    pos = bib_entry_at(s, pos)
+  }
+}
+
+# advance i over spaces, tabs, CRs and newlines; returns the new index
+function bib_ws(s, i,    ch) {
+  while ((ch = substr(s, i, 1)) != "" && ch ~ /[ \t\r\n]/) i++
+  return i
+}
+
+# strip blanks (space, tab, CR, LF) from both ends of t; returns the result
+function bib_trim(t) {
+  sub(/[ \t\r\n]+$/, "", t); sub(/^[ \t\r\n]+/, "", t)
+  return t
+}
+
+# Read the balanced {...} group whose opening brace sits at index i.
+# The text between the outermost braces is left in BIB_PIECE; the return
+# value is the index just past the matching close brace. The scan stops
+# at the end of s when no matching brace exists.
+function bib_braced(s, i,    level, open, ch, n) {
+  open = i
+  n = length(s)
+  for (level = 0; i <= n; ) {
+    ch = substr(s, i++, 1)
+    if (ch == "{") {
+      level++
+      continue
+    }
+    if (ch == "}" && --level == 0)
+      break                        # outermost brace closed
+  }
+  BIB_PIECE = substr(s, open + 1, i - open - 2)
+  return i
+}
+
+# "..." group starting at i; a quote nested inside {...} does not end it.
+# Inner text goes to BIB_PIECE; returns the index just past the closing quote.
+function bib_quoted(s, i,    depth, start, c) {
+  start = i++                        # remember the opening quote, step past it
+  depth = 0
+  while (i <= length(s)) {
+    c = substr(s, i, 1)
+    if (c == "{")
+      depth++
+    else if (c == "}" && depth > 0)  # an unmatched "}" is kept as plain text: a
+      depth--                        # negative depth would hide the closing quote
+    else if (c == "\"" && depth == 0) {
+      i++
+      break
+    }
+    i++
+  }
+  BIB_PIECE = substr(s, start + 1, i - start - 2)
+  return i
+}
+
+# Skip the balanced op...cl group that begins at index i (s[i] must be op).
+# Only op and cl are counted; returns the index just past the matching cl.
+# The scan stops at the end of s when the group never closes.
+function bib_skip_group(s, i, op, cl,    level, ch, n) {
+  n = length(s)
+  for (level = 0; i <= n; ) {
+    ch = substr(s, i++, 1)
+    if (ch == op) {
+      level++
+      continue
+    }
+    if (ch == cl && --level == 0)
+      break
+  }
+  return i
+}
+
+# Parse the field value that starts at index i, following any chain of
+# pieces joined by "#". Sets BIB_VALUE and BIB_VKIND ("s", "n" or "r")
+# and returns the index just past the value and the whitespace that
+# follows it.
+function bib_value(s, i,    first, ch, word, count, kind, joined) {
+  first = i
+  count = 0
+  kind = ""
+  BIB_VALUE = ""
+  do {
+    ch = substr(s, i, 1)
+    if (ch == "{" || ch == "\"") {
+      # delimited piece: append its inner text; the kind becomes "s"
+      # only if no earlier piece has set it
+      i = (ch == "{") ? bib_braced(s, i) : bib_quoted(s, i)
+      BIB_VALUE = BIB_VALUE BIB_PIECE
+      if (kind == "")
+        kind = "s"
+    } else {
+      # bare piece: collect characters up to the next delimiter
+      word = ""
+      for (; i <= length(s) && substr(s, i, 1) !~ /[,#}) \t\r\n]/; i++)
+        word = word substr(s, i, 1)
+      BIB_VALUE = BIB_VALUE word
+      kind = (word ~ /^[0-9]+$/) ? "n" : "r"
+    }
+    count++
+    i = bib_ws(s, i)
+    joined = (substr(s, i, 1) == "#")
+    if (joined)
+      i = bib_ws(s, i + 1)         # step over "#" and blanks to the next piece
+  } while (joined)
+
+  # A concatenation, or a bare value that is not all digits, is reported
+  # as kind "r" with the trimmed source text instead of the joined pieces.
+  if (count > 1 || kind == "r") {
+    kind = "r"
+    BIB_VALUE = bib_trim(substr(s, first, i - first))
+  }
+  BIB_VKIND = kind
+  return i
+}
+
+# Parse the construct whose "@" is at index i of s (@comment, @string, @preamble or a regular entry); returns the index just past it.
+function bib_entry_at(s, i,    at, type, opener, closer, key, name, c) {
+  at = i                           # start of the raw text later given to bib_pass() / BIB_RAW
+  i++                              # step over "@"
+  type = ""
+  while (i <= length(s) && substr(s, i, 1) ~ /[A-Za-z]/) {   # entry type: a run of ASCII letters
+    type = type substr(s, i, 1)
+    i++
+  }
+  type = tolower(type)             # the type is compared and reported in lower case
+  i = bib_ws(s, i)
+  c = substr(s, i, 1)
+  if (c == "{") {                  # the body may be delimited by {...} or by (...)
+    opener = "{"
+    closer = "}"
+  } else if (c == "(") {
+    opener = "("
+    closer = ")"
+  } else
+    return i                       # stray @, not an entry
+  # @comment is skipped without calling a hook; @string/@preamble go to bib_pass() as trimmed raw text
+  if (type == "comment")
+    return bib_skip_group(s, i, opener, closer)
+  if (type == "string" || type == "preamble") {
+    i = bib_skip_group(s, i, opener, closer)
+    bib_pass(bib_trim(substr(s, at, i - at)))
+    return i
+  }
+  # regular entry: the citation key comes first, then the name = value fields
+  i++                              # consume opener
+  i = bib_ws(s, i)
+  key = ""
+  while (i <= length(s) && substr(s, i, 1) !~ /[, \t\r\n})]/) {   # key ends at a comma, blank, "}" or ")"
+    key = key substr(s, i, 1)
+    i++
+  }
+  i = bib_ws(s, i)
+  if (substr(s, i, 1) == ",")      # the comma after the key is consumed if present
+    i++
+  # field loop: one name = value pair per pass, until the closer or the end of input
+  BIB_N = 0                        # count restarts per entry; array slots above BIB_N are not cleared
+  while (1) {
+    i = bib_ws(s, i)
+    c = substr(s, i, 1)
+    if (c == "" || c == closer) {  # end of input, or end of this entry
+      if (c == closer)
+        i++                        # consume the closer so the caller resumes after it
+      break
+    }
+    if (c == ",") {                # separator between fields
+      i++
+      continue
+    }
+    name = ""
+    while (i <= length(s) && substr(s, i, 1) !~ /[=, \t\r\n})]/) {   # field name ends at "=", comma, blank, "}" or ")"
+      name = name substr(s, i, 1)
+      i++
+    }
+    i = bib_ws(s, i)
+    if (substr(s, i, 1) != "=") {  # malformed; skip a char and resync
+      i++
+      continue
+    }
+    i = bib_ws(s, i + 1)           # step over "=" and any blanks before the value
+    i = bib_value(s, i)            # leaves the result in BIB_VALUE / BIB_VKIND
+    BIB_N++
+    BIB_NAME[BIB_N] = tolower(name)   # field names are stored in lower case
+    BIB_VAL[BIB_N] = BIB_VALUE
+    BIB_KIND[BIB_N] = BIB_VKIND
+  }
+  BIB_RAW = bib_trim(substr(s, at, i - at))   # trimmed source text of the whole entry, from "@" on
+  bib_entry(type, key)             # consumer hook; key is passed as written (not lower-cased)
+  return i
+}
author	Douglas B. Rumbaugh <doug@douglasrumbaugh.com>	2026-06-06 12:02:41 -0400
committer	Douglas B. Rumbaugh <doug@douglasrumbaugh.com>	2026-06-06 12:02:41 -0400
commit	eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 (patch)
tree	626d64c3574cfbc7cc38eae6d142ef22b21cf59b /lib/bib-parse.awk
parent	8351a1da3f56cde9939b934bc5533a95aff1c95e (diff)
download	bibutils-eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2.tar.gz