aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bib-parse.awk
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bib-parse.awk')
-rw-r--r--lib/bib-parse.awk216
1 files changed, 216 insertions, 0 deletions
diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk
new file mode 100644
index 0000000..e5bf9fa
--- /dev/null
+++ b/lib/bib-parse.awk
@@ -0,0 +1,216 @@
+# bib-parse.awk - shared bibtex parsing library for bibutils
+#
+# Consumers must define two hook functions:
+# bib_entry(type, key) - called once per regular entry. The fields are
+# available in BIB_N, BIB_NAME[], BIB_VAL[] and
+# BIB_KIND[]; the raw source text of the entry
+# is in BIB_RAW.
+# bib_pass(raw) - called for @string and @preamble blocks with
+# their raw source text.
+#
+# BIB_KIND[j] is "s" for ordinary string values (content stored without
+# delimiters; re-wrap in braces on output), "n" for bare numbers, and
+# "r" for raw values (macros, # concatenation) which should be emitted
+# verbatim.
+
+# Main rule: append every input record, followed by a newline, to bib_buf.
+# Nothing is parsed while reading; the text is only accumulated.
+{ bib_buf = bib_buf $0 "\n" }
+
+# Once all input has been read, parse the accumulated text in one call.
+END { bib_main(bib_buf) }
+
+# bib_main(s) - top-level scanner over the whole buffered text s.
+#
+# Walks s one character at a time.  Every "@" is handed to bib_entry_at(),
+# which returns the index at which scanning resumes; any other character is
+# stepped over.  i is a local.
+function bib_main(s, i) {
+    for (i = 1; i <= length(s); )
+        i = (substr(s, i, 1) == "@") ? bib_entry_at(s, i) : i + 1
+}
+
+# bib_ws(s, i) - return the index of the first character at or after i that
+# is not a space, tab, carriage return or newline (length(s) + 1 if none).
+function bib_ws(s, i) {
+    for (; i <= length(s); i++)
+        if (substr(s, i, 1) !~ /[ \t\r\n]/)
+            break
+    return i
+}
+
+# bib_trim(t) - return t with leading and trailing runs of space, tab,
+# carriage return and newline removed.  Interior whitespace is untouched.
+function bib_trim(t) {
+    gsub(/^[ \t\r\n]+|[ \t\r\n]+$/, "", t)
+    return t
+}
+
+# bib_braced(s, i) - read the balanced {...} group whose "{" is at index i.
+#
+# The text between the outer braces (nested braces included verbatim) is
+# stored in the global BIB_PIECE.  Returns the index just past the closing
+# brace.  If the input ends before the group is closed, BIB_PIECE receives
+# everything after the opening brace and length(s) + 1 is returned.
+# depth, start and c are locals.
+function bib_braced(s, i, depth, start, c) {
+    start = i
+    depth = 0
+    while (i <= length(s)) {
+        c = substr(s, i, 1)
+        i++
+        if (c == "{")
+            depth++
+        else if (c == "}") {
+            depth--
+            if (depth == 0) {
+                # i is already one past the closing brace, so the content
+                # spans start+1 .. i-2.
+                BIB_PIECE = substr(s, start + 1, i - start - 2)
+                return i
+            }
+        }
+    }
+    # Unterminated group: there is no closing brace to exclude, so keep
+    # the whole remainder instead of chopping its last character.
+    BIB_PIECE = substr(s, start + 1)
+    return i
+}
+
+# bib_quoted(s, i) - read the "..." group whose opening quote is at index i.
+#
+# A double quote only terminates the group at brace depth 0, so braces
+# protect embedded quotes.  The text between the delimiting quotes is stored
+# in the global BIB_PIECE.  Returns the index just past the closing quote.
+# If the input ends before a closing quote is found, BIB_PIECE receives
+# everything after the opening quote and length(s) + 1 is returned.
+# depth, start and c are locals.
+function bib_quoted(s, i, depth, start, c) {
+    start = i
+    depth = 0
+    for (i++; i <= length(s); i++) {
+        c = substr(s, i, 1)
+        if (c == "{")
+            depth++
+        else if (c == "}") {
+            # Ignore a stray "}": letting depth go negative would make the
+            # closing quote unrecognisable and swallow the rest of the input.
+            if (depth > 0)
+                depth--
+        } else if (c == "\"" && depth == 0) {
+            BIB_PIECE = substr(s, start + 1, i - start - 1)
+            return i + 1
+        }
+    }
+    # Unterminated string: keep the whole remainder rather than dropping
+    # its last character.
+    BIB_PIECE = substr(s, start + 1)
+    return i
+}
+
+# bib_skip_group(s, i, op, cl) - step over a balanced op...cl group.
+#
+# i must point at the opening delimiter.  Only the op and cl characters are
+# counted; quotes and other delimiters are not interpreted.  Returns the
+# index just past the matching cl, or length(s) + 1 when the input ends
+# first.  depth and c are locals.
+function bib_skip_group(s, i, op, cl, depth, c) {
+    depth = 0
+    for (; i <= length(s); i++) {
+        c = substr(s, i, 1)
+        if (c == op) {
+            depth++
+            continue
+        }
+        if (c == cl && --depth == 0)
+            return i + 1
+    }
+    return i
+}
+
+# bib_value(s, i) - parse the field value that starts at index i.
+#
+# A value is one or more pieces joined by "#".  Each piece is a {...}
+# group, a "..." group, or a bare token that runs up to the next comma,
+# "#", "}", ")" or whitespace.
+#
+# Results are left in two globals:
+#   BIB_VKIND  "s" for a single braced or quoted piece, "n" for a single
+#              bare token made only of digits, "r" for everything else
+#              (any other bare token, or any "#" concatenation).
+#   BIB_VALUE  for "s" the content without delimiters, for "n" the digits,
+#              for "r" the trimmed source text of the whole value.
+#
+# Returns the index just past the value and any whitespace following it.
+# start, c, piece, pieces and kind are locals.
+function bib_value(s, i, start, c, piece, pieces, kind) {
+    start = i
+    kind = ""
+    BIB_VALUE = ""
+    for (pieces = 1; ; pieces++) {
+        c = substr(s, i, 1)
+        if (c == "{" || c == "\"") {
+            i = (c == "{") ? bib_braced(s, i) : bib_quoted(s, i)
+            BIB_VALUE = BIB_VALUE BIB_PIECE
+            if (kind == "")
+                kind = "s"
+        } else {
+            # bare token: collect characters up to the next delimiter
+            piece = ""
+            for (; i <= length(s); i++) {
+                c = substr(s, i, 1)
+                if (c ~ /[,#}) \t\r\n]/)
+                    break
+                piece = piece c
+            }
+            BIB_VALUE = BIB_VALUE piece
+            kind = (piece ~ /^[0-9]+$/) ? "n" : "r"
+        }
+        i = bib_ws(s, i)
+        if (substr(s, i, 1) != "#")
+            break
+        i = bib_ws(s, i + 1)    # another piece follows the "#"
+    }
+    # Concatenations and non-numeric bare tokens are kept as source text.
+    if (pieces > 1 || kind == "r") {
+        kind = "r"
+        BIB_VALUE = bib_trim(substr(s, start, i - start))
+    }
+    BIB_VKIND = kind
+    return i
+}
+
+# bib_entry_at(s, i) - parse the construct whose "@" is at index i of s and
+# return the index at which scanning should resume.
+#
+# The letters after "@" (lower-cased) form the type; the construct must then
+# open with "{" or "(".  Anything else is treated as a stray "@" and the
+# index reached so far is returned without calling a hook.
+#
+#   @comment            the delimited group is skipped, no hook is called
+#   @string, @preamble  the group is skipped and bib_pass() receives the
+#                       trimmed source text of the whole construct
+#   anything else       a regular entry: the key and the fields are parsed,
+#                       BIB_N, BIB_NAME[], BIB_VAL[], BIB_KIND[] and BIB_RAW
+#                       are set, then bib_entry(type, key) is called
+#
+# Field names are stored lower-cased.  Array slots above BIB_N are not
+# cleared between entries.  at, type, opener, closer, key, name and c are
+# locals.
+function bib_entry_at(s, i, at, type, opener, closer, key, name, c) {
+    at = i
+    i++
+    type = ""
+    while (i <= length(s) && substr(s, i, 1) ~ /[A-Za-z]/) {
+        type = type substr(s, i, 1)
+        i++
+    }
+    type = tolower(type)
+    i = bib_ws(s, i)
+    c = substr(s, i, 1)
+    if (c == "{") {
+        opener = "{"
+        closer = "}"
+    } else if (c == "(") {
+        opener = "("
+        closer = ")"
+    } else
+        return i # stray @, not an entry
+
+    if (type == "comment")
+        return bib_skip_group(s, i, opener, closer)
+    if (type == "string" || type == "preamble") {
+        i = bib_skip_group(s, i, opener, closer)
+        bib_pass(bib_trim(substr(s, at, i - at)))
+        return i
+    }
+
+    i++ # consume opener
+    i = bib_ws(s, i)
+    key = ""
+    while (i <= length(s) && substr(s, i, 1) !~ /[, \t\r\n})]/) {
+        key = key substr(s, i, 1)
+        i++
+    }
+    i = bib_ws(s, i)
+    if (substr(s, i, 1) == ",")
+        i++
+
+    BIB_N = 0
+    while (1) {
+        i = bib_ws(s, i)
+        c = substr(s, i, 1)
+        if (c == "" || c == closer) {
+            # end of input or end of entry
+            if (c == closer)
+                i++
+            break
+        }
+        if (c == ",") {
+            i++
+            continue
+        }
+        name = ""
+        while (i <= length(s) && substr(s, i, 1) !~ /[=, \t\r\n})]/) {
+            name = name substr(s, i, 1)
+            i++
+        }
+        i = bib_ws(s, i)
+        if (substr(s, i, 1) != "=") {
+            # Malformed field.  If a name was consumed we have already made
+            # progress and i sits on the next token, which must be kept
+            # intact; only when nothing was consumed do we skip one
+            # character to guarantee forward progress.
+            if (name == "")
+                i++
+            continue
+        }
+        i = bib_ws(s, i + 1)
+        i = bib_value(s, i)
+        BIB_N++
+        BIB_NAME[BIB_N] = tolower(name)
+        BIB_VAL[BIB_N] = BIB_VALUE
+        BIB_KIND[BIB_N] = BIB_VKIND
+    }
+    BIB_RAW = bib_trim(substr(s, at, i - at))
+    bib_entry(type, key)
+    return i
+}