# bib-parse.awk - shared bibtex parsing library for bibutils
#
# Consumers must define two hook functions:
#   bib_entry(type, key) - called once per regular entry.  The fields are
#                          available in BIB_N, BIB_NAME[], BIB_VAL[] and
#                          BIB_KIND[]; the raw source text of the entry
#                          is in BIB_RAW.
#   bib_pass(raw)        - called for @string and @preamble blocks with
#                          their raw source text.
#
# BIB_KIND[j] is "s" for ordinary string values (content stored without
# delimiters; re-wrap in braces on output), "n" for bare numbers, and
# "r" for raw values (macros, # concatenation) which should be emitted
# verbatim.
#
# BIB_NAME[], BIB_VAL[] and BIB_KIND[] are emptied before each entry is
# parsed, so during bib_entry() they hold exactly the indices 1..BIB_N.
#
# The whole input is buffered line by line and parsed in the END rule.
#
# Convention: in every function signature below, the names after the
# extra run of spaces are local variables, not arguments.

# accumulate every input line (newline restored) for parsing at END
{ bib_buf = bib_buf $0 "\n" }

END { bib_main(bib_buf) }

# scan the buffered text s and hand every "@" to bib_entry_at();
# anything outside an "@" construct is skipped
function bib_main(s,    i, n) {
    n = length(s)
    i = 1
    while (i <= n) {
        if (substr(s, i, 1) == "@")
            i = bib_entry_at(s, i)
        else
            i++
    }
}

# returns the index of the first non-whitespace character at or after i
# (or an index past the end of s when only whitespace remains)
function bib_ws(s, i,    n) {
    n = length(s)
    while (i <= n && substr(s, i, 1) ~ /[ \t\r\n]/)
        i++
    return i
}

# returns t with leading and trailing whitespace removed
function bib_trim(t) {
    sub(/^[ \t\r\n]+/, "", t)
    sub(/[ \t\r\n]+$/, "", t)
    return t
}

# balanced {...} group starting at i; inner content goes to BIB_PIECE,
# returns the index just past the closing brace.  If the input ends
# before the group is closed, BIB_PIECE holds everything after the
# opening brace and the returned index is length(s) + 1.
function bib_braced(s, i,    depth, start, c, n, closed) {
    n = length(s)
    start = i
    depth = 0
    closed = 0
    while (i <= n) {
        c = substr(s, i, 1)
        i++
        if (c == "{")
            depth++
        else if (c == "}") {
            depth--
            if (depth == 0) {
                closed = 1
                break
            }
        }
    }
    # i is one past the last character consumed; only subtract the
    # closing delimiter from the content when one was actually seen
    BIB_PIECE = substr(s, start + 1, i - start - 1 - closed)
    return i
}

# "..." group starting at i; braces protect embedded quotes.  Inner
# content goes to BIB_PIECE, returns the index just past the closing
# quote.  If the input ends before the closing quote, BIB_PIECE holds
# everything after the opening quote and the returned index is
# length(s) + 1.
function bib_quoted(s, i,    depth, start, c, n, closed) {
    n = length(s)
    start = i
    i++                     # step over the opening quote
    depth = 0
    closed = 0
    while (i <= n) {
        c = substr(s, i, 1)
        i++
        if (c == "{")
            depth++
        else if (c == "}")
            depth--
        else if (c == "\"" && depth == 0) {
            closed = 1
            break
        }
    }
    BIB_PIECE = substr(s, start + 1, i - start - 1 - closed)
    return i
}

# skip a balanced op...cl group starting at i (i must be at op);
# returns the index just past the matching cl, or length(s) + 1 when
# the group is never closed.  When the group is not brace-delimited,
# op/cl characters inside a nested {...} do not count, so a ")" inside
# a braced value cannot end a "(" group early.  Quotes are not treated
# specially here.
function bib_skip_group(s, i, op, cl,    depth, c, n, inner) {
    n = length(s)
    depth = 0
    inner = 0               # brace nesting inside a non-brace group
    while (i <= n) {
        c = substr(s, i, 1)
        i++
        if (op != "{" && c == "{")
            inner++
        else if (op != "{" && c == "}") {
            if (inner > 0)
                inner--     # a stray "}" is ignored
        } else if (inner > 0)
            continue        # protected by braces
        else if (c == op)
            depth++
        else if (c == cl) {
            depth--
            if (depth == 0)
                break
        }
    }
    return i
}

# field value at i, handling # concatenation; sets BIB_VALUE and
# BIB_VKIND, returns the index just past the value (and past any
# whitespace that follows it)
function bib_value(s, i,    start, c, piece, pieces, kind, n, from) {
    n = length(s)
    start = i
    pieces = 0
    kind = ""
    BIB_VALUE = ""
    while (1) {
        c = substr(s, i, 1)
        if (c == "{") {
            i = bib_braced(s, i)
            BIB_VALUE = BIB_VALUE BIB_PIECE
            if (kind == "")
                kind = "s"
        } else if (c == "\"") {
            i = bib_quoted(s, i)
            BIB_VALUE = BIB_VALUE BIB_PIECE
            if (kind == "")
                kind = "s"
        } else {
            # bare token: a number or a macro name
            from = i
            while (i <= n && substr(s, i, 1) !~ /[,#}) \t\r\n]/)
                i++
            piece = substr(s, from, i - from)
            BIB_VALUE = BIB_VALUE piece
            kind = (piece ~ /^[0-9]+$/) ? "n" : "r"
        }
        pieces++
        i = bib_ws(s, i)
        if (substr(s, i, 1) != "#")
            break
        i = bib_ws(s, i + 1)    # step over "#" and go on to the next piece
    }
    # any concatenation is raw, whatever its pieces were
    if (pieces > 1)
        kind = "r"
    # raw values keep their exact source text, delimiters included
    if (kind == "r")
        BIB_VALUE = bib_trim(substr(s, start, i - start))
    BIB_VKIND = kind
    return i
}

# parse the construct whose "@" is at i; returns the index past it.
# @comment groups are skipped, @string and @preamble go to bib_pass()
# as raw text, and every other type is parsed into BIB_N, BIB_NAME[],
# BIB_VAL[], BIB_KIND[] and BIB_RAW before bib_entry(type, key) is
# called.  type is lower-cased, as are the field names.
function bib_entry_at(s, i,    at, type, opener, closer, key, name, c, n, from) {
    n = length(s)
    at = i
    i++

    from = i
    while (i <= n && substr(s, i, 1) ~ /[A-Za-z]/)
        i++
    type = tolower(substr(s, from, i - from))

    i = bib_ws(s, i)
    c = substr(s, i, 1)
    if (c == "{") {
        opener = "{"
        closer = "}"
    } else if (c == "(") {
        opener = "("
        closer = ")"
    } else
        return i            # stray @, not an entry

    if (type == "comment")
        return bib_skip_group(s, i, opener, closer)

    if (type == "string" || type == "preamble") {
        i = bib_skip_group(s, i, opener, closer)
        bib_pass(bib_trim(substr(s, at, i - at)))
        return i
    }

    i++                     # consume opener
    i = bib_ws(s, i)
    from = i
    while (i <= n && substr(s, i, 1) !~ /[, \t\r\n})]/)
        i++
    key = substr(s, from, i - from)
    i = bib_ws(s, i)
    if (substr(s, i, 1) == ",")
        i++

    # start from empty arrays so fields of an earlier entry cannot
    # linger past BIB_N; split("", a) is the portable way to clear a
    BIB_N = 0
    split("", BIB_NAME)
    split("", BIB_VAL)
    split("", BIB_KIND)

    while (1) {
        i = bib_ws(s, i)
        c = substr(s, i, 1)
        if (c == "" || c == closer) {
            if (c == closer)
                i++
            break
        }
        if (c == ",") {
            i++
            continue
        }
        from = i
        while (i <= n && substr(s, i, 1) !~ /[=, \t\r\n})]/)
            i++
        name = substr(s, from, i - from)
        i = bib_ws(s, i)
        if (substr(s, i, 1) != "=") {
            # malformed; skip a char and resync
            i++
            continue
        }
        i = bib_ws(s, i + 1)
        i = bib_value(s, i)
        BIB_N++
        BIB_NAME[BIB_N] = tolower(name)
        BIB_VAL[BIB_N] = BIB_VALUE
        BIB_KIND[BIB_N] = BIB_VKIND
    }

    BIB_RAW = bib_trim(substr(s, at, i - at))
    bib_entry(type, key)
    return i
}