From c102ab995f9a86a77e40b9a952b2b23c0bd7de74 Mon Sep 17 00:00:00 2001 From: "Douglas B. Rumbaugh" Date: Sat, 6 Jun 2026 13:44:00 -0400 Subject: Fuzzing with associated fixes --- lib/bib-key.awk | 4 ++-- lib/bib-parse.awk | 5 ++++- lib/bib-select.awk | 30 ++++++++++++++++++++---------- lib/bib-strip.awk | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 lib/bib-strip.awk (limited to 'lib') diff --git a/lib/bib-key.awk b/lib/bib-key.awk index 4223155..3f4117f 100644 --- a/lib/bib-key.awk +++ b/lib/bib-key.awk @@ -38,7 +38,7 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { surname = (n > 0) ? parts[n] : "" } gsub(/[^A-Za-z0-9]/, "", surname) - surname = tolower(surname) + surname = tolower(substr(surname, 1, 30)) if (surname == "") surname = "anon" @@ -62,5 +62,5 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { break } - return surname y word + return surname y substr(word, 1, 30) } diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk index e5bf9fa..e83cb07 100644 --- a/lib/bib-parse.awk +++ b/lib/bib-parse.awk @@ -4,7 +4,8 @@ # bib_entry(type, key) - called once per regular entry. The fields are # available in BIB_N, BIB_NAME[], BIB_VAL[] and # BIB_KIND[]; the raw source text of the entry -# is in BIB_RAW. +# is in BIB_RAW, and its position in the input +# buffer bib_buf is BIB_START..BIB_END-1. # bib_pass(raw) - called for @string and @preamble blocks with # their raw source text. # @@ -211,6 +212,8 @@ function bib_entry_at(s, i, at, type, opener, closer, key, name, c) { BIB_KIND[BIB_N] = BIB_VKIND } BIB_RAW = bib_trim(substr(s, at, i - at)) + BIB_START = at + BIB_END = i bib_entry(type, key) return i } diff --git a/lib/bib-select.awk b/lib/bib-select.awk index 9aa5a37..3ebd16f 100644 --- a/lib/bib-select.awk +++ b/lib/bib-select.awk @@ -1,21 +1,31 @@ # bib-select.awk - emit entries selected by key, canonically # # Requires bib-parse.awk and bib-canon.awk. 
Variables (set with -v):
-#   keys   - comma-separated list of entry keys; a key of "*" selects
-#            every entry (as produced by \nocite{*})
-#   invert - 0: emit entries whose key is in the list
-#            1: emit entries whose key is NOT in the list
+#   keys    - comma-separated list of entry keys; a key of "*" selects
+#             every entry (as produced by \nocite{*})
+#   keyfile - file with one key per line, for key lists too large to
+#             pass on the command line; merged with keys ("*" works too)
+#   invert  - 0: emit entries whose key is in the list
+#             1: emit entries whose key is NOT in the list
 #
-# With keys="" and invert=1 this acts as a canonicalizing filter for
+# With no keys and invert=1 this acts as a canonicalizing filter for
 # everything. @string and @preamble blocks always pass through.
 
+function bib_sel_add(k) {	# register one key from keys or keyfile
+	if (k == "*")
+		BIB_SEL_ALL = 1
+	else
+		BIB_SEL[k] = 1
+}
+
 BEGIN {
 	bib_sel_n = split(keys, bib_sel_k, ",")
-	for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) {
-		if (bib_sel_k[bib_sel_i] == "*")
-			BIB_SEL_ALL = 1
-		else
-			BIB_SEL[bib_sel_k[bib_sel_i]] = 1
+	for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++)
+		bib_sel_add(bib_sel_k[bib_sel_i])
+	if (keyfile != "") {
+		while ((getline bib_sel_line < keyfile) > 0)	# ends on EOF (0) or read error (-1)
+			bib_sel_add(bib_sel_line)
+		close(keyfile)
 	}
 }
 
diff --git a/lib/bib-strip.awk b/lib/bib-strip.awk
new file mode 100644
index 0000000..cecca3e
--- /dev/null
+++ b/lib/bib-strip.awk
@@ -0,0 +1,49 @@
+# bib-strip.awk - remove entries by key, preserving all other bytes
+#
+# Requires bib-parse.awk. Variables (set with -v):
+#   keys    - comma-separated list of entry keys to remove
+#   keyfile - file with one key per line, for key lists too large to
+#             pass on the command line; merged with keys
+#
+# Unlike bib-select.awk, which re-emits entries canonically, this
+# splices the matched entries' source spans out of the input and
+# leaves everything else - comments, formatting, @string blocks -
+# byte-for-byte intact.
Used by bib-add -f so that replacing one
+# entry never rewrites the rest of the database.
+#
+# This END block runs after bib-parse.awk's (END blocks execute in
+# the order their files are given to awk), so the spans recorded by
+# the hooks below are complete by the time output happens.
+
+BEGIN {	# collect the keys to remove as subscripts of BIB_DROP
+	bib_strip_n = split(keys, bib_strip_k, ",")
+	for (bib_strip_i = 1; bib_strip_i <= bib_strip_n; bib_strip_i++)
+		BIB_DROP[bib_strip_k[bib_strip_i]] = 1
+	if (keyfile != "") {
+		while ((getline bib_strip_line < keyfile) > 0)	# ends on EOF (0) or read error (-1)
+			BIB_DROP[bib_strip_line] = 1
+		close(keyfile)
+	}
+}
+
+function bib_pass(raw) { }	# nothing recorded, so @string/@preamble blocks stay in the output
+
+function bib_entry(type, key) {	# parser hook: remember the span of every entry to drop
+	if (!(key in BIB_DROP))
+		return
+	BIB_SPAN_S[++BIB_NSPAN] = BIB_START	# first character of the entry in bib_buf
+	BIB_SPAN_E[BIB_NSPAN] = BIB_END	# one past its last character
+}
+
+END {	# globals carry the bib_strip_ prefix: awk has no block scope
+	bib_strip_pos = 1	# next character of bib_buf not yet copied or dropped
+	bib_strip_len = length(bib_buf)	# hoisted out of the whitespace loop below
+	for (bib_strip_j = 1; bib_strip_j <= BIB_NSPAN; bib_strip_j++) {
+		printf "%s", substr(bib_buf, bib_strip_pos, BIB_SPAN_S[bib_strip_j] - bib_strip_pos)
+		bib_strip_pos = BIB_SPAN_E[bib_strip_j]
+		# swallow the whitespace that followed the removed entry
+		while (bib_strip_pos <= bib_strip_len && substr(bib_buf, bib_strip_pos, 1) ~ /[ \t\r\n]/)
+			bib_strip_pos++
+	}
+	printf "%s", substr(bib_buf, bib_strip_pos)
+}
-- 
cgit v1.2.3