aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/bib-key.awk4
-rw-r--r--lib/bib-parse.awk5
-rw-r--r--lib/bib-select.awk30
-rw-r--r--lib/bib-strip.awk49
4 files changed, 75 insertions, 13 deletions
diff --git a/lib/bib-key.awk b/lib/bib-key.awk
index 4223155..3f4117f 100644
--- a/lib/bib-key.awk
+++ b/lib/bib-key.awk
@@ -38,7 +38,7 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) {
surname = (n > 0) ? parts[n] : ""
}
gsub(/[^A-Za-z0-9]/, "", surname)
- surname = tolower(surname)
+ surname = tolower(substr(surname, 1, 30))
if (surname == "")
surname = "anon"
@@ -62,5 +62,5 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) {
break
}
- return surname y word
+ return surname y substr(word, 1, 30)
}
diff --git a/lib/bib-parse.awk b/lib/bib-parse.awk
index e5bf9fa..e83cb07 100644
--- a/lib/bib-parse.awk
+++ b/lib/bib-parse.awk
@@ -4,7 +4,8 @@
# bib_entry(type, key) - called once per regular entry. The fields are
# available in BIB_N, BIB_NAME[], BIB_VAL[] and
# BIB_KIND[]; the raw source text of the entry
-# is in BIB_RAW.
+# is in BIB_RAW, and its position in the input
+# buffer bib_buf is BIB_START..BIB_END-1.
# bib_pass(raw) - called for @string and @preamble blocks with
# their raw source text.
#
@@ -211,6 +212,8 @@ function bib_entry_at(s, i, at, type, opener, closer, key, name, c) {
BIB_KIND[BIB_N] = BIB_VKIND
}
BIB_RAW = bib_trim(substr(s, at, i - at))
+ BIB_START = at
+ BIB_END = i
bib_entry(type, key)
return i
}
diff --git a/lib/bib-select.awk b/lib/bib-select.awk
index 9aa5a37..3ebd16f 100644
--- a/lib/bib-select.awk
+++ b/lib/bib-select.awk
@@ -1,21 +1,31 @@
# bib-select.awk - emit entries selected by key, canonically
#
# Requires bib-parse.awk and bib-canon.awk. Variables (set with -v):
-# keys - comma-separated list of entry keys; a key of "*" selects
-# every entry (as produced by \nocite{*})
-# invert - 0: emit entries whose key is in the list
-# 1: emit entries whose key is NOT in the list
+# keys - comma-separated list of entry keys; a key of "*" selects
+# every entry (as produced by \nocite{*})
+# keyfile - file with one key per line, for key lists too large to
+# pass on the command line; merged with keys
+# invert - 0: emit entries whose key is in the list
+# 1: emit entries whose key is NOT in the list
#
-# With keys="" and invert=1 this acts as a canonicalizing filter for
+# With no keys and invert=1 this acts as a canonicalizing filter for
# everything. @string and @preamble blocks always pass through.
+# bib_sel_add(k) - register one selection key.  The wildcard "*" sets
+# the BIB_SEL_ALL flag; any other key becomes an index of BIB_SEL.
+function bib_sel_add(k) {
+	if (k != "*") {
+		BIB_SEL[k] = 1
+		return
+	}
+	BIB_SEL_ALL = 1
+}
+
BEGIN {
bib_sel_n = split(keys, bib_sel_k, ",")
- for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) {
- if (bib_sel_k[bib_sel_i] == "*")
- BIB_SEL_ALL = 1
- else
- BIB_SEL[bib_sel_k[bib_sel_i]] = 1
+ for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++)
+ bib_sel_add(bib_sel_k[bib_sel_i])
+ if (keyfile != "") {
+ while ((getline bib_sel_line < keyfile) > 0)
+ bib_sel_add(bib_sel_line)
+ close(keyfile)
}
}
diff --git a/lib/bib-strip.awk b/lib/bib-strip.awk
new file mode 100644
index 0000000..cecca3e
--- /dev/null
+++ b/lib/bib-strip.awk
@@ -0,0 +1,49 @@
+# bib-strip.awk - remove entries by key, preserving all other bytes
+#
+# Requires bib-parse.awk. Variables (set with -v):
+# keys - comma-separated list of entry keys to remove
+# keyfile - file with one key per line, for key lists too large to
+# pass on the command line; merged with keys
+#
+# Unlike bib-select.awk, which re-emits entries canonically, this
+# splices the matched entries' source spans out of the input and
+# leaves everything else - comments, formatting, @string blocks -
+# byte-for-byte intact; only the run of whitespace directly after a
+# removed entry is dropped along with it. Used by bib-add -f so
+# that replacing one entry never rewrites the rest of the database.
+#
+# This END block runs after bib-parse.awk's (END blocks execute in
+# the order their files are given to awk), so the spans recorded by
+# the hooks below are complete by the time output happens.
+
+# Build BIB_DROP, the set of keys to remove, from the comma-separated
+# "keys" variable and, when "keyfile" is non-empty, from that file
+# (one key per line; each line is used verbatim as a key).
+BEGIN {
+	bib_strip_n = split(keys, bib_strip_k, ",")
+	for (bib_strip_i = 1; bib_strip_i <= bib_strip_n; bib_strip_i++)
+		BIB_DROP[bib_strip_k[bib_strip_i]] = 1
+	if (keyfile != "") {
+		# getline yields 1 per line read, 0 at end of file and -1 on
+		# error; keep the result so a failure is not mistaken for EOF.
+		while ((bib_strip_rc = (getline bib_strip_line < keyfile)) > 0)
+			BIB_DROP[bib_strip_line] = 1
+		# Diagnostic only: processing continues exactly as before, so
+		# the output is unchanged, but an unreadable key file no
+		# longer goes unnoticed.
+		if (bib_strip_rc < 0)
+			printf "bib-strip: cannot read key file %s\n", keyfile > "/dev/stderr"
+		close(keyfile)
+	}
+}
+
+# bib_pass(raw) - parser hook, deliberately a no-op.  Nothing needs to
+# be recorded here: the END block copies everything outside the
+# dropped entry spans straight from bib_buf.
+function bib_pass(raw) { }
+
+# bib_entry(type, key) - parser hook for regular entries.  When the
+# key is in BIB_DROP, append the entry's BIB_START/BIB_END pair to the
+# span lists BIB_SPAN_S[] / BIB_SPAN_E[] (1-based, BIB_NSPAN entries);
+# all other entries are ignored.
+function bib_entry(type, key) {
+	if (!(key in BIB_DROP))
+		return
+	BIB_SPAN_S[++BIB_NSPAN] = BIB_START
+	BIB_SPAN_E[BIB_NSPAN] = BIB_END
+}
+
+# Emit bib_buf with every recorded span removed.  Spans are visited in
+# the order they were recorded; the text between the current position
+# and the start of each span is copied verbatim, then the position
+# jumps to the span's end.
+#
+# Variables in awk rules are global across all loaded files, so the
+# scratch names carry the bib_strip_ prefix like those in BEGIN.
+END {
+	# bib_buf is not modified here, so measure it once instead of on
+	# every whitespace character examined below.
+	bib_strip_len = length(bib_buf)
+	bib_strip_pos = 1
+	for (bib_strip_j = 1; bib_strip_j <= BIB_NSPAN; bib_strip_j++) {
+		printf "%s", substr(bib_buf, bib_strip_pos, BIB_SPAN_S[bib_strip_j] - bib_strip_pos)
+		bib_strip_pos = BIB_SPAN_E[bib_strip_j]
+		# swallow the whitespace that followed the removed entry
+		while (bib_strip_pos <= bib_strip_len && substr(bib_buf, bib_strip_pos, 1) ~ /[ \t\r\n]/)
+			bib_strip_pos++
+	}
+	# whatever follows the last removed span (or the whole buffer when
+	# nothing was removed)
+	printf "%s", substr(bib_buf, bib_strip_pos)
+}