diff options
| -rwxr-xr-x | bib-add | 2 | ||||
| -rwxr-xr-x | bib-convert | 15 | ||||
| -rwxr-xr-x | bib-extract | 9 | ||||
| -rw-r--r-- | lib/bib-canon.awk | 13 | ||||
| -rw-r--r-- | lib/bib-check.awk | 2 | ||||
| -rw-r--r-- | lib/bib-key.awk | 11 | ||||
| -rw-r--r-- | lib/bib-ls.awk | 7 | ||||
| -rw-r--r-- | lib/bib-select.awk | 19 | ||||
| -rw-r--r-- | lib/bib2ref.awk | 19 | ||||
| -rw-r--r-- | lib/ref2bib.awk | 20 | ||||
| -rwxr-xr-x | tests/run-tests.sh | 6 |
11 files changed, 68 insertions, 55 deletions
@@ -44,7 +44,7 @@ awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$tmp" > "$tmpkeys" if [ -f "$db" ]; then dups=$(awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-lskeys.awk" "$db" \ - | grep -Fxf "$tmpkeys") || dups= + | grep -Fxf "$tmpkeys") if [ -n "$dups" ]; then if [ "$force" -eq 1 ]; then # rewrite the database without the entries being replaced diff --git a/bib-convert b/bib-convert index ef4c0b0..fc215b7 100755 --- a/bib-convert +++ b/bib-convert @@ -40,12 +40,15 @@ trap 'rm -f "$tmp"' EXIT INT TERM cat "$@" > "$tmp" if [ "$mode" = auto ]; then - first=$(awk 'NF { sub(/^[ \t]+/, ""); print substr($0, 1, 1); exit }' "$tmp") - case $first in - @) mode=toref ;; - %) mode=tobib ;; - *) printf 'bib-convert: cannot detect input format\n' >&2; exit 1 ;; - esac + # first line that is unambiguous wins: an @entry means bibtex, a + # %X field means refer (a bare "% comment" decides neither) + mode=$(awk ' + /^[ \t]*@/ { print "toref"; exit } + /^%[A-Z] / { print "tobib"; exit }' "$tmp") + if [ -z "$mode" ]; then + printf 'bib-convert: cannot detect input format\n' >&2 + exit 1 + fi fi if [ "$mode" = toref ]; then diff --git a/bib-extract b/bib-extract index 52aa85b..297588a 100755 --- a/bib-extract +++ b/bib-extract @@ -50,11 +50,6 @@ keys=$(awk ' [ -n "$keys" ] || exit 0 -# \nocite{*} cites everything: emit the whole database -case ",$keys," in - *,\*,*) keys= invert=1 ;; - *) invert=0 ;; -esac - +# a key of "*" (from \nocite{*}) selects the whole database exec awk -f "$LIB/bib-parse.awk" -f "$LIB/bib-canon.awk" \ - -f "$LIB/bib-select.awk" -v keys="$keys" -v invert="$invert" "$@" + -f "$LIB/bib-select.awk" -v keys="$keys" -v invert=0 "$@" diff --git a/lib/bib-canon.awk b/lib/bib-canon.awk index d11e9cb..215e4cc 100644 --- a/lib/bib-canon.awk +++ b/lib/bib-canon.awk @@ -26,3 +26,16 @@ function bib_get(name, j) { return BIB_VAL[j] return "" } + +# render a field value as plain text: strip braces, collapse whitespace +function bib_clean(v) { + gsub(/[{}]/, "", v) + gsub(/[ \t\r\n]+/, " ", v) + return bib_trim(v) +} + +# print a blank line between output records (nothing before the first) +function bib_sep() { + if (BIB_OUT_N++) + print "" +} diff --git a/lib/bib-check.awk b/lib/bib-check.awk index 4411a55..2adbd60 100644 --- a/lib/bib-check.awk +++ b/lib/bib-check.awk @@ -31,7 +31,7 @@ function problem(key, msg) { BIB_BAD = 1 } -function bib_entry(type, key, n, req, i, alts, na, j, found, t, k) { +function bib_entry(type, key, n, req, i, alts, na, j, found, t) { if (key in BIB_KEYS_SEEN) problem(key, "duplicate key") BIB_KEYS_SEEN[key] = 1 diff --git a/lib/bib-key.awk b/lib/bib-key.awk index 41534ba..4223155 100644 --- a/lib/bib-key.awk +++ b/lib/bib-key.awk @@ -4,14 +4,12 @@ # <surname><year><word>, e.g. knuth1984literate. function bib_pass(raw) { - if (bib_out_n++) - print "" + bib_sep() print raw } function bib_entry(type, key, k, n) { - if (bib_out_n++) - print "" + bib_sep() k = bib_mkkey() # disambiguate collisions with b, c, ... suffixes if (k in BIB_KEYS_SEEN) { @@ -30,10 +28,9 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { t = bib_get("title") # surname of the first author + a = bib_clean(a) if (match(a, / [Aa][Nn][Dd] /)) a = substr(a, 1, RSTART - 1) - gsub(/[{}]/, "", a) - a = bib_trim(a) if (index(a, ",") > 0) surname = substr(a, 1, index(a, ",") - 1) else { @@ -52,7 +49,7 @@ function bib_mkkey( a, y, t, surname, word, n, parts, i, w) { y = "" # first significant word of the title - gsub(/[{}]/, "", t) + t = bib_clean(t) word = "" n = split(tolower(t), parts, /[^a-z0-9]+/) for (i = 1; i <= n; i++) { diff --git a/lib/bib-ls.awk b/lib/bib-ls.awk index 909b654..c1e9c98 100644 --- a/lib/bib-ls.awk +++ b/lib/bib-ls.awk @@ -14,12 +14,9 @@ function bib_entry(type, key, a, t) { a = bib_get("author") if (a == "") a = bib_get("editor") - gsub(/[{}]/, "", a) - gsub(/[ \t\r\n]+/, " ", a) + a = bib_clean(a) if (match(a, / [Aa][Nn][Dd] /)) a = substr(a, 1, RSTART - 1) " et al." - t = bib_get("title") - gsub(/[{}]/, "", t) - gsub(/[ \t\r\n]+/, " ", t) + t = bib_clean(bib_get("title")) printf "%s\t%s\t%s\t%s\t%s\n", key, type, a, bib_get("year"), t } diff --git a/lib/bib-select.awk b/lib/bib-select.awk index 1900390..9aa5a37 100644 --- a/lib/bib-select.awk +++ b/lib/bib-select.awk @@ -1,7 +1,8 @@ # bib-select.awk - emit entries selected by key, canonically # # Requires bib-parse.awk and bib-canon.awk. Variables (set with -v): -# keys - comma-separated list of entry keys +# keys - comma-separated list of entry keys; a key of "*" selects +# every entry (as produced by \nocite{*}) # invert - 0: emit entries whose key is in the list # 1: emit entries whose key is NOT in the list # @@ -10,20 +11,22 @@ BEGIN { bib_sel_n = split(keys, bib_sel_k, ",") - for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) - BIB_SEL[bib_sel_k[bib_sel_i]] = 1 + for (bib_sel_i = 1; bib_sel_i <= bib_sel_n; bib_sel_i++) { + if (bib_sel_k[bib_sel_i] == "*") + BIB_SEL_ALL = 1 + else + BIB_SEL[bib_sel_k[bib_sel_i]] = 1 + } } function bib_pass(raw) { - if (bib_out_n++) - print "" + bib_sep() print raw } function bib_entry(type, key) { - if ((key in BIB_SEL) != invert + 0) { - if (bib_out_n++) - print "" + if (BIB_SEL_ALL || (key in BIB_SEL) != invert + 0) { + bib_sep() bib_emit(type, key) } } diff --git a/lib/bib2ref.awk b/lib/bib2ref.awk index 4d9e595..137c6ad 100644 --- a/lib/bib2ref.awk +++ b/lib/bib2ref.awk @@ -5,25 +5,20 @@ function bib_pass(raw) { } function r_field(tag, v) { - if (v != "") { - gsub(/[{}]/, "", v) - gsub(/[ \t\r\n]+/, " ", v) - printf "%%%s %s\n", tag, bib_trim(v) - } + v = bib_clean(v) + if (v != "") + printf "%%%s %s\n", tag, v } function r_names(tag, v, n, parts, i) { - gsub(/[{}]/, "", v) - gsub(/[ \t\r\n]+/, " ", v) - n = split(v, parts, / +[Aa][Nn][Dd] +/) + n = split(bib_clean(v), parts, / +[Aa][Nn][Dd] +/) for (i = 1; i <= n; i++) - if (bib_trim(parts[i]) != "") - printf "%%%s %s\n", tag, bib_trim(parts[i]) + if (parts[i] != "") + printf "%%%s %s\n", tag, parts[i] } function bib_entry(type, key, d, p, m) { - if (bib_out_n++) - print "" + bib_sep() r_names("A", bib_get("author")) r_names("E", bib_get("editor")) r_field("T", bib_get("title")) diff --git a/lib/ref2bib.awk b/lib/ref2bib.awk index 422fdd7..9384677 100644 --- a/lib/ref2bib.awk +++ b/lib/ref2bib.awk @@ -19,6 +19,14 @@ function r_emit(name, v) { printf " %s = {%s},\n", name, v } +# join arr[1..n] with " and ", as bibtex name lists expect +function r_join(arr, n, s, i) { + s = arr[1] + for (i = 2; i <= n; i++) + s = s " and " arr[i] + return s +} + { split("", val) na = 0 @@ -65,14 +73,10 @@ function r_emit(name, v) { print "" printf "@%s{FIXME,\n", type - authors = "" - for (i = 1; i <= na; i++) - authors = (i == 1) ? A[i] : authors " and " A[i] - r_emit("author", authors) - editors = "" - for (i = 1; i <= ne; i++) - editors = (i == 1) ? E[i] : editors " and " E[i] - r_emit("editor", editors) + if (na > 0) + r_emit("author", r_join(A, na)) + if (ne > 0) + r_emit("editor", r_join(E, ne)) r_emit("title", val["T"]) r_emit("journal", val["J"]) diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 70721db..8a9f49a 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -118,6 +118,12 @@ check "bib-convert refer->bibtex type guess" \ check "bib-convert refer->bibtex pages" \ sh -c "printf '%s' '$out' | grep -q ' pages = {433--460},'" +# format detection is not fooled by a leading % comment (e.g. JabRef's +# "% Encoding: UTF-8") in a bibtex file +out=$(printf '%% Encoding: UTF-8\n%s\n' "$entry" | bib-convert) +check "bib-convert detects bibtex behind %% comment" \ + sh -c "printf '%s' '$out' | grep -q '^%A Donald E. Knuth$'" + # ---- bib-gen ----------------------------------------------------------- out=$(bib-gen -t book author='Xavier Yu' title='Some Title' year=2001 publisher='Pub') check "bib-gen argument mode" \ |