From eabf1f6d74dac497ce31e3e2f441cfa25e9f74f2 Mon Sep 17 00:00:00 2001
From: "Douglas B. Rumbaugh" <doug@douglasrumbaugh.com>
Date: Sat, 6 Jun 2026 12:02:41 -0400
Subject: Initial implementation (only a few years later!)

This is pure Claude. I'd written out the plan for
this suite of scripts eons ago, but never found the
time to actually do it. Remembered it this morning,
pointed Claude at the README, and had something
that appears to work in minutes.

caveat emptor: the design is mine, but the code is
purely LLM generated at this point.
---
 bib-fetch | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100755 bib-fetch

(limited to 'bib-fetch')

diff --git a/bib-fetch b/bib-fetch
new file mode 100755
index 0000000..62f7993
--- /dev/null
+++ b/bib-fetch
@@ -0,0 +1,82 @@
+#!/bin/sh
+# bib-fetch - fetch a bibtex entry for a pdf from its DOI or arXiv id
+#
+# usage: bib-fetch [-d doi | -a arxiv-id] [file.pdf]
+#
+# Unless given with -d or -a, an identifier is extracted from the first
+# pages of the pdf (requires pdftotext): a DOI if one is found, falling
+# back to an arXiv id. DOIs are resolved through doi.org content
+# negotiation (crossref et al.), arXiv ids through arxiv.org. The entry
+# is emitted canonically on stdout with a generated key.
+
+usage() {  # synopsis goes to stderr; status 2 marks a usage error
+  >&2 printf 'usage: bib-fetch [-d doi | -a arxiv-id] [file.pdf]\n'
+  exit 2
+}
+
+doi='' arxiv=''  # identifiers supplied on the command line, if any
+while getopts d:a: flag
+do
+  case "$flag" in
+    d) doi="$OPTARG" ;;
+    a) arxiv="$OPTARG" ;;
+    *) usage ;;  # unknown option or missing option argument
+  esac
+done
+shift "$((OPTIND - 1))"  # leave only the operands in "$@"
+if [ -n "$doi" ] && [ -n "$arxiv" ]; then usage; fi  # -d and -a are mutually exclusive
+
+bibkey="$(dirname "$0")/bib-key"  # prefer the bib-key that sits beside this script
+if [ ! -x "$bibkey" ]; then bibkey=bib-key; fi  # otherwise leave it to a $PATH lookup
+
+if ! command -v curl > /dev/null 2>&1; then  # both fetch paths go through curl
+  printf 'bib-fetch: curl is required\n' >&2
+  exit 1
+fi
+
+if [ -z "$doi" ] && [ -z "$arxiv" ]; then  # nothing usable from -d/-a: scan the pdf
+  [ $# -eq 1 ] || usage  # exactly one operand is expected here, the pdf
+  pdf=$1
+  [ -r "$pdf" ] || { printf 'bib-fetch: cannot read %s\n' "$pdf" >&2; exit 1; }
+  command -v pdftotext > /dev/null 2>&1 || {
+    printf 'bib-fetch: pdftotext is required to extract an identifier\n' >&2
+    exit 1
+  }
+  ids=$(pdftotext -l 2 "$pdf" - 2> /dev/null | awk '
+    # keep the first DOI-shaped token (on any line) and the first arXiv stamp seen
+    doi == "" {
+      if (match($0, /10\.[0-9][0-9][0-9][0-9][0-9]*\/[^ \t"<>]+/)) {
+        doi = substr($0, RSTART, RLENGTH)
+        sub(/[.,;)\]]+$/, "", doi)  # drop punctuation trailing the match
+      }
+    }
+    arxiv == "" {
+      # modern ids (arXiv:2104.01234v2) and old style (arXiv:cs/0101001)
+      if (match($0, /[aA][rR][xX][iI][vV]:[0-9][0-9][0-9][0-9]\.[0-9][0-9][0-9][0-9][0-9]?(v[0-9]+)?/) ||
+          match($0, /[aA][rR][xX][iI][vV]:[a-z-]+(\.[A-Z][A-Z])?\/[0-9][0-9][0-9][0-9][0-9][0-9][0-9](v[0-9]+)?/))
+        arxiv = substr($0, RSTART + 6, RLENGTH - 6)  # skip the 6-character "arXiv:" prefix
+    }
+    END { printf "%s\t%s\n", doi, arxiv }')  # one line: doi, a tab, arxiv; either may be empty
+  doi=${ids%%	*}  # the pattern holds a literal tab: keep what precedes it
+  arxiv=${ids#*	}  # literal tab again: keep what follows it
+  if [ -z "$doi" ] && [ -z "$arxiv" ]; then
+    printf 'bib-fetch: no DOI or arXiv id found in %s\n' "$pdf" >&2
+    exit 1
+  fi
+fi
+
+if [ -n "$doi" ]; then  # a DOI wins over an arXiv id when both are known
+  doi=${doi#[dD][oO][iI]:}; doi=${doi#http*doi.org/}  # accept doi:10.x/y and doi.org URLs
+  entry=$(curl -sSfL -H 'Accept: application/x-bibtex' "https://doi.org/$doi") || {
+    printf 'bib-fetch: failed to fetch entry for doi %s\n' "$doi" >&2
+    exit 1
+  }
+else  # no DOI: ask arxiv.org for the entry instead
+  arxiv=${arxiv#[aA][rR][xX][iI][vV]:}  # tolerate the prefix in any case, as the pdf scan does
+  entry=$(curl -sSfL "https://arxiv.org/bibtex/$arxiv") || {
+    printf 'bib-fetch: failed to fetch entry for arXiv id %s\n' "$arxiv" >&2
+    exit 1
+  }
+fi
+
+printf '%s\n' "$entry" | "$bibkey"  # bib-key reads the fetched entry on stdin
-- 
cgit v1.2.3