#!/bin/sh
# satsignal-anchor — PyPI PEP 740 provenance adapter for satsignal.provenance.v1
# ---------------------------------------------------------------------------
# The last sibling in the CI/registry cluster (GitLab CI, Bitbucket,
# Docker BuildKit, npm) for the satsignal.provenance.v1 ingest schema
# (spec: https://proof.satsignal.cloud/spec-provenance). It is a thin
# translator: it fills the canonical provenance manifest for a
# published PyPI release file and POSTs it to Satsignal, which commits
# its SHA-256 on-chain and returns a portable .mbnt bundle. POSIX-sh
# only (sha256sum + curl + jq) — same manifest shape as every other
# adapter and the GitHub Action, no SDK, no Satsignal account at
# verify time.
#
# PEP 740 attestations are a registry-publish concern, not a
# CI-platform pipeline — there is no `include: remote:`, the
# attestation lives in the index next to the file. So this is a
# standalone script (like docker-buildx / npm), run after upload. The
# subject is a published distribution (sdist by default — the
# canonical source artifact; what pip verifies); its PEP 740
# provenance object is anchored as a reference — Satsignal timestamps
# the attestation, it does not re-issue it (the "plug into
# Sigstore/SLSA, don't compete" posture).
#
# USAGE — after `twine upload` / a Trusted Publishing release:
#
#     SATSIGNAL_PYPI_SPEC='my-pkg==1.4.2' \
#       curl -fsSL https://satsignal.cloud/pypi-pep740.satsignal.sh | sh
#     # anchor a specific wheel instead of the sdist:
#     SATSIGNAL_PYPI_SPEC='my-pkg==1.4.2' \
#       SATSIGNAL_PYPI_FILENAME='my_pkg-1.4.2-py3-none-any.whl' ... | sh
#     # anchor a local dist without the index (subject = that file):
#     SATSIGNAL_PYPI_SPEC='my-pkg==1.4.2' \
#       ./pypi-pep740.satsignal.sh dist/my_pkg-1.4.2.tar.gz
#
# Required environment:
#     SATSIGNAL_API_KEY    sk_... key with the anchors:create scope
#     SATSIGNAL_FOLDER     folder slug to file the proof under (e.g. "releases")
#     SATSIGNAL_MATTER     legacy alias of SATSIGNAL_FOLDER (still accepted;
#                          only needed when SATSIGNAL_FOLDER is unset)
#     SATSIGNAL_PYPI_SPEC  name==version  (also accepts name@version)
#
# Optional environment:
#     SATSIGNAL_PYPI_FILENAME  pin one release file (default: the sdist)
#     SATSIGNAL_PYPI_INDEX     default https://pypi.org
#     SATSIGNAL_SUBJECT_TYPE   subject.type override (default "package")
#     SATSIGNAL_API_BASE       default https://app.satsignal.cloud
#     SATSIGNAL_NO_REGISTRY    set non-empty to skip the index entirely
#                              (requires a local dist positional arg;
#                              anchors that file alone, no attestation)
#
# Subject digest: the index publishes `urls[].digests.sha256` (hex)
# directly — that IS what pip verifies, so no download is needed. A
# local positional arg overrides it (anchor exactly the file you
# built). subject.digest is sha256; the index's blake2b_256 rides in
# claims for cross-checking.
#
# Attestation reference (best-effort, never fatal): the file's
# `urls[].provenance` URL from the JSON API, or the deterministic PEP
# 740 Integrity endpoint
# `<index>/integrity/<project>/<version>/<filename>/provenance`. Its
# sha256 is anchored as one `pypi`-type reference. If the file was
# uploaded without attestations the endpoint 404s and the script
# warns and anchors the file digest alone (still chain-bound). It is
# honest about what it anchored.
#
# Outputs (written to the working directory):
#     satsignal-proof.json     the API response (txid, manifest_hash,
#                              embeddable chain-anchor-v1 envelope);
#                              also written as satsignal-receipt.json
#                              (compat copy)
#     satsignal-manifest.json  the canonical manifest that was anchored
#     satsignal-<digest>.mbnt  the proof bundle — verify offline per
#                              /spec-provenance §5, no Satsignal call
# ---------------------------------------------------------------------------
# Abort on any unhandled command failure and on use of an unset variable.
set -eu

# Fail fast when one of the external tools the script shells out to is
# not on PATH.
for tool in jq curl sha256sum; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "satsignal: required command not found: $tool" >&2
    exit 1
  fi
done

# Required settings: each `:?` expansion aborts the script with the given
# hint when the variable is unset or empty.
: "${SATSIGNAL_API_KEY:?set SATSIGNAL_API_KEY (key with anchors:create)}"
# SATSIGNAL_FOLDER falls back to the legacy SATSIGNAL_MATTER; the script
# aborts only when both are unset or empty.
: "${SATSIGNAL_FOLDER:=${SATSIGNAL_MATTER:?set SATSIGNAL_FOLDER (folder slug; SATSIGNAL_MATTER also accepted)}}"
: "${SATSIGNAL_PYPI_SPEC:?set SATSIGNAL_PYPI_SPEC (e.g. my-pkg==1.4.2)}"

# Optional settings with their defaults. Both base URLs have one trailing
# slash stripped so the paths appended later never start with "//".
SATSIGNAL_API_BASE="${SATSIGNAL_API_BASE:-https://app.satsignal.cloud}"
SATSIGNAL_API_BASE="${SATSIGNAL_API_BASE%/}"
SATSIGNAL_SUBJECT_TYPE="${SATSIGNAL_SUBJECT_TYPE:-package}"
INDEX="${SATSIGNAL_PYPI_INDEX:-https://pypi.org}"
INDEX="${INDEX%/}"
# Empty WANT_FN means "let the index lookup choose" (sdist first).
WANT_FN="${SATSIGNAL_PYPI_FILENAME:-}"
# Optional positional argument: path to a locally built distribution.
LOCAL_DIST="${1:-}"

# Split SATSIGNAL_PYPI_SPEC into NAME and VERSION. The pip-style '=='
# separator is tried first; 'name@version' is accepted as well, for
# symmetry with the npm adapter. Anything else is rejected.
case "$SATSIGNAL_PYPI_SPEC" in
  *==*)
    # NAME is the text before the first '==', VERSION the text after
    # the last one.
    NAME=${SATSIGNAL_PYPI_SPEC%%==*}
    VERSION=${SATSIGNAL_PYPI_SPEC##*==}
    ;;
  *@*)
    # Split at the last '@'.
    NAME=${SATSIGNAL_PYPI_SPEC%@*}
    VERSION=${SATSIGNAL_PYPI_SPEC##*@}
    ;;
  *)
    echo "satsignal: SATSIGNAL_PYPI_SPEC must be name==version" >&2
    exit 1
    ;;
esac

# A separator with nothing on one side (e.g. "pkg==") is not a usable spec.
if [ -z "$NAME" ] || [ -z "$VERSION" ]; then
  echo "satsignal: could not parse name/version from SATSIGNAL_PYPI_SPEC" >&2
  exit 1
fi

# Index metadata (skipped only with SATSIGNAL_NO_REGISTRY + a local
# dist). NAME is passed as given: PyPI answers a non-normalized project
# name with a redirect to the normalized one, so curl must follow
# redirects (-L). Without it the redirect response itself is parsed and
# the release is misreported as "not found".
#
# Results (all left empty when the index is skipped or unreachable):
#   FN     selected release filename
#   FURL   its download URL
#   FSHA   its sha256 (hex) as published by the index
#   FB2    its blake2b_256 (hex) as published by the index
#   FPROV  its provenance URL, when the JSON API exposes one
FN=""
FURL=""
FSHA=""
FB2=""
FPROV=""
if [ -z "${SATSIGNAL_NO_REGISTRY:-}" ]; then
  # curl's own diagnostics are silenced on purpose: a failed lookup is
  # reported by the note in the else branch.
  if JSON=$(curl -sS -f -L "$INDEX/pypi/$NAME/$VERSION/json" 2>/dev/null); then
    # Pick the target file: an explicit filename, else the sdist (the
    # canonical source artifact), else the first uploaded file. A
    # response without a `urls` array is treated as "no files".
    SEL=$(printf '%s' "$JSON" | jq -c --arg fn "$WANT_FN" '
      (.urls // []) as $u
      | ( if $fn != "" then first($u[] | select(.filename == $fn))
          else ( first($u[] | select(.packagetype == "sdist")) // $u[0] )
          end ) // empty')
    if [ -z "$SEL" ]; then
      echo "satsignal: ${WANT_FN:+file '$WANT_FN' }not found in" >&2
      echo "  $NAME==$VERSION on $INDEX" >&2
      exit 1
    fi
    FN=$(printf   '%s' "$SEL" | jq -r '.filename // ""')
    FURL=$(printf '%s' "$SEL" | jq -r '.url // ""')
    FSHA=$(printf '%s' "$SEL" | jq -r '.digests.sha256 // ""')
    FB2=$(printf  '%s' "$SEL" | jq -r '.digests.blake2b_256 // ""')
    FPROV=$(printf '%s' "$SEL" | jq -r '.provenance // ""')
  else
    # Not fatal here: a local dist argument can still supply the subject.
    echo "satsignal: note — index metadata for $NAME==$VERSION" >&2
    echo "  unreachable; relying on the local dist if provided." >&2
  fi
fi

# Subject: a local dist arg wins (anchor exactly that file), else the
# index-published sha256 (what pip verifies — no download needed).
#
# A local dist path that does not name a regular file is a hard error.
# Falling back to the index digest would silently anchor a different
# file than the one the caller asked for, and an anchor cannot be undone.
if [ -n "$LOCAL_DIST" ]; then
  if [ ! -f "$LOCAL_DIST" ]; then
    echo "satsignal: local dist not found: $LOCAL_DIST" >&2
    exit 1
  fi
  # Hash via stdin so the output format does not depend on the filename
  # (sha256sum escapes some names) and a leading '-' is never parsed as
  # an option. A read failure aborts the script under `set -e`.
  LOCAL_SUM=$(sha256sum <"$LOCAL_DIST")
  LOCAL_SHA="${LOCAL_SUM%% *}"
  DIGEST="sha256:$LOCAL_SHA"
  # Keep the index-selected filename when there is one; otherwise use
  # the local file's basename.
  [ -n "$FN" ] || FN="${LOCAL_DIST##*/}"
  # The filename, URL and blake2b_256 recorded later come from the index
  # entry; say so when the local file is not that same file.
  if [ -n "$FSHA" ] && [ "$FSHA" != "$LOCAL_SHA" ]; then
    echo "satsignal: warning — local dist sha256 differs from the index" >&2
    echo "  file '$FN'; set SATSIGNAL_PYPI_FILENAME to the matching release" >&2
    echo "  file or SATSIGNAL_NO_REGISTRY=1 to anchor the local file alone." >&2
  fi
elif [ -n "$FSHA" ]; then
  DIGEST="sha256:$FSHA"
else
  echo "satsignal: no subject digest — upload first (so the index" >&2
  echo "  serves digests.sha256) or pass a local dist as the argument." >&2
  exit 1
fi

# Attestation set (best-effort, non-fatal). Prefer the JSON API's
# provenance URL; else the deterministic PEP 740 Integrity endpoint.
#
# The provenance object is downloaded to a temporary file and hashed
# from there. Capturing it in a shell variable would strip trailing
# newlines, so the anchored digest could differ from the sha256 of the
# bytes the index actually serves. Redirects are followed (-L) because
# NAME is passed to the index as given.
#
# Result: ATTS_JSON is a JSON array holding one {type:"pypi",digest}
# entry on success, or '[]' when no provenance could be fetched.
ATTS_JSON='[]'
if [ -z "${SATSIGNAL_NO_REGISTRY:-}" ] && [ -n "$FN" ]; then
  PROV_URL="$FPROV"
  [ -n "$PROV_URL" ] || PROV_URL="$INDEX/integrity/$NAME/$VERSION/$FN/provenance"
  # Fall back to a scratch file in the working directory (where the
  # script writes its outputs anyway) when mktemp is unavailable.
  PROV_TMP=$(mktemp 2>/dev/null) || PROV_TMP="satsignal-provenance.$$.tmp"
  # curl's diagnostics are silenced on purpose: a missing attestation
  # (e.g. a 404) is expected and reported by the note below.
  if curl -sS -f -L -o "$PROV_TMP" "$PROV_URL" 2>/dev/null \
     && [ -s "$PROV_TMP" ]; then
    PROV_SUM=$(sha256sum <"$PROV_TMP") || PROV_SUM=""
    PROV_SHA="${PROV_SUM%% *}"
    if [ -n "$PROV_SHA" ]; then
      ATTS_JSON=$(jq -n --arg d "sha256:$PROV_SHA" '[{type:"pypi",digest:$d}]')
    fi
  fi
  rm -f "$PROV_TMP"
fi
if [ "$(printf '%s' "$ATTS_JSON" | jq 'length')" = "0" ]; then
  echo "satsignal: note — no PEP 740 provenance for ${FN:-$NAME==$VERSION};" >&2
  echo "  anchoring the file digest alone (still chain-bound). Publish" >&2
  echo "  with attestations (Trusted Publishing / twine --attestations)." >&2
fi

# Best-effort git context (this often runs from the project tree).
# GIT_COMMIT and GIT_REF stay empty when git is missing, the working
# directory is not a repository, or the value cannot be resolved.
#
# `rev-parse --verify --quiet` prints nothing when HEAD does not resolve
# to a commit; a plain `rev-parse HEAD` echoes the literal "HEAD" in a
# repository with no commits. `symbolic-ref` yields the branch name and
# fails quietly on a detached HEAD, where `--abbrev-ref` would report
# "HEAD". Neither placeholder belongs in the manifest.
GIT_COMMIT=""
GIT_REF=""
if command -v git >/dev/null 2>&1 \
   && git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  GIT_COMMIT=$(git rev-parse --verify --quiet HEAD 2>/dev/null || echo "")
  GIT_REF=$(git symbolic-ref --quiet --short HEAD 2>/dev/null || echo "")
fi

# Assemble satsignal-manifest.json. All values are handed to jq as
# arguments, so nothing is spliced into JSON by the shell. The shape
# matches gitlab-ci.satsignal.yml and the Docker + npm adapters; only
# the value sources differ (the PyPI index instead of CI variables).
# Empty identity/claims entries are pruned so the manifest stays minimal
# and re-canonicalizes identically across reruns. The index blake2b_256
# goes into claims for cross-checking; subject.digest is the
# schema-required sha256 of the same file.
jq -n \
  --argjson atts "$ATTS_JSON" \
  --arg stype   "$SATSIGNAL_SUBJECT_TYPE" \
  --arg digest  "$DIGEST" \
  --arg pkg     "$NAME" \
  --arg ver     "$VERSION" \
  --arg index   "$INDEX" \
  --arg file    "$FN" \
  --arg commit  "$GIT_COMMIT" \
  --arg ref     "$GIT_REF" \
  --arg url     "$FURL" \
  --arg b2      "$FB2" \
  '
  # Prefix a bare hex digest with "sha256:"; leave prefixed ones alone.
  def norm: if startswith("sha256:") then . else "sha256:" + . end;
  # Drop object entries whose value is the empty string.
  def prune: with_entries(select(.value != ""));

  {
    schema: "satsignal.provenance.v1",
    source:  { type: "pypi", id: $pkg },
    subject: { type: $stype, digest: ($digest | norm) },
    identity: ( {
        provider: "pypi", package: $pkg, version: $ver,
        index: $index, file: $file, commit: $commit, ref: $ref
      } | prune ),
    claims: ( { url: $url, blake2b_256: $b2 } | prune ),
    privacy: { onchain_mode: "hash_only" }
  } as $base
  # The attestations key is only present when there is something in it.
  | if ($atts | length) > 0
    then $base + { attestations: ( $atts | map({ type, digest: (.digest | norm) }) ) }
    else $base
    end
  ' > satsignal-manifest.json

# Wrap the manifest in the request body: target folder, a human-readable
# label, and the manifest itself (read back from the file just written).
jq -n \
  --arg folder "$SATSIGNAL_FOLDER" \
  --arg label  "pypi ${NAME}==${VERSION} ${FN}" \
  --slurpfile manifest satsignal-manifest.json \
  '{ folder_slug: $folder, label: $label, manifest: $manifest[0] }' \
  > satsignal-body.json

# Submit the anchor request. The response body is written to
# satsignal-proof.json and only the HTTP status is captured, so an error
# payload can be shown to the user below.
ANCHOR_URL="${SATSIGNAL_API_BASE}/api/v1/provenance/anchor"
echo "POST ${ANCHOR_URL} (folder=${SATSIGNAL_FOLDER})"
HTTP_STATUS=$(curl --silent --show-error \
  --output satsignal-proof.json \
  --write-out '%{http_code}' \
  --request POST \
  --header "Authorization: Bearer ${SATSIGNAL_API_KEY}" \
  --header 'Content-Type: application/json' \
  --data @satsignal-body.json \
  "$ANCHOR_URL")

# Anything other than 200 is a failure: print the status and the body.
case "$HTTP_STATUS" in
  200) ;;
  *)
    echo "anchor failed (HTTP $HTTP_STATUS):" >&2
    cat satsignal-proof.json >&2
    exit 1
    ;;
esac

# Report the anchor. TXID and MH are read from the API response;
# bundle_url is optional and BURL stays empty when the response omits it.
TXID=$(jq -r '.txid' satsignal-proof.json)
MH=$(jq -r '.manifest_hash' satsignal-proof.json)
BURL=$(jq -r '.bundle_url // empty' satsignal-proof.json)
# Legacy filename kept for downstream globs.
cp -f satsignal-proof.json satsignal-receipt.json
echo "anchored: txid=$TXID manifest_hash=$MH"

# Pull the proof bundle so the receipt is self-contained (verify
# offline per /spec-provenance §5 — no Satsignal call at verify time).
#
# -f makes curl fail on an HTTP error instead of saving the error body
# as the bundle. The download goes to a .part file that is renamed only
# on success, so a failed fetch never leaves a bogus .mbnt behind and
# never clobbers a bundle from an earlier run.
if [ -n "$BURL" ]; then
  # Filename key: the first 12 hex characters of the subject digest.
  SHORT=$(printf '%s' "${DIGEST#sha256:}" | cut -c1-12)
  BUNDLE="satsignal-${SHORT:-proof}.mbnt"
  if curl -sS -f -H "Authorization: Bearer ${SATSIGNAL_API_KEY}" \
       -o "$BUNDLE.part" "$BURL"; then
    mv -f "$BUNDLE.part" "$BUNDLE"
    echo "bundle saved: $BUNDLE"
  else
    rc=$?
    rm -f "$BUNDLE.part"
    echo "satsignal: bundle download failed (curl exit $rc). The anchor" >&2
    echo "  itself succeeded — see satsignal-proof.json; fetch the bundle" >&2
    echo "  later from: $BURL" >&2
    exit "$rc"
  fi
fi
