#!/usr/bin/env python3
"""merkle_row.py — selective-disclosure for tabular data on Satsignal.

You have a table (CSV rows, JSON records, eval results, log entries),
and you want to anchor the whole table on chain so that later you can
reveal one row to an auditor — and the auditor can verify that row
was part of the originally-anchored table — without revealing any of
the other rows.

Two schemes, same Merkle tree shape; pick by leaf-privacy needs:

- ``merkle-row-v1`` (standard) — leaf = sha256(JCS(row)). Anchor via
  manifest mode (POST /api/v1/anchors with items[]). Right when rows
  are high-entropy (UUIDs, hashes, free-text bodies). The on-chain
  receipt is a manifest receipt; the existing /verify page accepts
  the row + Merkle path as a "manifest leaf reveal" and verifies
  end-to-end.

- ``merkle-row-sealed-v1`` (sealed-style) — leaf = HMAC-SHA256(salt_i,
  JCS(row)), where salt_i = HKDF-SHA256(master_salt, ..., info=b"row/"
  + i encoded as a 4-byte big-endian unsigned integer).
  Right when rows are low-entropy ("yes"/"no", small enums, single
  bids) where a plain sha256 leaf would be brute-forceable. The
  master salt stays in the holder's local record; per-leaf salts are
  one-way derivable from it (revealing salt_i does NOT permit
  recovering master_salt or any salt_j for j != i — see
  SPEC_v2_sealed §5.3 for the formal argument). Anchor the root via
  category=commitment.

Stdlib only. No Satsignal repo dependency. Copy freely. Cross-
runtime byte-identical canonical bytes (matches notary.canonicalize).

Usage — standard scheme:

    # 1. Build the manifest-mode anchor body + per-row reveal records.
    python3 merkle_row.py build-standard \\
        --rows-jsonl rows.jsonl \\
        --matter-slug agent-runs \\
        --label "eval batch 2026-05-08" \\
        --out-anchor anchor.json \\
        --out-reveals reveals/

    # 2. POST anchor.json to /api/v1/anchors (manifest mode does the
    #    Merkle root server-side; the receipt's bundle ships the
    #    leaves).

    # 3. Send reveals/row-7.json to your auditor. They drop your
    #    .mbnt + the row JSON into /verify and it checks out.

Usage — sealed scheme:

    # 1. Build the commit doc + sealed master record.
    python3 merkle_row.py build-sealed \\
        --rows-jsonl rows.jsonl \\
        --out-master master_record.json \\
        --out-commit commit_doc.json
    # → prints {sha256_hex, file_size} for /api/v1/anchors

    # 2. Anchor the root via category=commitment.
    SHA=$(jq -r .sha256_hex commit_doc.json)
    SIZE=$(jq -r .file_size commit_doc.json)
    curl -H "Authorization: Bearer sk_..." \\
         -H "Content-Type: application/json" \\
         -d "{\\"matter_slug\\":\\"agent-runs\\", \\
              \\"sha256_hex\\":\\"$SHA\\", \\
              \\"file_size\\":$SIZE, \\
              \\"category\\":\\"commitment\\", \\
              \\"label\\":\\"merkle-row-sealed-v1 root\\"}" \\
         https://app.satsignal.cloud/api/v1/anchors

    # 3. Generate a single-row reveal.
    python3 merkle_row.py reveal-sealed \\
        --master master_record.json \\
        --leaf-index 7 \\
        --out reveal_row7.json

    # 4. Auditor verifies: (a) reveal recomputes to commit doc's root,
    #    (b) commit doc canonicalizes to the on-chain sha256, AND
    #    (c) the commit doc's sha is actually anchored on chain
    #    (default: lookup_hash check; pass --no-chain-confirm to skip).
    python3 merkle_row.py verify-sealed \\
        --reveal reveal_row7.json \\
        --commit-doc commit_doc.json
    # → {"verified": true, "details": {...},
    #    "chain_check": {"state": "confirmed", "txid": "..."}}
"""
from __future__ import annotations

import argparse
import base64
import hashlib
import hmac
import json
import os
import secrets
import struct
import sys
import time
import unicodedata
from typing import Any, List, Optional, Sequence, Tuple


# ---- JCS-style canonicalization (matches Satsignal notary.canonicalize) ----

def _nfc_deep(value: Any) -> Any:
    """Return a copy of *value* with every string and dict key NFC-normalized.

    Only the JSON-shaped types are accepted: ``None``, ``bool``, ``int``,
    ``str``, ``list`` and ``dict``. Lists and dicts are rebuilt recursively;
    scalars are returned as-is.

    Raises:
        ValueError: if *value* contains a float, or if two keys of the same
            dict normalize to the same NFC string. Silently keeping one of
            the colliding keys would drop data from the canonical form and
            let two different documents hash identically.
        TypeError: for any other type (tuple, bytes, set, ...), including a
            non-string dict key (rejected by ``unicodedata.normalize``).
    """
    if value is None or isinstance(value, (bool, int)):
        return value
    if isinstance(value, float):
        # Strict JCS forbids floats, and we keep that here for byte-
        # identical cross-runtime hashing: Python's json.dumps and
        # JS String(x) disagree on integer-valued floats (1.0 vs 1).
        # Pre-quantize: minor units as ints, decimals as strings.
        raise ValueError(
            f"floats are not allowed in canonical form: {value!r}. "
            "Pre-quantize to int (e.g. cents) or pass the decimal as a string."
        )
    if isinstance(value, str):
        return unicodedata.normalize("NFC", value)
    if isinstance(value, list):
        return [_nfc_deep(v) for v in value]
    if isinstance(value, dict):
        normalized: dict = {}
        for key, item in value.items():
            nfc_key = unicodedata.normalize("NFC", key)
            if nfc_key in normalized:
                # Distinct source keys (e.g. composed vs decomposed "é")
                # would otherwise overwrite each other without a trace.
                raise ValueError(
                    f"dict keys collide after NFC normalization: {nfc_key!r}"
                )
            normalized[nfc_key] = _nfc_deep(item)
        return normalized
    raise TypeError(
        f"non-canonicalizable type {type(value).__name__}: {value!r}"
    )


def canonicalize(doc: Any) -> bytes:
    """Serialize *doc* to its canonical UTF-8 JSON byte string.

    The document is first passed through ``_nfc_deep`` (NFC-normalized
    strings/keys, floats and non-JSON types rejected), then dumped with
    sorted keys, no insignificant whitespace, non-ASCII characters left
    unescaped, and NaN/Infinity disallowed.

    Per the file's own notes this is meant to match notary.canonicalize()
    in the Satsignal repo and canonicalize() in commit-reveal.js.
    """
    normalized = _nfc_deep(doc)
    text = json.dumps(
        normalized,
        allow_nan=False,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    )
    return text.encode("utf-8")


# ---- Merkle (standard binary, sha256, last-node-duplicated odd levels) ----
# Matches notary/manifest.py and verifier.html merkleRootFromHexLeaves.

def _sha256(data: bytes) -> bytes:
    """Return the raw 32-byte SHA-256 digest of *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.digest()


def merkle_root(leaves: Sequence[bytes]) -> bytes:
    """Compute the binary SHA-256 Merkle root over *leaves*.

    Each parent is sha256(left || right). A level with an odd number of
    nodes pairs its last node with itself. A single leaf is its own root.

    Raises:
        ValueError: if *leaves* is empty.
    """
    if not leaves:
        raise ValueError("merkle_root: leaves[] must be non-empty")
    nodes = list(leaves)
    while len(nodes) > 1:
        if len(nodes) % 2:
            # Odd level: duplicate the tail so every node has a partner.
            nodes.append(nodes[-1])
        nodes = [
            _sha256(nodes[k] + nodes[k + 1])
            for k in range(0, len(nodes), 2)
        ]
    return nodes[0]


def inclusion_proof(
    leaves: Sequence[bytes], idx: int,
) -> List[Tuple[str, str]]:
    """Build the Merkle inclusion path for the leaf at position *idx*.

    Returns a list of ``(sibling_hex, side)`` pairs ordered from the leaf
    level upward. ``side`` is ``"R"`` when the sibling is hashed on the
    right of the running node and ``"L"`` when it is hashed on the left.
    On an odd-sized level the last node is its own right-hand sibling,
    mirroring ``merkle_root``.

    Raises:
        ValueError: if *idx* is not a valid index into *leaves*.
    """
    total = len(leaves)
    if idx < 0 or idx >= total:
        raise ValueError(
            f"inclusion_proof: idx out of range ({idx} of {len(leaves)})"
        )
    path: List[Tuple[str, str]] = []
    nodes = list(leaves)
    cursor = idx
    while len(nodes) > 1:
        if len(nodes) % 2:
            # Pad the odd level so the sibling lookup below never
            # falls off the end; the pad is a copy of the last node.
            nodes.append(nodes[-1])
        if cursor % 2:
            path.append((nodes[cursor - 1].hex(), "L"))
        else:
            path.append((nodes[cursor + 1].hex(), "R"))
        nodes = [
            _sha256(nodes[k] + nodes[k + 1])
            for k in range(0, len(nodes), 2)
        ]
        cursor //= 2
    return path


def verify_inclusion(
    leaf: bytes, proof: Sequence[Tuple[str, str]], root: bytes,
) -> bool:
    """Check that *leaf* hashes up to *root* along *proof*.

    *proof* is a sequence of ``(sibling_hex, side)`` pairs in leaf-to-root
    order, as produced by ``inclusion_proof``. The final comparison is
    constant-time.

    Raises:
        ValueError: if a sibling is not valid hex or a side is neither
            ``"L"`` nor ``"R"``.
    """
    node = leaf
    for sibling_hex, side in proof:
        sibling = bytes.fromhex(sibling_hex)
        if side not in ("L", "R"):
            raise ValueError(f"unknown proof side {side!r}")
        # "L" means the sibling sits on the left of the running node.
        pair = sibling + node if side == "L" else node + sibling
        node = _sha256(pair)
    return hmac.compare_digest(node, root)


# ---- HKDF-SHA256 (RFC 5869) — stdlib only ----

def _hkdf_extract(salt: bytes, ikm: bytes) -> bytes:
    """HKDF-Extract step: HMAC-SHA256 keyed with *salt* over *ikm*."""
    mac = hmac.new(salt, digestmod=hashlib.sha256)
    mac.update(ikm)
    return mac.digest()


def _hkdf_expand(prk: bytes, info: bytes, length: int) -> bytes:
    """HKDF-Expand step: stretch *prk* into *length* output bytes.

    Block i is HMAC-SHA256(prk, block_{i-1} || info || i) with a one-byte
    counter starting at 1 and an empty block_0; blocks are concatenated
    and truncated to *length*.

    Raises:
        ValueError: if *length* exceeds 255 * 32 bytes.
    """
    if length > 255 * 32:
        raise ValueError("hkdf-expand: length too large")
    blocks = []
    produced = 0
    previous = b""
    index = 1
    while produced < length:
        previous = hmac.new(
            prk, previous + info + bytes([index]), hashlib.sha256
        ).digest()
        blocks.append(previous)
        produced += len(previous)
        index += 1
    return b"".join(blocks)[:length]


def hkdf_sha256(*, ikm: bytes, salt: bytes, info: bytes, length: int) -> bytes:
    """Derive *length* bytes with HKDF-SHA256 (extract, then expand).

    All arguments are keyword-only. Raises ValueError (from the expand
    step) when *length* exceeds 255 * 32 bytes.
    """
    pseudo_random_key = _hkdf_extract(salt, ikm)
    return _hkdf_expand(pseudo_random_key, info, length)


# ---- Scheme constants ----

# Version tag of the standard scheme: stamped into each reveal record
# ("version") and returned as "scheme" by build_standard().
STANDARD_VERSION = "satsignal-merkle-row-v1"
# Version tag of the sealed scheme: written to the commit doc ("scheme"),
# the master record and every reveal ("version"). reveal_sealed() and
# verify_sealed_reveal() reject documents carrying any other value.
SEALED_VERSION = "satsignal-merkle-row-sealed-v1"
# Fixed, public HKDF salt used by per_leaf_salt(); the secret master salt
# is supplied as the HKDF input key material, not as this salt.
SEALED_HKDF_SALT = b"satsignal-merkle-row-sealed-v1/per-leaf"
# Prefix of the HKDF info string; per_leaf_salt() appends the leaf index
# packed as a 4-byte big-endian unsigned integer.
SEALED_HKDF_INFO_PREFIX = b"row/"


def per_leaf_salt(master_salt: bytes, idx: int) -> bytes:
    """Derive the 32-byte HMAC key ("salt") for the leaf at index *idx*.

    The salt is HKDF-SHA256 output with *master_salt* as input key
    material, ``SEALED_HKDF_SALT`` as the HKDF salt, and
    ``SEALED_HKDF_INFO_PREFIX`` followed by *idx* as a 4-byte big-endian
    unsigned integer as the info string.

    Raises:
        ValueError: if *master_salt* is not exactly 32 bytes.
        struct.error: if *idx* does not fit an unsigned 32-bit integer.
    """
    if len(master_salt) != 32:
        raise ValueError("master_salt must be 32 bytes")
    index_be = struct.pack(">I", idx)
    info = SEALED_HKDF_INFO_PREFIX + index_be
    return hkdf_sha256(
        length=32,
        info=info,
        salt=SEALED_HKDF_SALT,
        ikm=master_salt,
    )


# ---- Standard scheme: layered on manifest-items-v1 ----

def build_standard(
    rows: Sequence[Any],
    *,
    matter_slug: str,
    label: Optional[str] = None,
    category: str = "evidence_bundle",
    row_labels: Optional[Sequence[str]] = None,
) -> dict:
    """Prepare a standard-scheme anchor request plus one reveal per row.

    Every row is canonicalized and hashed with plain SHA-256; the hashes
    become the ``items[]`` of a manifest-mode POST body for
    /api/v1/anchors. No Merkle root is computed here — per the file's
    notes, the server derives it from the items.

    Args:
        rows: non-empty sequence of JSON-shaped rows.
        matter_slug: copied into the anchor body.
        label: optional anchor-level label; omitted from the body when
            falsy.
        category: anchor category; ``None`` falls back to
            ``"evidence_bundle"``.
        row_labels: optional per-row labels, same length as *rows*;
            defaults to ``"row-<index>"``.

    Returns:
        A dict with:
          "anchor_body": {"matter_slug", "items", "category"[, "label"]},
          "reveals": one record per row with "version", "leaf_index",
              "label", "row", "row_canonical_b64", "row_sha256_hex",
          "scheme": "satsignal-merkle-row-v1",
          "leaf_count": number of rows.

    Raises:
        ValueError: if *rows* is empty, *row_labels* has the wrong length,
            or a row contains a float.
        TypeError: if a row contains a non-canonicalizable type.
    """
    if not rows:
        raise ValueError("rows must be non-empty")
    if row_labels is not None and len(row_labels) != len(rows):
        raise ValueError(
            f"row_labels has wrong length ({len(row_labels)} vs "
            f"{len(rows)})"
        )

    manifest_items = []
    reveal_records = []
    for position, row in enumerate(rows):
        encoded = canonicalize(row)
        digest_hex = hashlib.sha256(encoded).hexdigest()
        if row_labels is None:
            name = f"row-{position}"
        else:
            name = row_labels[position]
        manifest_items.append({"label": name, "sha256_hex": digest_hex})
        reveal_records.append({
            "version": STANDARD_VERSION,
            "leaf_index": position,
            "label": name,
            "row": row,
            "row_canonical_b64": base64.b64encode(encoded).decode("ascii"),
            "row_sha256_hex": digest_hex,
        })

    anchor_body: dict = {
        "matter_slug": matter_slug,
        "items": manifest_items,
        "category": category if category is not None else "evidence_bundle",
    }
    if label:
        anchor_body["label"] = label

    return {
        "anchor_body": anchor_body,
        "reveals": reveal_records,
        "scheme": STANDARD_VERSION,
        "leaf_count": len(rows),
    }


# ---- Sealed scheme: HMAC-per-leaf with HKDF-derived salts ----

def build_sealed(
    rows: Sequence[Any],
    *,
    master_salt: Optional[bytes] = None,
) -> dict:
    """Commit to *rows* under the sealed scheme.

    Each leaf is HMAC-SHA256(per_leaf_salt(master_salt, i), canonical
    row bytes); the leaves are folded into a Merkle root, and the commit
    doc ``{scheme, root, leaf_count}`` is canonicalized and hashed to
    produce the value to anchor.

    Args:
        rows: non-empty sequence of JSON-shaped rows.
        master_salt: 32 secret bytes; generated with
            ``secrets.token_bytes`` when omitted.

    Returns:
        A dict with:
          "master_record": private record holding the master salt, every
              row in plaintext, per-leaf salts and commitments, the root,
              the anchor metadata and a UTC creation timestamp,
          "commit_doc": {"scheme", "root", "leaf_count"},
          "anchor": {"sha256_hex", "file_size"} of the canonical commit doc,
          "scheme": "satsignal-merkle-row-sealed-v1".

    Raises:
        ValueError: if *rows* is empty, *master_salt* is not 32 bytes, or
            a row contains a float.
        TypeError: if a row contains a non-canonicalizable type.
    """
    if not rows:
        raise ValueError("rows must be non-empty")
    if master_salt is None:
        master_salt = secrets.token_bytes(32)
    if len(master_salt) != 32:
        raise ValueError("master_salt must be 32 bytes")

    def as_b64(raw: bytes) -> str:
        return base64.b64encode(raw).decode("ascii")

    leaf_records = []
    leaf_hashes: List[bytes] = []
    for position, row in enumerate(rows):
        encoded = canonicalize(row)
        leaf_key = per_leaf_salt(master_salt, position)
        leaf_hash = hmac.new(leaf_key, encoded, hashlib.sha256).digest()
        leaf_hashes.append(leaf_hash)
        leaf_records.append({
            "leaf_index": position,
            "label": f"row-{position}",
            "row": row,
            "row_canonical_b64": as_b64(encoded),
            "salt_b64": as_b64(leaf_key),
            "commitment_hex": leaf_hash.hex(),
        })

    root_hex = merkle_root(leaf_hashes).hex()
    row_count = len(rows)
    commit_doc = {
        "scheme": SEALED_VERSION,
        "root": root_hex,
        "leaf_count": row_count,
    }
    commit_bytes = canonicalize(commit_doc)
    anchor = {
        "sha256_hex": hashlib.sha256(commit_bytes).hexdigest(),
        "file_size": len(commit_bytes),
    }

    master_record = {
        "version": SEALED_VERSION,
        "master_salt_b64": as_b64(master_salt),
        "leaf_count": row_count,
        "root_hex": root_hex,
        "leaves": leaf_records,
        # Independent copy so the two "anchor" dicts never alias.
        "anchor": dict(anchor),
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }

    return {
        "master_record": master_record,
        "commit_doc": commit_doc,
        "anchor": anchor,
        "scheme": SEALED_VERSION,
    }


def reveal_sealed(master_record: dict, leaf_index: int) -> dict:
    """Extract the shareable reveal for one row from a sealed master record.

    The reveal carries the row, its canonical bytes, that row's derived
    salt and commitment, and the Merkle inclusion path rebuilt from the
    commitments stored in the master record. The master salt itself is
    not copied into the result.

    Raises:
        ValueError: if the record's version is not the sealed scheme or
            *leaf_index* is out of range.
        KeyError: if the master record lacks an expected field.
    """
    version = master_record.get("version")
    if version != SEALED_VERSION:
        raise ValueError(
            f"master_record version mismatch: {version!r}"
        )
    records = master_record.get("leaves") or []
    count = len(records)
    if leaf_index < 0 or leaf_index >= count:
        raise ValueError(
            f"leaf_index out of range ({leaf_index} of {count})"
        )
    leaf_hashes = [bytes.fromhex(rec["commitment_hex"]) for rec in records]
    path = inclusion_proof(leaf_hashes, leaf_index)
    chosen = records[leaf_index]

    reveal = {
        "version": SEALED_VERSION,
        "leaf_index": leaf_index,
        "leaf_count": master_record["leaf_count"],
    }
    # Per-leaf fields are copied verbatim from the stored leaf record.
    for field in ("label", "row", "row_canonical_b64", "salt_b64",
                  "commitment_hex"):
        reveal[field] = chosen[field]
    reveal["proof"] = [
        {"sib": sibling_hex, "side": side} for sibling_hex, side in path
    ]
    reveal["root_hex"] = master_record["root_hex"]
    return reveal


def verify_sealed_reveal(
    reveal: dict, commit_doc: dict,
) -> Tuple[bool, dict]:
    """Verify a single-row reveal against a commit_doc.

    Checks (in order; the first failure short-circuits):
      1. reveal.version and commit_doc.scheme both name the sealed scheme.
      2. commit_doc.root equals reveal.root_hex, and — when both documents
         carry leaf_count — the two counts agree.
      3. reveal.row canonicalizes to the bytes in reveal.row_canonical_b64.
      4. The salt is 32 bytes and HMAC-SHA256(salt, row_canonical) equals
         reveal.commitment_hex.
      5. Position binding: when commit_doc.leaf_count is an int the proof
         must have exactly the depth of a tree of that size, and when
         reveal.leaf_index is present the proof's L/R sides must be the
         ones that index implies. Without this a valid leaf could be
         presented under a different row number.
      6. The Merkle path hashes from the commitment up to the root.

    This function only performs the cryptographic checks; it does not
    confirm that the commit doc was anchored anywhere.

    Returns:
        ``(ok, details)``. On success *details* maps each performed check
        to True. On failure it additionally holds ``"step"`` (the failing
        check), ``"ok": False`` and check-specific context. Malformed
        input (missing keys, bad base64/hex, wrong types) is reported as
        a failure of the check that needed the field, with a ``"note"``,
        rather than raised.
    """
    malformed = (KeyError, TypeError, ValueError)
    details: dict = {}

    def fail(step: str, **context: Any) -> Tuple[bool, dict]:
        return False, {**details, "step": step, "ok": False, **context}

    if not isinstance(reveal, dict) or not isinstance(commit_doc, dict):
        return fail("input_type",
                    note="reveal and commit_doc must both be JSON objects")

    if reveal.get("version") != SEALED_VERSION:
        return fail("version", got=reveal.get("version"),
                    expected=SEALED_VERSION)

    if commit_doc.get("scheme") != SEALED_VERSION:
        return fail("commit_doc.scheme", got=commit_doc.get("scheme"),
                    expected=SEALED_VERSION)

    declared_root = reveal.get("root_hex")
    # A missing root on both sides must not count as "equal".
    if (not isinstance(declared_root, str)
            or commit_doc.get("root") != declared_root):
        return fail("root_match", commit_doc_root=commit_doc.get("root"),
                    reveal_root=declared_root)
    details["root_match"] = True

    committed_count = commit_doc.get("leaf_count")
    if committed_count is not None and "leaf_count" in reveal:
        if reveal["leaf_count"] != committed_count:
            return fail("leaf_count",
                        commit_doc_leaf_count=committed_count,
                        reveal_leaf_count=reveal["leaf_count"])
        details["leaf_count_match"] = True

    # Bind the human-readable `row` to the canonical bytes the HMAC
    # is computed over. Without this, an attacker could mutate the
    # `row` field while leaving `row_canonical_b64` intact, lying to
    # an auditor who reads `row` but lets the verifier do the math.
    try:
        row_canonical = base64.b64decode(reveal["row_canonical_b64"])
        expected_canonical = canonicalize(reveal["row"])
    except malformed as exc:
        return fail("row_binding", note=f"malformed reveal: {exc!r}")
    if expected_canonical != row_canonical:
        return fail("row_binding",
                    note="reveal.row does not canonicalize to "
                         "reveal.row_canonical_b64")
    details["row_binding"] = True

    try:
        salt_i = base64.b64decode(reveal["salt_b64"])
    except malformed as exc:
        return fail("salt_length", note=f"malformed reveal: {exc!r}")
    if len(salt_i) != 32:
        return fail("salt_length", got=len(salt_i))

    try:
        declared = bytes.fromhex(reveal["commitment_hex"])
    except malformed as exc:
        return fail("leaf_commitment", note=f"malformed reveal: {exc!r}")
    expected_commitment = hmac.new(
        salt_i, row_canonical, hashlib.sha256
    ).digest()
    if not hmac.compare_digest(expected_commitment, declared):
        return fail("leaf_commitment",
                    expected_hex=expected_commitment.hex(),
                    declared_hex=declared.hex())
    details["leaf_commitment"] = True

    try:
        proof = [(p["sib"], p["side"]) for p in reveal["proof"]]
        root = bytes.fromhex(declared_root)
    except malformed as exc:
        return fail("merkle_path", note=f"malformed reveal: {exc!r}")

    position_checked = False
    if isinstance(committed_count, int) and not isinstance(committed_count, bool):
        # A tree of n leaves built by halving (rounding up) has
        # ceil(log2(n)) levels above the leaves.
        if committed_count < 1 or len(proof) != (committed_count - 1).bit_length():
            return fail("leaf_position",
                        note="proof length does not match "
                             "commit_doc.leaf_count")
        position_checked = True
    leaf_index = reveal.get("leaf_index")
    if leaf_index is not None:
        if (isinstance(leaf_index, bool) or not isinstance(leaf_index, int)
                or leaf_index < 0):
            return fail("leaf_position",
                        note="reveal.leaf_index must be a non-negative "
                             "integer")
        # At level k the running node is a right child (sibling on the
        # left) exactly when bit k of the leaf index is set.
        implied_sides = [
            "L" if (leaf_index >> level) & 1 else "R"
            for level in range(len(proof))
        ]
        index_fits = (leaf_index >> len(proof)) == 0
        if position_checked and leaf_index >= committed_count:
            index_fits = False
        if not index_fits or [side for _, side in proof] != implied_sides:
            return fail("leaf_position",
                        note="proof does not match reveal.leaf_index")
        position_checked = True
    if position_checked:
        details["leaf_position"] = True

    try:
        path_ok = verify_inclusion(declared, proof, root)
    except malformed as exc:
        return fail("merkle_path", note=f"malformed reveal: {exc!r}")
    if not path_ok:
        return fail("merkle_path")
    details["merkle_path"] = True

    return True, details


# ---- CLI ----

def _read_rows_jsonl(path: str) -> List[Any]:
    """Load rows from a JSON-lines file, one JSON value per non-blank line.

    *path* ``"-"`` reads from standard input (which is left open); any
    other path is opened as UTF-8 text and closed afterwards. Lines that
    are empty after stripping whitespace are skipped.

    Raises:
        SystemExit: with a ``path:line`` message when a line is not valid
            JSON.
    """
    from_stdin = path == "-"
    stream = sys.stdin if from_stdin else open(path, "r", encoding="utf-8")
    parsed: List[Any] = []
    try:
        line_number = 0
        for raw in stream:
            line_number += 1
            text = raw.strip()
            if text:
                try:
                    record = json.loads(text)
                except json.JSONDecodeError as exc:
                    raise SystemExit(
                        f"{path}:{line_number}: invalid JSON line: {exc}"
                    )
                parsed.append(record)
    finally:
        if not from_stdin:
            stream.close()
    return parsed


def _cmd_build_standard(args: argparse.Namespace) -> int:
    """CLI handler for ``build-standard``.

    Reads the rows, builds the standard-scheme output, optionally writes
    the anchor body to ``--out-anchor`` and one ``<label>.json`` reveal
    per row into ``--out-reveals`` (created if needed), then prints a
    one-line JSON summary to stdout. Progress notes go to stderr.
    Returns 0.
    """
    result = build_standard(
        _read_rows_jsonl(args.rows_jsonl),
        matter_slug=args.matter_slug,
        label=args.label,
        category=args.category,
    )

    anchor_path = args.out_anchor
    if anchor_path:
        anchor_text = json.dumps(result["anchor_body"], indent=2,
                                 sort_keys=True)
        with open(anchor_path, "w", encoding="utf-8") as handle:
            handle.write(anchor_text + "\n")
        sys.stderr.write(f"[build-standard] wrote {args.out_anchor}\n")

    reveals_dir = args.out_reveals
    if reveals_dir:
        os.makedirs(reveals_dir, exist_ok=True)
        for record in result["reveals"]:
            target = os.path.join(reveals_dir, f"{record['label']}.json")
            record_text = json.dumps(record, indent=2, sort_keys=True)
            with open(target, "w", encoding="utf-8") as handle:
                handle.write(record_text + "\n")
        sys.stderr.write(
            f"[build-standard] wrote {len(result['reveals'])} reveal records "
            f"into {args.out_reveals}/\n"
        )

    summary = {
        "scheme": result["scheme"],
        "leaf_count": result["leaf_count"],
        "anchor_endpoint": "POST /api/v1/anchors (manifest mode)",
    }
    print(json.dumps(summary))
    return 0


def _cmd_build_sealed(args: argparse.Namespace) -> int:
    """CLI handler for ``build-sealed``.

    Reads the rows, builds the sealed commitment, optionally writes the
    private master record (``--out-master``) and the wrapped commit doc
    (``--out-commit``), and prints the anchor ``{sha256_hex, file_size}``
    as JSON on stdout. Progress notes go to stderr. Returns 0.

    The master record holds the master salt and every row in plaintext,
    so its file is created with mode 0600 and tightened to 0600 *before*
    any content is written; it is never readable by other users, not
    even briefly, and a pre-existing file with looser permissions is
    locked down first.

    Raises:
        ValueError: if ``--master-salt-hex`` is not valid hex or does not
            decode to 32 bytes.
    """
    rows = _read_rows_jsonl(args.rows_jsonl)
    master_salt = None
    if args.master_salt_hex:
        master_salt = bytes.fromhex(args.master_salt_hex)
    out = build_sealed(rows, master_salt=master_salt)
    if args.out_master:
        # O_BINARY exists only on Windows; it stops the C runtime from
        # translating newlines a second time under Python's text layer.
        flags = (os.O_WRONLY | os.O_CREAT | os.O_TRUNC
                 | getattr(os, "O_BINARY", 0))
        fd = os.open(args.out_master, flags, 0o600)
        try:
            f = os.fdopen(fd, "w", encoding="utf-8")
        except BaseException:
            os.close(fd)
            raise
        with f:
            # The creation mode only applies to new files; an existing
            # file keeps its old permissions unless tightened here.
            if hasattr(os, "fchmod"):
                os.fchmod(f.fileno(), 0o600)
            else:
                os.chmod(args.out_master, 0o600)
            json.dump(out["master_record"], f, indent=2, sort_keys=True)
            f.write("\n")
        sys.stderr.write(
            f"[build-sealed] wrote {args.out_master} (KEEP PRIVATE — "
            f"holds master_salt + plaintext rows)\n"
        )
    if args.out_commit:
        with open(args.out_commit, "w", encoding="utf-8") as f:
            # The commit doc (canonical), plus anchor sha/size for
            # convenience. The on-chain commitment is sha256 of just
            # the commit_doc canonical bytes — the wrapper here is
            # for human readability of the helper output, not for
            # hashing.
            json.dump({
                "commit_doc": out["commit_doc"],
                "sha256_hex": out["anchor"]["sha256_hex"],
                "file_size": out["anchor"]["file_size"],
            }, f, indent=2, sort_keys=True)
            f.write("\n")
        sys.stderr.write(f"[build-sealed] wrote {args.out_commit}\n")
    sys.stderr.write(
        "[build-sealed] anchor with: POST /api/v1/anchors "
        "{category: 'commitment'}\n"
    )
    print(json.dumps(out["anchor"]))
    return 0


def _cmd_reveal_sealed(args: argparse.Namespace) -> int:
    """CLI handler for ``reveal-sealed``.

    Loads the master record from ``--master``, builds the reveal for
    ``--leaf-index`` and either writes it to ``--out`` (with a note on
    stderr) or prints it to stdout. Returns 0.
    """
    with open(args.master, "r", encoding="utf-8") as source:
        master_record = json.loads(source.read())
    payload = reveal_sealed(master_record, args.leaf_index)
    rendered = json.dumps(payload, indent=2, sort_keys=True)

    if not args.out:
        print(rendered)
        return 0

    with open(args.out, "w", encoding="utf-8") as target:
        target.write(rendered + "\n")
    sys.stderr.write(
        f"[reveal-sealed] wrote {args.out} — safe to share with "
        f"auditor (does NOT contain master_salt)\n"
    )
    return 0


def _lookup_hash(host: str, sha_hex: str, *, timeout: float = 10.0) -> dict:
    """Probe lookup_hash to confirm the commit-doc sha was actually
    anchored on chain. Returns one of:

      {"state": "confirmed", "txid": "...", "bundle_id": "..."}
      {"state": "missing"}                          # empty / no txid
      {"state": "error", "note": "<reason>"}        # network / 4xx / 5xx /
                                                    # unexpected response

    ``bundle_id`` is included only when the response carries one.

    Network errors, undecodable bodies and JSON responses that are not an
    object all map to "error" — this function never raises for a bad
    response; the caller decides whether to fail closed (default) or
    proceed with the crypto-only result.

    Args:
        host: base URL of the lookup service; trailing slashes are ignored.
        sha_hex: hash to look up; percent-encoded into the query string.
        timeout: seconds passed to ``urlopen``.
    """
    import urllib.error
    import urllib.parse
    import urllib.request
    url = (host.rstrip("/") + "/lookup_hash?sha="
           + urllib.parse.quote(sha_hex, safe=""))
    try:
        with urllib.request.urlopen(url, timeout=timeout) as r:
            body = json.loads(r.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        return {"state": "error",
                "note": f"HTTP {e.code} from {url}"}
    except urllib.error.URLError as e:
        return {"state": "error",
                "note": f"network error reaching {url}: {e.reason}"}
    except Exception as e:  # noqa: BLE001
        return {"state": "error",
                "note": f"lookup_hash failed: {e}"}
    if not body:
        return {"state": "missing"}
    if not isinstance(body, dict):
        # A truthy list/string/number has no .get(); report it instead
        # of crashing with AttributeError.
        return {"state": "error",
                "note": f"unexpected lookup_hash response from {url}: "
                        f"expected a JSON object, got {type(body).__name__}"}
    if not body.get("txid"):
        return {"state": "missing"}
    out = {"state": "confirmed", "txid": body["txid"]}
    if body.get("bundle_id"):
        out["bundle_id"] = body["bundle_id"]
    return out


def _cmd_verify_sealed(args: argparse.Namespace) -> int:
    """CLI handler for ``verify-sealed``.

    Loads the reveal and the commit doc (bare, or wrapped under a
    ``"commit_doc"`` key), runs the cryptographic checks and — unless
    ``--no-chain-confirm`` was given — asks lookup_hash whether the commit
    doc's sha256 is anchored. The JSON report is always printed to stdout.

    Returns 0 when the crypto checks pass and the anchor is confirmed (or
    the chain check was explicitly skipped); 1 when the crypto checks
    fail, the anchor is missing, or the lookup errored.
    """
    with open(args.reveal, "r", encoding="utf-8") as f:
        reveal = json.load(f)
    with open(args.commit_doc, "r", encoding="utf-8") as f:
        loaded = json.load(f)

    # Accept either the bare commit_doc OR the helper's wrapped form.
    if isinstance(loaded, dict) and "commit_doc" in loaded:
        commit_doc = loaded.get("commit_doc")
    else:
        commit_doc = loaded

    ok, details = verify_sealed_reveal(reveal, commit_doc)
    report: dict = {"verified": ok, "details": details}

    def finish(exit_code: int) -> int:
        print(json.dumps(report, indent=2, sort_keys=True))
        return exit_code

    if not ok:
        # Crypto failed; a chain check would be misleading. Skip it.
        return finish(1)

    if not args.chain_confirm:
        # --no-chain-confirm: explicit offline mode. Loud warning.
        report["chain_check"] = {
            "state": "skipped",
            "note": "--no-chain-confirm: cryptographic checks only; on-chain "
                    "anchor was not confirmed. A locally-fabricated commit + "
                    "reveal would also pass crypto. Use only when you have "
                    "out-of-band confirmation of the on-chain commitment.",
        }
        sys.stderr.write(
            "[verify-sealed] WARNING: --no-chain-confirm set; cryptographic\n"
            "[verify-sealed]   checks passed but no on-chain confirmation\n"
            "[verify-sealed]   was performed. A locally-fabricated bundle\n"
            "[verify-sealed]   with no on-chain anchor would also pass.\n"
        )
        return finish(0)

    commit_doc_sha = hashlib.sha256(canonicalize(commit_doc)).hexdigest()
    chain = _lookup_hash(args.lookup_host, commit_doc_sha)
    report["chain_check"] = chain
    state = chain["state"]

    if state == "confirmed":
        return finish(0)

    # Anything other than a confirmed anchor fails closed.
    report["verified"] = False

    if state == "missing":
        # Crypto passed but no on-chain anchor exists for this commit
        # doc: either it has not been anchored yet, or the whole bundle
        # is a local forgery.
        report["details"]["forgery_suspected"] = True
        report["details"]["forgery_note"] = (
            "Cryptographic checks passed, but lookup_hash shows no "
            "on-chain anchor for this commit doc. Either the "
            "anchor has not yet been broadcast, or the bundle was "
            "fabricated locally. Treat as unverified until an "
            "on-chain anchor exists."
        )
        return finish(1)

    # state == "error": network/transient failure. The user can re-run,
    # or pass --no-chain-confirm to take the crypto-only result
    # deliberately.
    sys.stderr.write(
        "[verify-sealed] could not confirm chain anchor: "
        f"{chain['note']}\n"
        "[verify-sealed]   re-run when network is available, or "
        "pass --no-chain-confirm to accept crypto-only verification.\n"
    )
    return finish(1)


def main(argv=None) -> int:
    """Parse the command line and dispatch to the chosen sub-command.

    *argv* is the argument list without the program name; ``None`` lets
    argparse read ``sys.argv``. A sub-command is required. Returns the
    handler's exit code.
    """
    parser = argparse.ArgumentParser(
        prog="merkle_row",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # build-standard
    standard_cmd = commands.add_parser(
        "build-standard",
        help="build manifest-mode anchor body + reveals",
    )
    standard_cmd.add_argument(
        "--rows-jsonl", required=True,
        help="JSON-lines file (one row per line), or '-' for stdin",
    )
    standard_cmd.add_argument("--matter-slug", required=True)
    standard_cmd.add_argument(
        "--label", default=None,
        help="optional anchor-level label",
    )
    standard_cmd.add_argument(
        "--category", default="evidence_bundle",
        help="default evidence_bundle",
    )
    standard_cmd.add_argument(
        "--out-anchor", default=None,
        help="path for the /api/v1/anchors body (default: print summary only)",
    )
    standard_cmd.add_argument(
        "--out-reveals", default=None,
        help="dir for per-row reveal records (default: don't emit)",
    )
    standard_cmd.set_defaults(func=_cmd_build_standard)

    # build-sealed
    sealed_cmd = commands.add_parser(
        "build-sealed",
        help="build sealed master record + commit doc",
    )
    sealed_cmd.add_argument("--rows-jsonl", required=True)
    sealed_cmd.add_argument(
        "--out-master", default=None,
        help="path for the master record (KEEP PRIVATE)",
    )
    sealed_cmd.add_argument(
        "--out-commit", default=None,
        help="path for the commit doc + anchor metadata",
    )
    sealed_cmd.add_argument(
        "--master-salt-hex", default=None,
        help="(testing only) supply a deterministic 64-hex master salt",
    )
    sealed_cmd.set_defaults(func=_cmd_build_sealed)

    # reveal-sealed
    reveal_cmd = commands.add_parser(
        "reveal-sealed",
        help="emit a single-row reveal payload from a master record",
    )
    reveal_cmd.add_argument("--master", required=True)
    reveal_cmd.add_argument("--leaf-index", type=int, required=True)
    reveal_cmd.add_argument(
        "--out", default=None,
        help="path for the reveal record (default: print to stdout)",
    )
    reveal_cmd.set_defaults(func=_cmd_reveal_sealed)

    # verify-sealed
    verify_cmd = commands.add_parser(
        "verify-sealed",
        help="verify a sealed reveal against a commit doc",
    )
    verify_cmd.add_argument("--reveal", required=True)
    verify_cmd.add_argument(
        "--commit-doc", required=True,
        help="path to the commit doc (or build-sealed's wrapped output)",
    )
    # Chain confirmation is on unless explicitly disabled: passing the
    # cryptographic checks alone does NOT prove the commit doc was
    # anchored, because a forger can build a valid HMAC + Merkle
    # structure locally. lookup_hash confirms the sha256 is on chain.
    chain_group = verify_cmd.add_mutually_exclusive_group()
    chain_group.add_argument(
        "--chain-confirm", dest="chain_confirm",
        action="store_true", default=True,
        help="confirm on-chain anchor via lookup_hash (default: on)",
    )
    chain_group.add_argument(
        "--no-chain-confirm", dest="chain_confirm",
        action="store_false",
        help="skip lookup_hash; verify cryptographic checks only "
             "(unsafe — locally-fabricated bundles will pass)",
    )
    verify_cmd.add_argument(
        "--lookup-host", default="https://proof.satsignal.cloud",
        help=argparse.SUPPRESS,
    )
    verify_cmd.set_defaults(func=_cmd_verify_sealed)

    namespace = parser.parse_args(argv)
    return namespace.func(namespace)


if __name__ == "__main__":
    sys.exit(main())
