#!/usr/bin/env python3
"""
ada_tag_pdf.py — ADA PDF Tagging Tool (Step 2)
================================================
Takes a PDF and a JSON element catalog (from Step 1 AI analysis)
and produces a fully tagged, WCAG 2.1 AA / PDF/UA compliant PDF.

Requirements:
    pip install pikepdf

Usage:
    python ada_tag_pdf.py input.pdf elements.json
    python ada_tag_pdf.py input.pdf elements.json -o output.pdf
    python ada_tag_pdf.py input.pdf elements.json --verify-only

The JSON file should have this structure (produced by Step 1 prompt):
{
  "title": "Document Title",
  "elements": [
    {"page": 1, "type": "Heading",   "level": 1, "alt_text": "Doc Title"},
    {"page": 2, "type": "Heading",   "level": 2, "alt_text": "Slide Title"},
    {"page": 2, "type": "Paragraph",             "alt_text": "Body text..."},
    {"page": 2, "type": "Formula",               "alt_text": "Equation..."},
    {"page": 3, "type": "Figure",                "alt_text": "Figure..."}
  ]
}

Structure tree produced:
  /StructTreeRoot
    /Document
      /H1   (exactly one per doc, from level:1 Heading; /Pg bound to its page)
      /Sect (one per page)
        /H2..H6    (from level:2..6 Heading entries; /Pg bound, /ActualText set)
        /P         (from Paragraph entries; /ActualText set)
        /Figure    (from Figure/Image/Diagram/Table; /Alt set)
        /Formula   (from Formula entries; /Alt set)

The original page content is preserved but wrapped as /Artifact so that
assistive technology (AT) follows the new struct tree for reading order.
"""

import argparse
import json
import sys
from pathlib import Path

try:
    import pikepdf
    from pikepdf import Dictionary, Array, Name, String
except ImportError:
    print("Error: pikepdf is required. Install it with:")
    print("  pip install pikepdf")
    sys.exit(1)


def load_elements_json(path: str) -> dict:
    """
    Robustly load the element catalog JSON, handling common AI output issues:
      - Markdown code fences (```json ... ```)
      - Duplicated/truncated output (use the last complete JSON object)
      - Control characters inside strings (literal newlines)
      - BOM markers

    Only a top-level JSON *object* is accepted. A file that parses to a
    list, string or number is treated as unparseable, because every caller
    immediately uses the result as a dict.

    Args:
        path: Path to the JSON (or JSON-ish) file written by the Step 1 AI.

    Returns:
        The parsed catalog as a dict.

    Raises:
        OSError: if the file cannot be opened or read.
        SystemExit: exit code 1, after printing a diagnostic, when no JSON
            object can be recovered from the file.
    """
    import re  # only needed on the recovery path

    # utf-8-sig transparently drops a leading BOM if present.
    with open(path, "r", encoding="utf-8-sig") as f:
        raw = f.read()

    # Strip markdown code fences if the whole file is wrapped in them
    text = raw.strip()
    if text.startswith("```"):
        text = text.split("\n", 1)[-1]
    if text.endswith("```"):
        text = text.rsplit("```", 1)[0]
    text = text.strip()

    def parse_object(candidate):
        """Return `candidate` parsed as a JSON object, or None on failure."""
        try:
            # strict=False accepts everything strict mode accepts, plus raw
            # control characters inside strings, so one attempt covers both.
            value = json.loads(candidate, strict=False)
        except json.JSONDecodeError:
            return None
        return value if isinstance(value, dict) else None

    result = parse_object(text)
    if result is not None:
        return result

    # Common AI failure: output is duplicated (first copy truncated,
    # second copy complete), sometimes with ```json between them.
    # Strategy: find ALL occurrences of {"title" or {"elements" and
    # try parsing from each one, preferring the last that works.
    starts = [
        m.start()
        for m in re.finditer(r'\{[\s\n]*"(?:title|elements)', text)
    ]

    def note_recovery(start):
        print(f"  Note: recovered valid JSON starting at byte {start} "
              f"(the AI may have output duplicated content)")

    # Pass 1: the candidate must parse all the way to the end of the text.
    # Trying every candidate here first keeps an enclosing object preferred
    # over an object nested inside it.
    for start in reversed(starts):
        result = parse_object(text[start:])
        if result is not None:
            note_recovery(start)
            return result

    # Pass 2 (last resort): accept a complete object followed by trailing
    # junk. raw_decode() parses exactly one JSON value starting at `start`
    # and ignores whatever follows it.
    decoder = json.JSONDecoder(strict=False)
    for start in reversed(starts):
        try:
            result, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            continue
        if isinstance(result, dict):
            note_recovery(start)
            return result

    print(f"Error: Could not parse JSON from '{path}'.")
    print("\nThe file may contain malformed AI output. Common causes:")
    print("  - The AI duplicated its output (first copy truncated)")
    print("  - Output was cut off by a token limit")
    print("  - Markdown code fences (```json) embedded in the text")
    print("  - The top-level JSON value is not an object ({...})")
    print("\nTip: Open the file, find the LAST occurrence of")
    print('  {"title":  and delete everything before it.')
    print("\nFirst 200 chars of file:")
    print(f"  {raw[:200]}")
    sys.exit(1)


def _make_leaf_struct_elem(pdf, tag_name, parent, page_obj, text_key, text, mcid):
    """Create an indirect leaf /StructElem that owns one marked-content ID.

    `text_key` is the dictionary key that carries the replacement text
    ("/ActualText" or "/Alt"); `mcid` is the marked-content ID on `page_obj`
    that the element's /MCR kid points at.
    """
    return pdf.make_indirect(Dictionary({
        "/Type": Name("/StructElem"),
        "/S": Name(f"/{tag_name}"),
        "/P": parent,
        "/Pg": page_obj,
        text_key: String(text),
        "/K": Dictionary({
            "/Type": Name("/MCR"),
            "/MCID": mcid,
            "/Pg": page_obj,
        }),
    }))


def build_tagged_pdf(input_path: str, elements_path: str, output_path: str) -> dict:
    """Build a fully tagged PDF from the input PDF and element catalog.

    Replaces the document's /StructTreeRoot with
    /Document > /H1 + one /Sect per page, prepends one marked-content region
    per struct element to each page's content stream, wraps the original
    page content as /Artifact, and saves the result linearized.

    Args:
        input_path: Path of the PDF to tag.
        elements_path: Path of the Step-1 JSON element catalog.
        output_path: Path the tagged PDF is written to.

    Returns:
        A dict with keys:
          "title"        - the document title that was written,
          "num_pages"    - page count of the PDF,
          "num_elements" - number of entries in the catalog,
          "type_counts"  - struct elements actually emitted, keyed by
                           catalog type (the /H1 counts as one "Heading").
    """

    # Load element catalog
    catalog = load_elements_json(elements_path)

    # `or` (rather than a .get() default) also covers an explicit JSON null.
    title = str(catalog.get("title") or "Untitled Document")
    elements = catalog.get("elements") or []

    # Count element types in the catalog up-front for the summary line
    catalog_counts = {}
    for el in elements:
        t = el.get("type", "?")
        catalog_counts[t] = catalog_counts.get(t, 0) + 1

    # The context manager closes the PDF even if tagging fails part-way.
    with pikepdf.open(input_path) as pdf:
        num_pages = len(pdf.pages)

        print(f"  Input:    {input_path}")
        print(f"  Elements: {elements_path}")
        print(f"  Output:   {output_path}")
        print(f"  Title:    {title}")
        print(f"  Pages:    {num_pages}")
        print(f"  Catalog elements ({len(elements)}):")
        for t, n in sorted(catalog_counts.items()):
            print(f"      {t:12s}  {n}")
        print()

        # Group elements by page (convert 1-indexed to 0-indexed). Entries
        # whose page is missing, non-numeric or outside the document are
        # reported and skipped: a negative index would otherwise silently
        # address pages from the end, and an out-of-range one would be
        # dropped without any notice.
        page_elements = {}
        for el in elements:
            try:
                pg = int(el.get("page")) - 1  # 0-indexed internally
            except (TypeError, ValueError, OverflowError):
                pg = -1
            if not 0 <= pg < num_pages:
                print(f"  Warning: element with page {el.get('page')!r} is "
                      f"outside 1..{num_pages} — skipped")
                continue
            page_elements.setdefault(pg, []).append(el)

        # ── 1. Document-level metadata ──
        pdf.Root["/MarkInfo"] = Dictionary({"/Marked": True})
        pdf.Root["/Lang"] = String("en-US")
        # Keep any viewer preferences the document already carries and only
        # switch on the title display.
        viewer_prefs = pdf.Root.get("/ViewerPreferences")
        if isinstance(viewer_prefs, pikepdf.Dictionary):
            viewer_prefs["/DisplayDocTitle"] = True
        else:
            pdf.Root["/ViewerPreferences"] = Dictionary(
                {"/DisplayDocTitle": True}
            )

        # Set title in info dict
        if "/Info" not in pdf.trailer:
            pdf.trailer["/Info"] = pdf.make_indirect(Dictionary())
        pdf.trailer["/Info"]["/Title"] = String(title)

        # Set title in XMP metadata (best effort: a failure here must not
        # abort tagging, so it is reported and ignored).
        try:
            with pdf.open_metadata() as meta:
                meta["dc:title"] = title
                meta["dc:description"] = (
                    "ADA Section 508 remediated: tagged PDF with alt-text "
                    "for WCAG 2.1 AA / PDF/UA compliance"
                )
        except Exception as e:
            print(f"  Warning: Could not set XMP metadata: {e}")

        # ── 2. Structure tree ──
        str_root = pdf.make_indirect(
            Dictionary({"/Type": Name("/StructTreeRoot")})
        )
        pdf.Root["/StructTreeRoot"] = str_root

        doc_elem = pdf.make_indirect(Dictionary({
            "/Type": Name("/StructElem"),
            "/S": Name("/Document"),
            "/P": str_root,
            "/K": Array([]),
        }))
        str_root["/K"] = doc_elem

        pt_nums = Array([])  # ParentTree /Nums

        # ── 2a. Locate the level:1 heading (required: exactly one per doc). ──
        # It may appear on any page, though it's nearly always page 1. We
        # pluck it out of its page's element list so it doesn't get
        # re-processed under that page's /Sect — the /H1 is a direct child
        # of /Document.
        h1_element = None
        h1_page_idx = None
        for pg_idx in sorted(page_elements):
            els = page_elements[pg_idx]
            for i, el in enumerate(els):
                if el.get("type") == "Heading" and el.get("level") == 1:
                    h1_element = els.pop(i)
                    h1_page_idx = pg_idx
                    break
            if h1_element is not None:
                break

        if h1_element is None:
            # Fallback: no level:1 heading in the JSON. Synthesize one from
            # the document title, bound to page 1. Panorama requires exactly
            # one /H1.
            print("  Note: no level:1 Heading in JSON — synthesizing /H1 from title")
            h1_element = {"type": "Heading", "level": 1, "alt_text": title}
            h1_page_idx = 0

        # Create the /H1 struct element now (child of /Document). Its /K
        # MCID reference is filled in when the H1's page is processed.
        h1_struct = pdf.make_indirect(Dictionary({
            "/Type": Name("/StructElem"),
            "/S": Name("/H1"),
            "/P": doc_elem,
            "/Pg": pdf.pages[h1_page_idx].obj,
            "/ActualText": String(str(h1_element.get("alt_text") or title)),
            # /K assigned during the page loop below
        }))
        doc_elem["/K"].append(h1_struct)  # H1 is the first child of /Document

        # ── 3. Process each page ──
        # Exactly one /H1 is always emitted (taken from the JSON or
        # synthesized). It never passes through the per-page loop below, so
        # it is counted here.
        type_counts = {"Heading": 1}
        for page_idx in range(num_pages):
            page = pdf.pages[page_idx]
            page["/StructParents"] = page_idx
            mcid_elems = []       # ordered by MCID; mcid_elems[i] owns MCID i
            content_markers = []  # (tag_name, mcid) pairs to emit as BDC/EMC

            # /Sect for this page (always created, even if empty — keeps the
            # per-page structure consistent and gives a /Pg-bound container
            # for any body elements on the page).
            sect = pdf.make_indirect(Dictionary({
                "/Type": Name("/StructElem"),
                "/S": Name("/Sect"),
                "/P": doc_elem,
                "/Pg": page.obj,
                "/K": Array([]),
            }))

            # If the level:1 heading belongs to this page, it takes the
            # first MCID on the page.
            if page_idx == h1_page_idx:
                h1_mcid = len(mcid_elems)
                h1_struct["/K"] = Dictionary({
                    "/Type": Name("/MCR"),
                    "/MCID": h1_mcid,
                    "/Pg": page.obj,
                })
                mcid_elems.append(h1_struct)
                content_markers.append(("H1", h1_mcid))

            # Remaining page elements, in JSON order → struct children of /Sect
            for el in page_elements.get(page_idx, []):
                etype = el.get("type", "")
                alt = el.get("alt_text")
                alt = "" if alt is None else str(alt)

                if etype == "Heading":
                    # Only the document-level heading may be /H1; everything
                    # else is clamped into H2..H6.
                    try:
                        level = max(2, min(6, int(el.get("level", 2))))
                    except (TypeError, ValueError):
                        level = 2
                    tag_name, text_key = f"H{level}", "/ActualText"
                elif etype == "Paragraph":
                    tag_name, text_key = "P", "/ActualText"
                elif etype in ("Figure", "Image", "Diagram", "Table", "Formula"):
                    tag_name = "Formula" if etype == "Formula" else "Figure"
                    text_key = "/Alt"
                else:
                    print(f"  Warning: page {page_idx+1}, unknown element type "
                          f"'{etype}' — skipped")
                    continue

                mcid = len(mcid_elems)  # next free MCID on this page
                se = _make_leaf_struct_elem(
                    pdf, tag_name, sect, page.obj, text_key, alt, mcid
                )
                sect["/K"].append(se)
                mcid_elems.append(se)
                content_markers.append((tag_name, mcid))
                # Counted only once the element has really been emitted.
                type_counts[etype] = type_counts.get(etype, 0) + 1

            doc_elem["/K"].append(sect)

            # ── Rewrite content stream with BDC/EMC markers ──
            contents = page.get("/Contents")
            if contents is None:
                original = b""
            elif isinstance(contents, pikepdf.Array):
                original = b"\n".join(ref.read_bytes() for ref in contents)
            else:
                original = contents.read_bytes()

            parts = []
            # Emit one BDC/EMC region per struct element, in MCID order. Each
            # region contains a minimal-but-non-empty invisible path
            # operation (stroke a zero-length segment outside the page). This
            # satisfies PDF/UA checkers (like Panorama) that reject empty
            # marked-content regions as missing headings, without altering
            # visual output.
            for tag_name, mcid in content_markers:
                parts.append(
                    f"/{tag_name} <</MCID {mcid}>> BDC\n".encode()
                    + b"q 0 w -1000 -1000 m -1000 -1000 l S Q\n"
                    + b"EMC\n"
                )

            # Preserve the original rendered content so the PDF still LOOKS
            # the same, but wrap it as /Artifact so AT tools ignore it for
            # reading order and follow our new struct tree via /ActualText
            # and /Alt.
            parts.append(b"/Artifact BMC\n")
            parts.append(original)
            parts.append(b"\nEMC\n")

            page["/Contents"] = pdf.make_stream(b"".join(parts))

            # ParentTree entry — array position corresponds to MCID
            pt_nums.append(page_idx)
            pt_nums.append(pdf.make_indirect(Array(mcid_elems)))

        # ── 4. Finalize ──
        str_root["/ParentTree"] = pdf.make_indirect(
            Dictionary({"/Nums": pt_nums})
        )
        str_root["/ParentTreeNextKey"] = num_pages

        # Save
        pdf.save(output_path, linearize=True)

    return {
        "title": title,
        "num_pages": num_pages,
        "num_elements": len(elements),
        "type_counts": type_counts,
    }


def verify_tagged_pdf(output_path: str, catalog: dict) -> tuple:
    """Verify the tagged PDF meets all WCAG 2.1 AA / PDF/UA requirements.

    Args:
        output_path: Path of the tagged PDF to inspect.
        catalog: The Step-1 JSON catalog dict; used to compute the expected
            struct-element counts by type.

    Returns:
        A ``(checks, alt_items)`` tuple. ``checks`` is a list of
        ``(label, passed)`` pairs, one per check, in report order.
        ``alt_items`` is a list of ``(tag, has_alt, preview)`` tuples, one
        per /Figure or /Formula struct element found in the tree.
    """

    elements = catalog.get("elements") or []

    # Expected counts, derived from the Step-1 JSON catalog.
    expected_heading = sum(
        1 for e in elements if e.get("type") == "Heading"
    )
    # If the JSON omitted a level:1 heading, the builder synthesizes one
    # from the document title, so the rendered tree has one extra /H1.
    has_level_1 = any(
        e.get("type") == "Heading" and e.get("level") == 1 for e in elements
    )
    if not has_level_1:
        expected_heading += 1

    expected_paragraph = sum(
        1 for e in elements if e.get("type") == "Paragraph"
    )
    expected_visual = sum(
        1 for e in elements
        if e.get("type") in ("Figure", "Image", "Diagram", "Table", "Formula")
    )

    checks = []

    # The context manager closes the PDF even if a check raises.
    with pikepdf.open(output_path) as pdf:
        # 1. Document is tagged
        marked = bool(pdf.Root.get("/MarkInfo", {}).get("/Marked", False))
        checks.append(("Document is tagged (/MarkInfo /Marked true)", marked))

        # 2. StructTreeRoot
        has_struct = "/StructTreeRoot" in pdf.Root
        checks.append(("StructTreeRoot present", has_struct))

        # 3. Language
        has_lang = "/Lang" in pdf.Root
        checks.append(("Document language set (/Lang)", has_lang))

        # 4. Title
        title_val = ""
        if "/Info" in pdf.trailer:
            title_val = str(pdf.trailer["/Info"].get("/Title", ""))
        checks.append((
            f"Document title set: \"{title_val[:50]}...\"" if len(title_val) > 50
            else f"Document title set: \"{title_val}\"",
            len(title_val) > 3
        ))

        # 5. DisplayDocTitle
        ddt = False
        vp = pdf.Root.get("/ViewerPreferences")
        if isinstance(vp, pikepdf.Dictionary):
            ddt = bool(vp.get("/DisplayDocTitle", False))
        checks.append(("DisplayDocTitle in ViewerPreferences", ddt))

        # 6. Root is /Document
        # /K of the StructTreeRoot may be absent, a single element, or an
        # array of elements; in the array case the first kid is inspected.
        root_kids = None
        if has_struct:
            root_kids = pdf.Root["/StructTreeRoot"].get("/K")
        first_kid = root_kids
        if isinstance(first_kid, pikepdf.Array) and len(first_kid) > 0:
            first_kid = first_kid[0]
        doc_tag = ""
        if isinstance(first_kid, pikepdf.Dictionary):
            doc_tag = str(first_kid.get("/S", ""))
        checks.append(("Root structure element is /Document", doc_tag == "/Document"))

        # 7. Walk the structure tree, collecting struct elements in document
        #    order.
        counts = {
            "H1": 0, "H2": 0, "H3": 0, "H4": 0, "H5": 0, "H6": 0,
            "P": 0, "Sect": 0, "Figure": 0, "Formula": 0,
        }
        alt_items = []          # (tag, has_alt, preview) for Figure/Formula
        heading_actualtext = [] # (tag, has_actualtext, preview) for Hn
        paragraph_actualtext = []
        heading_order = []      # ordered list of heading levels in tree order
        headings_with_pg = 0
        headings_total = 0

        def walk(node):
            """Depth-first visit of `node` and everything under its /K."""
            nonlocal headings_with_pg, headings_total
            if isinstance(node, pikepdf.Array):
                for child in node:
                    walk(child)
                return
            # Integer MCIDs, missing kids (None) and other non-dictionary
            # kids carry no structure type and are ignored.
            if not isinstance(node, pikepdf.Dictionary):
                return
            s = str(node.get("/S", "")).lstrip("/")
            if s in counts:
                counts[s] += 1
            # Figure/Formula: alt-text check
            if s in ("Figure", "Formula"):
                a = str(node.get("/Alt", ""))
                alt_items.append((s, len(a) > 10, a[:70]))
            # Heading: ActualText + /Pg + hierarchy
            if len(s) == 2 and s.startswith("H") and s[1].isdigit():
                heading_order.append(int(s[1]))
                at = str(node.get("/ActualText", ""))
                heading_actualtext.append((s, len(at) > 3, at[:70]))
                headings_total += 1
                if "/Pg" in node:
                    headings_with_pg += 1
            if s == "P":
                at = str(node.get("/ActualText", ""))
                # Collected for informational purposes only; no check below
                # depends on paragraph /ActualText.
                paragraph_actualtext.append((s, len(at) > 3, at[:70]))
            walk(node.get("/K"))

        walk(root_kids)

        num_pages = len(pdf.pages)

        # 7a. Exactly one /H1
        checks.append((
            f"Exactly one /H1 in document: {counts['H1']}",
            counts["H1"] == 1,
        ))

        # 7b. First heading is /H1
        first_is_h1 = bool(heading_order) and heading_order[0] == 1
        checks.append((
            f"First heading in tree order is /H1: "
            f"{'H' + str(heading_order[0]) if heading_order else 'none'}",
            first_is_h1,
        ))

        # 7c. No heading-level skips
        level_skips = [
            (prev, curr)
            for prev, curr in zip(heading_order, heading_order[1:])
            if curr - prev > 1
        ]
        checks.append((
            f"No heading-level skips: "
            f"{'OK' if not level_skips else f'skips found {level_skips[:3]}'}",
            not level_skips,
        ))

        # 7d. All headings have /Pg
        checks.append((
            f"Headings with /Pg: {headings_with_pg}/{headings_total}",
            headings_total > 0 and headings_with_pg == headings_total,
        ))

        # 7e. All headings have /ActualText
        at_ok = sum(1 for _, ok, _ in heading_actualtext if ok)
        checks.append((
            f"Headings with /ActualText: {at_ok}/{len(heading_actualtext)}",
            len(heading_actualtext) > 0 and at_ok == len(heading_actualtext),
        ))

        # 7f. Heading count (H1-H6) matches JSON
        total_h = sum(counts[f"H{i}"] for i in range(1, 7))
        checks.append((
            f"Heading elements (/H1-/H6): {total_h} (expected {expected_heading})",
            total_h == expected_heading,
        ))

        # 7g. Paragraph count matches JSON
        checks.append((
            f"Paragraph elements (/P): {counts['P']} "
            f"(expected {expected_paragraph})",
            counts["P"] == expected_paragraph,
        ))

        # 7h. Section count == pages
        checks.append((
            f"Section elements (/Sect): {counts['Sect']} (expected {num_pages})",
            counts["Sect"] == num_pages,
        ))

        # 7i. Visual (Figure+Formula) count matches JSON
        total_visual = counts["Figure"] + counts["Formula"]
        checks.append((
            f"Visual elements (Figure+Formula): {total_visual} "
            f"(expected {expected_visual})",
            total_visual == expected_visual,
        ))

        # 7j. All visual elements have substantive /Alt. A document with no
        #     visual elements passes vacuously; whether the *number* of
        #     visual elements is right is check 7i's job.
        checks.append((
            f"All {len(alt_items)} visual elements have /Alt text",
            all(h for _, h, _ in alt_items),
        ))

        # 8. StructParents on pages
        sp = sum(1 for p in pdf.pages if "/StructParents" in p)
        checks.append((f"Pages with /StructParents: {sp}/{num_pages}", sp == num_pages))

        # 9. ParentTree
        has_pt = False
        if has_struct:
            has_pt = "/ParentTree" in pdf.Root["/StructTreeRoot"]
        checks.append(("ParentTree present", has_pt))

        # 10. Marked content
        def has_marked_content(page):
            """True if the page's content stream(s) contain a BDC operator."""
            contents = page.get("/Contents")
            if contents is None:
                return False
            if isinstance(contents, pikepdf.Array):
                data = b"\n".join(ref.read_bytes() for ref in contents)
            else:
                data = contents.read_bytes()
            return b"BDC" in data

        mc = sum(1 for p in pdf.pages if has_marked_content(p))
        checks.append((f"Pages with marked content: {mc}/{num_pages}", mc == num_pages))

    return checks, alt_items


def main():
    """Command-line entry point: tag a PDF and/or verify a tagged PDF.

    Parses the arguments, builds the tagged PDF unless --verify-only is
    given, then (unless --no-verify is given) runs the verifier, prints a
    pass/fail listing and writes a ``<output stem>.report.json`` file next
    to the verified PDF.
    """
    parser = argparse.ArgumentParser(
        description="ADA PDF Tagging Tool — produces WCAG 2.1 AA / PDF/UA compliant PDFs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Step 1: Use the Step 1 prompt with any AI (including free tiers) to
        generate the elements.json catalog from your PDF.

Step 2: Run this script:
    python ada_tag_pdf.py lecture.pdf elements.json
    python ada_tag_pdf.py lecture.pdf elements.json -o lecture_accessible.pdf

The script will:
  • Add /MarkInfo, /Lang, /ViewerPreferences, and document /Title
  • Build a complete /StructTreeRoot with:
      /Document > /H1 (once) + per-page /Sect > /H2..H6, /P, /Figure, /Formula
  • Use each Heading entry's `level` to produce the correct /Hn tag
  • Set /ActualText on /Hn and /P struct elements and /Alt on Figure/Formula
  • Wrap the original page content as /Artifact so AT follows the new tree
  • Inject BDC/EMC marked content operators with non-empty regions
  • Build a /ParentTree mapping MCIDs to structure elements
  • Set /StructParents on every page
  • Verify all requirements (including heading hierarchy) and print pass/fail
""",
    )
    parser.add_argument("input_pdf", help="Path to the input PDF")
    parser.add_argument("elements_json", help="Path to the JSON element catalog from Step 1")
    parser.add_argument(
        "-o", "--output",
        default=None,
        help="Output PDF path (default: <input>_ada.pdf)",
    )
    # "Only verify" and "don't verify" contradict each other; together they
    # would make the tool do nothing at all, so argparse rejects the combo.
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "--verify-only",
        action="store_true",
        help="Only verify an already-tagged PDF (skip tagging)",
    )
    mode.add_argument(
        "--no-verify",
        action="store_true",
        help="Skip verification after tagging",
    )

    args = parser.parse_args()

    # Determine output path
    if args.output:
        output_path = args.output
    else:
        stem = Path(args.input_pdf).stem
        output_path = str(Path(args.input_pdf).parent / f"{stem}_ada.pdf")

    # Load catalog once for verification
    catalog = load_elements_json(args.elements_json)
    num_elements = len(catalog.get("elements", []))

    print("=" * 62)
    print("ADA PDF TAGGING TOOL")
    print("WCAG 2.1 AA / PDF/UA Compliance")
    print("=" * 62)
    print()

    if not args.verify_only:
        # ── Build tagged PDF ──
        print("Building tagged PDF...")
        stats = build_tagged_pdf(args.input_pdf, args.elements_json, output_path)
        print(f"\n  ✓ Tagged PDF saved → {output_path}")
        print("    Elements by type:")
        for t, c in sorted(stats["type_counts"].items()):
            print(f"      {t:12s}  {c}")
        print()
    else:
        # In verify-only mode the "input" is the already-tagged PDF.
        output_path = args.input_pdf
        print(f"  Verify-only mode: checking {output_path}\n")

    if args.no_verify:
        print("  Verification skipped (--no-verify)")
        print("=" * 62)
        return

    # ── Verify ──
    print("─" * 62)
    print("VERIFICATION")
    print("─" * 62)

    checks, alt_items = verify_tagged_pdf(output_path, catalog)

    if alt_items:
        print("\n  Visual elements with /Alt text:")
        for s, has, preview in alt_items:
            icon = "✓" if has else "✗"
            print(f"    {icon} [{s:7s}] {preview}...")

    print("\n  Structural checks:")
    for label, passed in checks:
        icon = "✓" if passed else "✗"
        print(f"    {icon} {label}")
    all_pass = all(passed for _, passed in checks)

    print()
    if all_pass:
        print("  ✓ ALL CHECKS PASSED")
    else:
        print("  ✗ SOME CHECKS FAILED — review the output above")

    print("=" * 62)

    # Save JSON report
    report_path = Path(output_path).with_suffix(".report.json")
    report = {
        "input": args.input_pdf,
        "elements_json": args.elements_json,
        "output": output_path,
        "num_elements": num_elements,
        "checks": [{"label": l, "passed": p} for l, p in checks],
        "all_passed": all_pass,
    }
    # Explicit encoding so the report does not depend on the platform's
    # default locale encoding.
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)
    print(f"  Report → {report_path}")


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
