"""End-to-end pipeline integration tests for pdf2imos.""" import json import shutil import tempfile from pathlib import Path import ezdxf import pytest from typer.testing import CliRunner from pdf2imos.cli import app from pdf2imos.schema.validator import validate_metadata runner = CliRunner() INPUT_DIR = Path(__file__).parents[1] / "fixtures" / "input" def _run_single_pdf(pdf_name: str, tmpdir: Path): """Copy one PDF to a temp input dir and run the CLI on it. Returns (exit_code, output_dir, CliRunner result). """ input_dir = tmpdir / "input" output_dir = tmpdir / "output" input_dir.mkdir(parents=True, exist_ok=True) shutil.copy2(INPUT_DIR / pdf_name, input_dir) result = runner.invoke(app, [str(input_dir), str(output_dir)]) return result.exit_code, output_dir, result class TestSimplePanelE2E: """simple_panel.pdf → DXF + JSON, audit, schema, 600×720×18mm.""" def test_simple_panel_e2e(self): with tempfile.TemporaryDirectory() as tmpdir: code, out, res = _run_single_pdf( "simple_panel.pdf", Path(tmpdir), ) assert code == 0, res.output dxf_path = out / "simple_panel.dxf" json_path = out / "simple_panel.json" assert dxf_path.exists() assert json_path.exists() # DXF audit clean doc = ezdxf.readfile(str(dxf_path)) auditor = doc.audit() assert len(auditor.errors) == 0 # JSON schema valid with open(json_path) as f: data = json.load(f) validate_metadata(data) # Dimensions 600×720×18mm ±0.5mm dims = data["overall_dimensions"] assert abs(dims["width_mm"] - 600) <= 0.5 assert abs(dims["height_mm"] - 720) <= 0.5 assert abs(dims["depth_mm"] - 18) <= 0.5 class TestCabinetBasicE2E: """cabinet_basic.pdf → DXF + JSON, material annotation present.""" def test_cabinet_basic_e2e(self): with tempfile.TemporaryDirectory() as tmpdir: code, out, res = _run_single_pdf( "cabinet_basic.pdf", Path(tmpdir), ) assert code == 0, res.output dxf_path = out / "cabinet_basic.dxf" json_path = out / "cabinet_basic.json" assert dxf_path.exists() assert json_path.exists() # DXF audit clean doc = ezdxf.readfile(str(dxf_path)) auditor = doc.audit() assert len(auditor.errors) == 0 # JSON schema valid with open(json_path) as f: data = json.load(f) validate_metadata(data) # Material annotation in parts or raw_annotations has_material = any( p.get("material") for p in data.get("parts", []) ) if not has_material: raw = " ".join( data.get("raw_annotations", []), ).lower() has_material = any( kw in raw for kw in ("material", "melamine", "mdf") ) assert has_material, ( "No material annotation found in output" ) class TestPanelWithDrillingE2E: """panel_with_drilling.pdf → JSON has drilling data.""" def test_panel_with_drilling_e2e(self): with tempfile.TemporaryDirectory() as tmpdir: code, out, res = _run_single_pdf( "panel_with_drilling.pdf", Path(tmpdir), ) assert code == 0, res.output dxf_path = out / "panel_with_drilling.dxf" json_path = out / "panel_with_drilling.json" assert dxf_path.exists() assert json_path.exists() # DXF audit clean doc = ezdxf.readfile(str(dxf_path)) auditor = doc.audit() assert len(auditor.errors) == 0 # JSON schema valid with open(json_path) as f: data = json.load(f) validate_metadata(data) # Drilling data in parts or raw_annotations has_drilling = any( p.get("drilling") for p in data.get("parts", []) ) if not has_drilling: raw = " ".join( data.get("raw_annotations", []), ).lower() has_drilling = any( kw in raw for kw in ("drill", "shelf", "pin", "hole") ) assert has_drilling, ( "No drilling data found in output" ) class TestEdgeCasesE2E: """edge_cases.pdf → completes without crash.""" def test_edge_cases_e2e(self): with tempfile.TemporaryDirectory() as tmpdir: code, out, res = _run_single_pdf( "edge_cases.pdf", Path(tmpdir), ) # Single PDF: 0=success, 2=assembly failure (graceful) assert code in (0, 2), ( f"Unexpected exit code {code}: {res.output}" ) if code == 0: dxf = out / "edge_cases.dxf" jsn = out / "edge_cases.json" assert dxf.exists() assert jsn.exists() # DXF audit clean doc = ezdxf.readfile(str(dxf)) auditor = doc.audit() assert len(auditor.errors) == 0 # JSON schema valid with open(jsn) as f: data = json.load(f) validate_metadata(data) class TestStageFlag: """--stage flag produces intermediate JSON at each stage.""" @pytest.mark.parametrize("stage", [ "extract", "classify", "dimensions", ]) def test_stage_produces_json(self, stage): with tempfile.TemporaryDirectory() as tmpdir: tmpdir = Path(tmpdir) input_dir = tmpdir / "input" output_dir = tmpdir / "output" input_dir.mkdir() shutil.copy2( INPUT_DIR / "simple_panel.pdf", input_dir, ) result = runner.invoke( app, [ str(input_dir), str(output_dir), f"--stage={stage}", ], ) assert result.exit_code == 0, result.output # Intermediate JSON produced intermediates = list( output_dir.glob(f"*_{stage}.json"), ) assert len(intermediates) == 1 # Verify content structure with open(intermediates[0]) as f: data = json.load(f) assert data["stage"] == stage assert "data" in data # No DXF output in stage mode assert len(list(output_dir.glob("*.dxf"))) == 0