Files
pdf2cad/tests/integration/test_pipeline.py
2026-03-03 21:24:02 +00:00

217 lines
6.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""End-to-end pipeline integration tests for pdf2imos."""
import json
import shutil
import tempfile
from pathlib import Path
import ezdxf
import pytest
from typer.testing import CliRunner
from pdf2imos.cli import app
from pdf2imos.schema.validator import validate_metadata
# Shared CLI test runner; every test in this module invokes ``app`` through it.
runner = CliRunner()

# Fixture PDFs live in "fixtures/input" under the grandparent directory
# of this file (``parents[1]``).
INPUT_DIR = Path(__file__).parents[1].joinpath("fixtures", "input")
def _run_single_pdf(pdf_name: str, tmpdir: Path):
    """Run the CLI against one fixture PDF inside an isolated directory.

    The fixture ``INPUT_DIR / pdf_name`` is copied into ``tmpdir / "input"``
    and the CLI app is invoked with that directory and ``tmpdir / "output"``
    as its two positional arguments. The output directory is not created
    here; it is only passed to the CLI.

    Returns:
        A ``(exit_code, output_dir, result)`` tuple, where ``result`` is the
        object returned by ``runner.invoke``.
    """
    source_dir = tmpdir / "input"
    target_dir = tmpdir / "output"

    source_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(INPUT_DIR / pdf_name, source_dir)

    cli_args = [str(source_dir), str(target_dir)]
    outcome = runner.invoke(app, cli_args)
    return outcome.exit_code, target_dir, outcome
class TestSimplePanelE2E:
    """simple_panel.pdf → DXF + JSON, audit, schema, 600×720×18mm."""

    def test_simple_panel_e2e(self):
        """Convert simple_panel.pdf and check both artefacts and dimensions.

        Verifies that the CLI exits with code 0, that the DXF and JSON files
        exist, that the DXF audit reports no errors, that the JSON passes
        ``validate_metadata`` and that the overall dimensions are
        600×720×18 mm within ±0.5 mm.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            code, out, res = _run_single_pdf(
                "simple_panel.pdf", Path(tmpdir),
            )
            assert code == 0, res.output

            dxf_path = out / "simple_panel.dxf"
            json_path = out / "simple_panel.json"
            assert dxf_path.exists(), f"Missing DXF output: {dxf_path}"
            assert json_path.exists(), f"Missing JSON output: {json_path}"

            # DXF audit clean
            doc = ezdxf.readfile(str(dxf_path))
            auditor = doc.audit()
            assert not auditor.errors, (
                f"DXF audit reported errors: {auditor.errors}"
            )

            # JSON schema valid. Decode explicitly as UTF-8 so the result
            # does not depend on the platform's locale encoding.
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): presumably raises on a schema violation; the
            # return value is not inspected here — confirm.
            validate_metadata(data)

            # Dimensions 600×720×18mm ±0.5mm
            dims = data["overall_dimensions"]
            expected_mm = {
                "width_mm": 600,
                "height_mm": 720,
                "depth_mm": 18,
            }
            for key, nominal in expected_mm.items():
                assert abs(dims[key] - nominal) <= 0.5, (
                    f"{key}: expected {nominal}±0.5, got {dims[key]}"
                )
class TestCabinetBasicE2E:
    """cabinet_basic.pdf → DXF + JSON, material annotation present."""

    def test_cabinet_basic_e2e(self):
        """Convert cabinet_basic.pdf and check for a material annotation.

        Verifies exit code 0, presence of the DXF and JSON artefacts, a clean
        DXF audit and a JSON document accepted by ``validate_metadata``. A
        material is considered present when any entry of ``parts`` has a
        truthy ``material`` value or, failing that, when the joined
        ``raw_annotations`` text contains one of the material keywords.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            code, out, res = _run_single_pdf(
                "cabinet_basic.pdf", Path(tmpdir),
            )
            assert code == 0, res.output

            dxf_path = out / "cabinet_basic.dxf"
            json_path = out / "cabinet_basic.json"
            assert dxf_path.exists(), f"Missing DXF output: {dxf_path}"
            assert json_path.exists(), f"Missing JSON output: {json_path}"

            # DXF audit clean
            doc = ezdxf.readfile(str(dxf_path))
            auditor = doc.audit()
            assert not auditor.errors, (
                f"DXF audit reported errors: {auditor.errors}"
            )

            # JSON schema valid. Decode explicitly as UTF-8 so the result
            # does not depend on the platform's locale encoding.
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): presumably raises on a schema violation; the
            # return value is not inspected here — confirm.
            validate_metadata(data)

            # Material annotation in parts or raw_annotations
            has_material = any(
                p.get("material") for p in data.get("parts", [])
            )
            if not has_material:
                # Fall back to a case-insensitive keyword search over the
                # free-text annotations.
                raw = " ".join(
                    data.get("raw_annotations", []),
                ).lower()
                has_material = any(
                    kw in raw
                    for kw in ("material", "melamine", "mdf")
                )
            assert has_material, (
                "No material annotation found in output"
            )
class TestPanelWithDrillingE2E:
    """panel_with_drilling.pdf → JSON has drilling data."""

    def test_panel_with_drilling_e2e(self):
        """Convert panel_with_drilling.pdf and check for drilling data.

        Verifies exit code 0, presence of the DXF and JSON artefacts, a clean
        DXF audit and a JSON document accepted by ``validate_metadata``.
        Drilling data is considered present when any entry of ``parts`` has a
        truthy ``drilling`` value or, failing that, when the joined
        ``raw_annotations`` text contains one of the drilling keywords.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            code, out, res = _run_single_pdf(
                "panel_with_drilling.pdf", Path(tmpdir),
            )
            assert code == 0, res.output

            dxf_path = out / "panel_with_drilling.dxf"
            json_path = out / "panel_with_drilling.json"
            assert dxf_path.exists(), f"Missing DXF output: {dxf_path}"
            assert json_path.exists(), f"Missing JSON output: {json_path}"

            # DXF audit clean
            doc = ezdxf.readfile(str(dxf_path))
            auditor = doc.audit()
            assert not auditor.errors, (
                f"DXF audit reported errors: {auditor.errors}"
            )

            # JSON schema valid. Decode explicitly as UTF-8 so the result
            # does not depend on the platform's locale encoding.
            with open(json_path, encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): presumably raises on a schema violation; the
            # return value is not inspected here — confirm.
            validate_metadata(data)

            # Drilling data in parts or raw_annotations
            has_drilling = any(
                p.get("drilling") for p in data.get("parts", [])
            )
            if not has_drilling:
                # Fall back to a case-insensitive keyword search over the
                # free-text annotations.
                raw = " ".join(
                    data.get("raw_annotations", []),
                ).lower()
                has_drilling = any(
                    kw in raw
                    for kw in ("drill", "shelf", "pin", "hole")
                )
            assert has_drilling, (
                "No drilling data found in output"
            )
class TestEdgeCasesE2E:
    """edge_cases.pdf → completes without crash."""

    def test_edge_cases_e2e(self):
        """Run the CLI on edge_cases.pdf and accept success or graceful failure.

        Exit codes 0 and 2 are both accepted. Only when the exit code is 0
        are the DXF and JSON artefacts required to exist, audit cleanly and
        pass ``validate_metadata``.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            code, out, res = _run_single_pdf(
                "edge_cases.pdf", Path(tmpdir),
            )
            # Single PDF: 0=success, 2=assembly failure (graceful)
            assert code in (0, 2), (
                f"Unexpected exit code {code}: {res.output}"
            )
            if code == 0:
                dxf = out / "edge_cases.dxf"
                jsn = out / "edge_cases.json"
                assert dxf.exists(), f"Missing DXF output: {dxf}"
                assert jsn.exists(), f"Missing JSON output: {jsn}"

                # DXF audit clean
                doc = ezdxf.readfile(str(dxf))
                auditor = doc.audit()
                assert not auditor.errors, (
                    f"DXF audit reported errors: {auditor.errors}"
                )

                # JSON schema valid. Decode explicitly as UTF-8 so the
                # result does not depend on the platform's locale encoding.
                with open(jsn, encoding="utf-8") as f:
                    data = json.load(f)
                # NOTE(review): presumably raises on a schema violation;
                # the return value is not inspected here — confirm.
                validate_metadata(data)
class TestStageFlag:
    """--stage flag produces intermediate JSON at each stage."""

    @pytest.mark.parametrize("stage", [
        "extract", "classify", "dimensions",
    ])
    def test_stage_produces_json(self, stage):
        """Run the CLI with ``--stage=<stage>`` on simple_panel.pdf.

        Verifies exit code 0, that exactly one ``*_<stage>.json`` file is
        written to the output directory, that it carries ``"stage"`` equal
        to the requested stage plus a ``"data"`` key, and that no DXF file
        is produced.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            # Use a separate name so the ``str`` yielded by the context
            # manager is not rebound to a ``Path`` mid-block.
            workdir = Path(tmpdir)
            input_dir = workdir / "input"
            output_dir = workdir / "output"
            input_dir.mkdir()
            shutil.copy2(
                INPUT_DIR / "simple_panel.pdf", input_dir,
            )
            result = runner.invoke(
                app,
                [
                    str(input_dir),
                    str(output_dir),
                    f"--stage={stage}",
                ],
            )
            assert result.exit_code == 0, result.output

            # Intermediate JSON produced
            intermediates = list(
                output_dir.glob(f"*_{stage}.json"),
            )
            assert len(intermediates) == 1, (
                f"Expected exactly one *_{stage}.json, "
                f"found {intermediates}"
            )

            # Verify content structure. Decode explicitly as UTF-8 so the
            # result does not depend on the platform's locale encoding.
            with open(intermediates[0], encoding="utf-8") as f:
                data = json.load(f)
            assert data["stage"] == stage
            assert "data" in data

            # No DXF output in stage mode
            dxf_files = list(output_dir.glob("*.dxf"))
            assert not dxf_files, (
                f"Unexpected DXF output in stage mode: {dxf_files}"
            )