"""Tests for line role classification.""" from collections import Counter import pymupdf from pdf2imos.extract.geometry import extract_geometry from pdf2imos.interpret.line_classifier import ( _parse_dashes, classify_lines, ) from pdf2imos.models import ClassifiedLine, LineRole class TestParseDashes: def test_solid_line_returns_none(self): assert _parse_dashes("") is None assert _parse_dashes("[] 0") is None def test_dashed_line_parsed(self): result = _parse_dashes("[3 2] 0") assert result == [3.0, 2.0] def test_dash_dot_line_parsed(self): result = _parse_dashes("[6 2 2 2] 0") assert result == [6.0, 2.0, 2.0, 2.0] class TestClassifyLines: def test_returns_classified_lines(self, simple_panel_pdf): doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) assert isinstance(result, list) assert all(isinstance(c, ClassifiedLine) for c in result) def test_geometry_lines_found(self, simple_panel_pdf): """Panel drawing should have geometry lines.""" doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) roles = Counter(c.role for c in result) assert roles.get(LineRole.GEOMETRY, 0) > 0, f"No GEOMETRY lines: {dict(roles)}" def test_dimension_lines_found(self, simple_panel_pdf): """Panel drawing should have dimension lines.""" doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) roles = Counter(c.role for c in result) assert roles.get(LineRole.DIMENSION, 0) > 0, ( f"No DIMENSION lines: {dict(roles)}" ) def test_all_lines_have_role(self, simple_panel_pdf): """All classified lines have a non-None role.""" doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) for line in result: assert line.role is not None assert isinstance(line.role, LineRole) def test_confidence_between_0_and_1(self, simple_panel_pdf): """Confidence values between 0 and 1.""" doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) for line in result: assert 0.0 <= line.confidence <= 1.0 def test_dashed_lines_classified_hidden(self, simple_panel_pdf): """Dashed paths should be classified as HIDDEN.""" doc = pymupdf.open(str(simple_panel_pdf)) extraction = extract_geometry(doc[0]) dashed = [p for p in extraction.paths if _parse_dashes(p.dashes) is not None] if dashed: classified = classify_lines(dashed) for c in classified: assert c.role in (LineRole.HIDDEN, LineRole.CENTER), ( f"Dashed line classified as {c.role}" ) def test_all_fixtures_processable(self, all_fixture_pdfs): """All fixture PDFs can be classified without error.""" for pdf_path in all_fixture_pdfs: doc = pymupdf.open(str(pdf_path)) extraction = extract_geometry(doc[0]) result = classify_lines(list(extraction.paths)) assert len(result) > 0, f"No classified lines for {pdf_path.name}"