91 lines
3.5 KiB
Python
91 lines
3.5 KiB
Python
"""Tests for line role classification."""
|
|
|
|
from collections import Counter
|
|
|
|
import pymupdf
|
|
|
|
from pdf2imos.extract.geometry import extract_geometry
|
|
from pdf2imos.interpret.line_classifier import (
|
|
_parse_dashes,
|
|
classify_lines,
|
|
)
|
|
from pdf2imos.models import ClassifiedLine, LineRole
|
|
|
|
|
|
class TestParseDashes:
    """Unit checks for ``_parse_dashes`` on solid, dashed and dash-dot specs."""

    def test_solid_line_returns_none(self):
        """An empty string and an empty dash array both parse to ``None``."""
        for solid_spec in ("", "[] 0"):
            assert _parse_dashes(solid_spec) is None

    def test_dashed_line_parsed(self):
        """A two-entry dash array parses to the matching list of floats."""
        assert _parse_dashes("[3 2] 0") == [3.0, 2.0]

    def test_dash_dot_line_parsed(self):
        """A four-entry dash array parses to the matching list of floats."""
        assert _parse_dashes("[6 2 2 2] 0") == [6.0, 2.0, 2.0, 2.0]
|
|
|
|
|
|
class TestClassifyLines:
    """Integration tests for ``classify_lines`` run against fixture PDFs.

    The ``simple_panel_pdf`` and ``all_fixture_pdfs`` arguments are pytest
    fixtures defined outside this module (presumably in a ``conftest.py`` --
    confirm); each test only requires that they can be passed through
    ``str()`` to ``pymupdf.open``.
    """

    @staticmethod
    def _classify_first_page(pdf_path):
        """Return ``classify_lines`` output for the first page of *pdf_path*.

        The document is opened as a context manager so it is closed even when
        extraction or classification raises; previously every test left its
        document open.
        """
        with pymupdf.open(str(pdf_path)) as doc:
            extraction = extract_geometry(doc[0])
            # Classify while the document is still open, in case the extracted
            # paths reference page-backed data.
            return classify_lines(list(extraction.paths))

    def test_returns_classified_lines(self, simple_panel_pdf):
        """The result is a list containing only ``ClassifiedLine`` objects."""
        result = self._classify_first_page(simple_panel_pdf)
        assert isinstance(result, list)
        assert all(isinstance(c, ClassifiedLine) for c in result)

    def test_geometry_lines_found(self, simple_panel_pdf):
        """Panel drawing should have geometry lines."""
        result = self._classify_first_page(simple_panel_pdf)
        roles = Counter(c.role for c in result)
        assert roles.get(LineRole.GEOMETRY, 0) > 0, f"No GEOMETRY lines: {dict(roles)}"

    def test_dimension_lines_found(self, simple_panel_pdf):
        """Panel drawing should have dimension lines."""
        result = self._classify_first_page(simple_panel_pdf)
        roles = Counter(c.role for c in result)
        assert roles.get(LineRole.DIMENSION, 0) > 0, (
            f"No DIMENSION lines: {dict(roles)}"
        )

    def test_all_lines_have_role(self, simple_panel_pdf):
        """All classified lines have a non-None role of type ``LineRole``."""
        result = self._classify_first_page(simple_panel_pdf)
        for line in result:
            assert line.role is not None
            assert isinstance(line.role, LineRole)

    def test_confidence_between_0_and_1(self, simple_panel_pdf):
        """Confidence values lie in the closed interval [0, 1]."""
        result = self._classify_first_page(simple_panel_pdf)
        for line in result:
            assert 0.0 <= line.confidence <= 1.0

    def test_dashed_lines_classified_hidden(self, simple_panel_pdf):
        """Dashed paths should be classified as HIDDEN or CENTER.

        Passes vacuously when the fixture contains no dashed paths.
        """
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            extraction = extract_geometry(doc[0])
            dashed = [
                p for p in extraction.paths if _parse_dashes(p.dashes) is not None
            ]
            # As before, the classifier is only invoked when there is
            # at least one dashed path to classify.
            classified = classify_lines(dashed) if dashed else []
        for c in classified:
            assert c.role in (LineRole.HIDDEN, LineRole.CENTER), (
                f"Dashed line classified as {c.role}"
            )

    def test_all_fixtures_processable(self, all_fixture_pdfs):
        """All fixture PDFs can be classified without error."""
        for pdf_path in all_fixture_pdfs:
            # The helper closes each document before the next one is opened.
            result = self._classify_first_page(pdf_path)
            assert len(result) > 0, f"No classified lines for {pdf_path.name}"
|