Files
pdf2cad/tests/test_line_classifier.py
2026-03-03 21:24:02 +00:00

91 lines
3.5 KiB
Python

"""Tests for line role classification."""
from collections import Counter
import pymupdf
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.interpret.line_classifier import (
_parse_dashes,
classify_lines,
)
from pdf2imos.models import ClassifiedLine, LineRole
class TestParseDashes:
    """Unit tests for the ``_parse_dashes`` dash-pattern parser."""

    def test_solid_line_returns_none(self):
        """An empty spec and an empty dash array both parse to ``None``."""
        for solid_spec in ("", "[] 0"):
            assert _parse_dashes(solid_spec) is None

    def test_dashed_line_parsed(self):
        """A two-element dash array is returned as a list of floats."""
        expected_pattern = [3.0, 2.0]
        assert _parse_dashes("[3 2] 0") == expected_pattern

    def test_dash_dot_line_parsed(self):
        """A four-element dash array is returned as a list of floats."""
        expected_pattern = [6.0, 2.0, 2.0, 2.0]
        assert _parse_dashes("[6 2 2 2] 0") == expected_pattern
class TestClassifyLines:
    """Tests that run ``classify_lines`` on paths extracted from fixture PDFs.

    The ``simple_panel_pdf`` and ``all_fixture_pdfs`` arguments are pytest
    fixtures supplied from outside this module (presumably a conftest).
    """

    @staticmethod
    def _first_page_paths(pdf_path):
        """Return the paths extracted from the first page of *pdf_path*.

        The document is opened as a context manager so that it is closed
        even when extraction raises; previously every test left its
        document open. The paths are copied into a list before the
        document is closed.
        """
        with pymupdf.open(str(pdf_path)) as doc:
            return list(extract_geometry(doc[0]).paths)

    def test_returns_classified_lines(self, simple_panel_pdf):
        """The result is a list containing only ``ClassifiedLine`` items."""
        result = classify_lines(self._first_page_paths(simple_panel_pdf))
        assert isinstance(result, list)
        assert all(isinstance(c, ClassifiedLine) for c in result)

    def test_geometry_lines_found(self, simple_panel_pdf):
        """Panel drawing should have geometry lines."""
        result = classify_lines(self._first_page_paths(simple_panel_pdf))
        roles = Counter(c.role for c in result)
        assert roles.get(LineRole.GEOMETRY, 0) > 0, f"No GEOMETRY lines: {dict(roles)}"

    def test_dimension_lines_found(self, simple_panel_pdf):
        """Panel drawing should have dimension lines."""
        result = classify_lines(self._first_page_paths(simple_panel_pdf))
        roles = Counter(c.role for c in result)
        assert roles.get(LineRole.DIMENSION, 0) > 0, (
            f"No DIMENSION lines: {dict(roles)}"
        )

    def test_all_lines_have_role(self, simple_panel_pdf):
        """All classified lines have a non-None ``LineRole`` role."""
        result = classify_lines(self._first_page_paths(simple_panel_pdf))
        for line in result:
            assert line.role is not None
            assert isinstance(line.role, LineRole)

    def test_confidence_between_0_and_1(self, simple_panel_pdf):
        """Confidence values lie in the closed interval [0, 1]."""
        result = classify_lines(self._first_page_paths(simple_panel_pdf))
        for line in result:
            assert 0.0 <= line.confidence <= 1.0

    def test_dashed_lines_classified_hidden(self, simple_panel_pdf):
        """Dashed paths should be classified as HIDDEN or CENTER.

        If the fixture contains no dashed paths, nothing is asserted and
        the test passes vacuously.
        """
        paths = self._first_page_paths(simple_panel_pdf)
        dashed = [p for p in paths if _parse_dashes(p.dashes) is not None]
        if dashed:
            classified = classify_lines(dashed)
            for c in classified:
                assert c.role in (LineRole.HIDDEN, LineRole.CENTER), (
                    f"Dashed line classified as {c.role}"
                )

    def test_all_fixtures_processable(self, all_fixture_pdfs):
        """All fixture PDFs can be classified without error."""
        for pdf_path in all_fixture_pdfs:
            result = classify_lines(self._first_page_paths(pdf_path))
            assert len(result) > 0, f"No classified lines for {pdf_path.name}"