Files
pdf2cad/tests/test_dimension_extractor.py
2026-03-03 21:24:02 +00:00

131 lines
5.3 KiB
Python

"""Tests for dimension extraction."""
import pytest
import pymupdf
from pathlib import Path
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block
from pdf2imos.interpret.view_segmenter import segment_views
from pdf2imos.interpret.line_classifier import classify_lines
from pdf2imos.parse.dimensions import extract_dimensions
from pdf2imos.models import (
PageExtraction,
ViewType,
DimensionAnnotation,
DimensionDirection,
)
def make_pipeline(pdf_path):
    """Run the pipeline on the first page, stopping before dimension extraction.

    The first page of the PDF is passed through geometry extraction, text
    extraction, title-block detection and view segmentation.

    Args:
        pdf_path: Location of the PDF to process; converted with ``str()``
            before being handed to PyMuPDF.

    Returns:
        A ``(views, page_height)`` tuple, where ``views`` is whatever
        ``segment_views`` returns for the filtered extraction and
        ``page_height`` is the height of the first page's rectangle.
    """
    # Open the document as a context manager so the underlying file handle is
    # released even if one of the extraction steps raises.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        page_height = page.rect.height
        geo = extract_geometry(page)
        texts = extract_text(page)
        extraction = PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=page_height,
        )
        # Only the filtered extraction is needed; the first element returned
        # by detect_title_block is discarded.
        _, filtered = detect_title_block(extraction)
        views = segment_views(filtered)
    return views, page_height
class TestExtractDimensions:
    """Tests for ``extract_dimensions`` run against the fixture PDFs."""

    @staticmethod
    def _dims_for_view(view, page_height):
        """Classify the view's paths and return its extracted dimensions."""
        classified = classify_lines(list(view.paths))
        return extract_dimensions(view, classified, page_height)

    @classmethod
    def _first_view_dims(cls, pdf_path):
        """Return the dimensions of the first view, skipping if there is none."""
        views, page_height = make_pipeline(pdf_path)
        if not views:
            pytest.skip("No views detected")
        return cls._dims_for_view(views[0], page_height)

    @classmethod
    def _all_dims(cls, pdf_path):
        """Return the dimensions of every view in the PDF as one flat list."""
        views, page_height = make_pipeline(pdf_path)
        collected = []
        for view in views:
            collected.extend(cls._dims_for_view(view, page_height))
        return collected

    def test_returns_list(self, simple_panel_pdf):
        """``extract_dimensions`` returns a list for the first view."""
        result = self._first_view_dims(simple_panel_pdf)
        assert isinstance(result, list)

    def test_dimension_annotations_type(self, simple_panel_pdf):
        """Every item extracted from the first view is a DimensionAnnotation."""
        result = self._first_view_dims(simple_panel_pdf)
        assert all(isinstance(d, DimensionAnnotation) for d in result)

    def test_finds_dimensions_in_largest_view(self, simple_panel_pdf):
        """The view with the most texts should yield at least one dimension."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        # Pick the view with the most texts (most likely the main dimensioned view)
        main_view = max(views, key=lambda v: len(v.texts))
        if not main_view.texts:
            pytest.skip("No texts in any view")
        result = self._dims_for_view(main_view, page_height)
        assert len(result) > 0, (
            f"No dimensions found in {main_view.view_type.value} view "
            f"({len(main_view.texts)} texts, {len(main_view.paths)} paths)"
        )

    def test_dimension_values_reasonable(self, simple_panel_pdf):
        """Every dimension value satisfies 0 < value_mm < 10000."""
        for d in self._all_dims(simple_panel_pdf):
            # Zero is rejected as well, so the message says "non-positive".
            assert d.value_mm > 0, f"Non-positive dimension: {d.value_mm}"
            assert d.value_mm < 10000, f"Unreasonably large dimension: {d.value_mm}"

    def test_direction_is_enum(self, simple_panel_pdf):
        """Direction field is a DimensionDirection enum value."""
        for d in self._all_dims(simple_panel_pdf):
            assert isinstance(d.direction, DimensionDirection)

    def test_finds_600mm_or_720mm_dimension(self, simple_panel_pdf):
        """simple_panel.pdf should yield a value near 600, near 720, or in 15-21.

        The accepted bands are 580-620, 700-740 and 15-21 (the last one is
        presumably the panel thickness -- confirm against the fixture).
        """
        values = {d.value_mm for d in self._all_dims(simple_panel_pdf)}
        # At least one of the expected panel dimensions should be found
        assert any(
            580 <= v <= 620 or 700 <= v <= 740 or 15 <= v <= 21 for v in values
        ), f"No expected dimension found in: {sorted(values)}"

    def test_all_fixtures_processable(self, all_fixture_pdfs):
        """All fixture PDFs process without error and yield lists."""
        for pdf_path in all_fixture_pdfs:
            views, page_height = make_pipeline(pdf_path)
            for view in views:
                dims = self._dims_for_view(view, page_height)
                assert isinstance(dims, list)

    def test_horizontal_vertical_present(self, simple_panel_pdf):
        """At least one dimension direction is reported for the panel drawing.

        NOTE(review): despite the test name, this does not require both a
        horizontal and a vertical dimension; it only checks that some
        direction is present once any dimension has been extracted.
        """
        all_dims = self._all_dims(simple_panel_pdf)
        if not all_dims:
            pytest.skip("No dimensions extracted")
        directions = {d.direction for d in all_dims}
        # Should have at least one direction type
        assert len(directions) > 0