131 lines
5.3 KiB
Python
131 lines
5.3 KiB
Python
"""Tests for dimension extraction."""
|
|
|
|
import pytest
|
|
import pymupdf
|
|
from pathlib import Path
|
|
|
|
from pdf2imos.extract.geometry import extract_geometry
|
|
from pdf2imos.extract.text import extract_text
|
|
from pdf2imos.interpret.title_block import detect_title_block
|
|
from pdf2imos.interpret.view_segmenter import segment_views
|
|
from pdf2imos.interpret.line_classifier import classify_lines
|
|
from pdf2imos.parse.dimensions import extract_dimensions
|
|
from pdf2imos.models import (
|
|
PageExtraction,
|
|
ViewType,
|
|
DimensionAnnotation,
|
|
DimensionDirection,
|
|
)
|
|
|
|
|
|
def make_pipeline(pdf_path):
    """Run the pipeline on the first page, up to (not including) dimension extraction.

    Opens the PDF, extracts geometry and text from page 0, removes the title
    block, and segments the remaining content into views.

    Args:
        pdf_path: Location of the PDF; passed through ``str()`` before opening.

    Returns:
        A ``(views, page_height)`` tuple: the result of ``segment_views`` and
        the height of the first page's rectangle.
    """
    # Close the document deterministically once the views are built instead
    # of relying on garbage collection to release the file handle.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        page_height = page.rect.height

        geo = extract_geometry(page)
        texts = extract_text(page)
        extraction = PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=page_height,
        )
        # Only the title-block-free extraction is needed downstream.
        _, filtered = detect_title_block(extraction)
        views = segment_views(filtered)

        return views, page_height
|
|
|
|
|
|
class TestExtractDimensions:
    """Tests for ``extract_dimensions`` driven by the fixture PDFs."""

    @staticmethod
    def _dimensions_for(view, page_height):
        """Classify one view's paths and return the dimensions extracted from it."""
        classified = classify_lines(list(view.paths))
        return extract_dimensions(view, classified, page_height)

    @classmethod
    def _all_dimensions(cls, views, page_height):
        """Return the dimensions of every view, concatenated in view order."""
        collected = []
        for view in views:
            collected.extend(cls._dimensions_for(view, page_height))
        return collected

    def test_returns_list(self, simple_panel_pdf):
        """extract_dimensions returns a list for the first detected view."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        result = self._dimensions_for(views[0], page_height)
        assert isinstance(result, list)

    def test_dimension_annotations_type(self, simple_panel_pdf):
        """Every item returned for the first view is a DimensionAnnotation."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        result = self._dimensions_for(views[0], page_height)
        assert all(isinstance(d, DimensionAnnotation) for d in result)

    def test_finds_dimensions_in_largest_view(self, simple_panel_pdf):
        """The largest view (by text count) should have dimension values."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        # Pick the view with the most texts (most likely the main dimensioned view)
        main_view = max(views, key=lambda v: len(v.texts))
        if not main_view.texts:
            pytest.skip("No texts in any view")
        result = self._dimensions_for(main_view, page_height)
        assert len(result) > 0, (
            f"No dimensions found in {main_view.view_type.value} view "
            f"({len(main_view.texts)} texts, {len(main_view.paths)} paths)"
        )

    def test_dimension_values_reasonable(self, simple_panel_pdf):
        """Dimension values should be strictly positive and below 10000 mm."""
        views, page_height = make_pipeline(simple_panel_pdf)
        for d in self._all_dimensions(views, page_height):
            # Zero is rejected as well, so the message says "non-positive".
            assert d.value_mm > 0, f"Non-positive dimension: {d.value_mm}"
            assert d.value_mm < 10000, f"Unreasonably large dimension: {d.value_mm}"

    def test_direction_is_enum(self, simple_panel_pdf):
        """Direction field is a DimensionDirection enum value."""
        views, page_height = make_pipeline(simple_panel_pdf)
        for d in self._all_dimensions(views, page_height):
            assert isinstance(d.direction, DimensionDirection)

    def test_finds_600mm_or_720mm_dimension(self, simple_panel_pdf):
        """simple_panel.pdf should yield a value near 600, near 720, or in 15-21.

        The accepted bands are 580-620, 700-740 and 15-21; a single value in
        any one of them satisfies the test.
        """
        views, page_height = make_pipeline(simple_panel_pdf)
        values = {d.value_mm for d in self._all_dimensions(views, page_height)}
        # At least one of the main panel dimensions should be found.
        # NOTE(review): the 15-21 band is presumably the panel thickness — confirm.
        assert any(
            580 <= v <= 620 or 700 <= v <= 740 or 15 <= v <= 21 for v in values
        ), f"No expected dimension found in: {sorted(values)}"

    def test_all_fixtures_processable(self, all_fixture_pdfs):
        """All fixture PDFs process without error."""
        for pdf_path in all_fixture_pdfs:
            views, page_height = make_pipeline(pdf_path)
            for view in views:
                dims = self._dimensions_for(view, page_height)
                assert isinstance(dims, list)

    def test_horizontal_vertical_present(self, simple_panel_pdf):
        """At least one dimension direction is reported for the panel drawing.

        NOTE(review): the test name suggests both horizontal and vertical
        dimensions are expected, but only "one or more distinct directions"
        is asserted, which always holds once any dimension exists. Tighten
        this once the fixture's expected directions are confirmed.
        """
        views, page_height = make_pipeline(simple_panel_pdf)
        all_dims = self._all_dimensions(views, page_height)
        if not all_dims:
            pytest.skip("No dimensions extracted")
        directions = {d.direction for d in all_dims}
        # Should have at least one direction type
        assert len(directions) > 0
|