feat: pdf2cad
This commit is contained in:
130
tests/test_dimension_extractor.py
Normal file
130
tests/test_dimension_extractor.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""Tests for dimension extraction."""
|
||||
|
||||
import pytest
|
||||
import pymupdf
|
||||
from pathlib import Path
|
||||
|
||||
from pdf2imos.extract.geometry import extract_geometry
|
||||
from pdf2imos.extract.text import extract_text
|
||||
from pdf2imos.interpret.title_block import detect_title_block
|
||||
from pdf2imos.interpret.view_segmenter import segment_views
|
||||
from pdf2imos.interpret.line_classifier import classify_lines
|
||||
from pdf2imos.parse.dimensions import extract_dimensions
|
||||
from pdf2imos.models import (
|
||||
PageExtraction,
|
||||
ViewType,
|
||||
DimensionAnnotation,
|
||||
DimensionDirection,
|
||||
)
|
||||
|
||||
|
||||
def make_pipeline(pdf_path):
    """Run the pipeline on a PDF's first page, stopping before dimension extraction.

    Geometry and text are extracted from page 0, bundled into a
    ``PageExtraction``, passed through title-block detection, and the
    filtered result is segmented into views.

    Args:
        pdf_path: Path to the PDF file; converted with ``str()`` before
            being handed to ``pymupdf.open``.

    Returns:
        A ``(views, page_height)`` tuple: the result of ``segment_views``
        and the height of the first page as reported by ``page.rect``.
    """
    # Open the document as a context manager so the file handle is released
    # even if one of the pipeline stages raises; the previous version never
    # closed it, leaking one handle per call.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        page_height = page.rect.height

        geo = extract_geometry(page)
        texts = extract_text(page)
        extraction = PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=page_height,
        )

        # Only the filtered extraction is needed; the first element of the
        # result of detect_title_block is discarded.
        _, filtered = detect_title_block(extraction)
        views = segment_views(filtered)

    return views, page_height
|
||||
|
||||
|
||||
class TestExtractDimensions:
    """Tests for ``extract_dimensions`` driven by the fixture PDFs."""

    @staticmethod
    def _dims_for(view, page_height):
        """Classify the paths of ``view`` and return its extracted dimensions."""
        classified = classify_lines(list(view.paths))
        return extract_dimensions(view, classified, page_height)

    @classmethod
    def _all_dims(cls, pdf_path):
        """Return the dimensions of every view in ``pdf_path`` as one flat list."""
        views, page_height = make_pipeline(pdf_path)
        return [
            dim
            for view in views
            for dim in cls._dims_for(view, page_height)
        ]

    def test_returns_list(self, simple_panel_pdf):
        """extract_dimensions returns a list for the first detected view."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        result = self._dims_for(views[0], page_height)
        assert isinstance(result, list)

    def test_dimension_annotations_type(self, simple_panel_pdf):
        """Every item returned for the first view is a DimensionAnnotation."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        result = self._dims_for(views[0], page_height)
        assert all(isinstance(d, DimensionAnnotation) for d in result)

    def test_finds_dimensions_in_largest_view(self, simple_panel_pdf):
        """The view with the most texts should yield at least one dimension."""
        views, page_height = make_pipeline(simple_panel_pdf)
        if not views:
            pytest.skip("No views detected")
        # Pick the view with the most texts (most likely the main dimensioned view)
        main_view = max(views, key=lambda v: len(v.texts))
        if not main_view.texts:
            pytest.skip("No texts in any view")
        result = self._dims_for(main_view, page_height)
        assert len(result) > 0, (
            f"No dimensions found in {main_view.view_type.value} view "
            f"({len(main_view.texts)} texts, {len(main_view.paths)} paths)"
        )

    def test_dimension_values_reasonable(self, simple_panel_pdf):
        """Every ``value_mm`` lies strictly between 0 and 10000."""
        for d in self._all_dims(simple_panel_pdf):
            # The check rejects zero as well as negatives, so the message
            # says "non-positive" rather than "negative".
            assert d.value_mm > 0, f"Non-positive dimension: {d.value_mm}"
            assert d.value_mm < 10000, f"Unreasonably large dimension: {d.value_mm}"

    def test_direction_is_enum(self, simple_panel_pdf):
        """Direction field is a DimensionDirection enum value."""
        for d in self._all_dims(simple_panel_pdf):
            assert isinstance(d.direction, DimensionDirection)

    def test_finds_600mm_or_720mm_dimension(self, simple_panel_pdf):
        """At least one value falls in 580-620, 700-740 or 15-21.

        The first two bands bracket the 600 and 720 values named in the
        test; the 15-21 band is presumably the panel thickness - confirm
        against the fixture.
        """
        values = {d.value_mm for d in self._all_dims(simple_panel_pdf)}
        # At least one of the main panel dimensions should be found
        assert any(
            580 <= v <= 620 or 700 <= v <= 740 or 15 <= v <= 21 for v in values
        ), f"No expected dimension found in: {sorted(values)}"

    def test_all_fixtures_processable(self, all_fixture_pdfs):
        """All fixture PDFs process without error and yield lists."""
        for pdf_path in all_fixture_pdfs:
            views, page_height = make_pipeline(pdf_path)
            for view in views:
                dims = self._dims_for(view, page_height)
                assert isinstance(dims, list)

    def test_horizontal_vertical_present(self, simple_panel_pdf):
        """At least one direction value is present among extracted dimensions.

        NOTE(review): despite the test name, this does not require both a
        horizontal and a vertical dimension. After the skip guard the set
        below can never be empty, so the assertion is only a smoke check.
        Tighten it once the expected DimensionDirection members for this
        fixture are confirmed.
        """
        all_dims = self._all_dims(simple_panel_pdf)
        if not all_dims:
            pytest.skip("No dimensions extracted")
        directions = {d.direction for d in all_dims}
        # Should have at least one direction type
        assert len(directions) > 0
|
||||
Reference in New Issue
Block a user