"""Tests for dimension extraction.""" import pytest import pymupdf from pathlib import Path from pdf2imos.extract.geometry import extract_geometry from pdf2imos.extract.text import extract_text from pdf2imos.interpret.title_block import detect_title_block from pdf2imos.interpret.view_segmenter import segment_views from pdf2imos.interpret.line_classifier import classify_lines from pdf2imos.parse.dimensions import extract_dimensions from pdf2imos.models import ( PageExtraction, ViewType, DimensionAnnotation, DimensionDirection, ) def make_pipeline(pdf_path): """Run full pipeline up to dimension extraction.""" doc = pymupdf.open(str(pdf_path)) page = doc[0] page_height = page.rect.height geo = extract_geometry(page) texts = extract_text(page) extraction = PageExtraction( paths=geo.paths, texts=tuple(texts), page_width=geo.page_width, page_height=page_height, ) _, filtered = detect_title_block(extraction) views = segment_views(filtered) return views, page_height class TestExtractDimensions: def test_returns_list(self, simple_panel_pdf): views, page_height = make_pipeline(simple_panel_pdf) if not views: pytest.skip("No views detected") view = views[0] classified = classify_lines(list(view.paths)) result = extract_dimensions(view, classified, page_height) assert isinstance(result, list) def test_dimension_annotations_type(self, simple_panel_pdf): views, page_height = make_pipeline(simple_panel_pdf) if not views: pytest.skip("No views detected") view = views[0] classified = classify_lines(list(view.paths)) result = extract_dimensions(view, classified, page_height) assert all(isinstance(d, DimensionAnnotation) for d in result) def test_finds_dimensions_in_largest_view(self, simple_panel_pdf): """The largest view (by text count) should have dimension values.""" views, page_height = make_pipeline(simple_panel_pdf) if not views: pytest.skip("No views detected") # Pick the view with the most texts (most likely the main dimensioned view) main_view = max(views, key=lambda v: len(v.texts)) if not main_view.texts: pytest.skip("No texts in any view") classified = classify_lines(list(main_view.paths)) result = extract_dimensions(main_view, classified, page_height) assert len(result) > 0, ( f"No dimensions found in {main_view.view_type.value} view " f"({len(main_view.texts)} texts, {len(main_view.paths)} paths)" ) def test_dimension_values_reasonable(self, simple_panel_pdf): """Dimension values should be positive and reasonable (1-3000mm range).""" views, page_height = make_pipeline(simple_panel_pdf) for view in views: classified = classify_lines(list(view.paths)) dims = extract_dimensions(view, classified, page_height) for d in dims: assert d.value_mm > 0, f"Negative dimension: {d.value_mm}" assert d.value_mm < 10000, f"Unreasonably large dimension: {d.value_mm}" def test_direction_is_enum(self, simple_panel_pdf): """Direction field is a DimensionDirection enum value.""" views, page_height = make_pipeline(simple_panel_pdf) for view in views: classified = classify_lines(list(view.paths)) dims = extract_dimensions(view, classified, page_height) for d in dims: assert isinstance(d.direction, DimensionDirection) def test_finds_600mm_or_720mm_dimension(self, simple_panel_pdf): """simple_panel.pdf front view should have 600 or 720mm dimensions.""" views, page_height = make_pipeline(simple_panel_pdf) all_dims = [] for view in views: classified = classify_lines(list(view.paths)) all_dims.extend(extract_dimensions(view, classified, page_height)) values = {d.value_mm for d in all_dims} # At least one of the main panel dimensions should be found assert any( 580 <= v <= 620 or 700 <= v <= 740 or 15 <= v <= 21 for v in values ), f"No expected dimension found in: {sorted(values)}" def test_all_fixtures_processable(self, all_fixture_pdfs): """All fixture PDFs process without error.""" for pdf_path in all_fixture_pdfs: views, page_height = make_pipeline(pdf_path) for view in views: classified = classify_lines(list(view.paths)) dims = extract_dimensions(view, classified, page_height) assert isinstance(dims, list) def test_horizontal_vertical_present(self, simple_panel_pdf): """Both H and V dimensions expected in a panel drawing.""" views, page_height = make_pipeline(simple_panel_pdf) all_dims = [] for view in views: classified = classify_lines(list(view.paths)) all_dims.extend(extract_dimensions(view, classified, page_height)) if not all_dims: pytest.skip("No dimensions extracted") directions = {d.direction for d in all_dims} # Should have at least one direction type assert len(directions) > 0