"""Tests for title block detection and exclusion."""

from pathlib import Path

import pymupdf
import pytest

from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
from pdf2imos.models import PageExtraction


def make_extraction(pdf_path: Path) -> PageExtraction:
    """Create a PageExtraction from the first page of a PDF.

    Args:
        pdf_path: Path to the PDF file to open.

    Returns:
        A PageExtraction built from the geometry and text extracted
        from page 0 of the document.
    """
    # Open the document as a context manager so it is closed even when
    # extraction raises; the PageExtraction is built before the close.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        return PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )


class TestDetectTitleBlock:
    """Tests for detect_title_block."""

    def test_title_block_detected(self, simple_panel_pdf):
        """Title block should be detected in simple_panel.pdf."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        assert title_rect is not None, "Title block not detected"

    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
        """Title block rect should sit toward the right side of the page.

        Only the horizontal position is checked: the rect's center x must
        lie beyond 30% of the page width.
        """
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        # Unpack all four values so a malformed rect still fails loudly,
        # but only the x extent is needed for the check below.
        x0, _, x1, _ = title_rect
        cx = (x0 + x1) / 2
        # NOTE(review): the vertical position is not asserted here; add a
        # y check once the page coordinate orientation is confirmed.
        assert cx > extraction.page_width * 0.3, (
            f"Title block center x={cx} is not right of 30% of page width"
        )

    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
        """After filtering, extraction should have fewer paths."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, filtered = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        assert len(filtered.paths) < len(extraction.paths), (
            "No paths were removed during title block filtering"
        )

    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
        """All fixture PDFs can be processed without crashing."""
        for pdf_path in all_fixture_pdfs:
            extraction = make_extraction(pdf_path)
            _, filtered = detect_title_block(extraction)
            # Whether or not a title block is found, the second element
            # of the result must still be a PageExtraction.
            assert isinstance(filtered, PageExtraction), (
                f"detect_title_block returned {type(filtered).__name__} "
                f"for {pdf_path}"
            )

    def test_returns_page_extraction_type(self, simple_panel_pdf):
        """detect_title_block returns PageExtraction for filtered result."""
        extraction = make_extraction(simple_panel_pdf)
        _, filtered = detect_title_block(extraction)
        assert isinstance(filtered, PageExtraction)


class TestExtractTitleBlockInfo:
    """Tests for extract_title_block_info."""

    def test_extracts_info_dict(self, simple_panel_pdf):
        """extract_title_block_info returns a dict with the expected keys."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        info = extract_title_block_info(extraction, title_rect)
        assert isinstance(info, dict)
        assert "part_name" in info
        assert "material" in info
        assert "scale" in info