feat: pdf2cad

2026-03-03 21:24:02 +00:00
commit 112213da6e
61 changed files with 7290 additions and 0 deletions
--- a/tests/test_title_block.py
+++ b/tests/test_title_block.py
@@ -0,0 +1,79 @@
+"""Tests for title block detection and exclusion."""
+import pytest
+import pymupdf
+from pathlib import Path
+from pdf2imos.extract.geometry import extract_geometry
+from pdf2imos.extract.text import extract_text
+from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
+from pdf2imos.models import PageExtraction
+
+
+def make_extraction(pdf_path: Path) -> PageExtraction:
+    """Build a PageExtraction from the first page of a PDF.
+
+    Args:
+        pdf_path: Location of the PDF file to open.
+
+    Returns:
+        A PageExtraction holding the paths and page dimensions reported by
+        ``extract_geometry`` and the texts reported by ``extract_text``
+        (converted to a tuple) for page 0.
+    """
+    # Use the document as a context manager so the file handle is released
+    # even when extraction raises. All page access happens inside the block.
+    with pymupdf.open(str(pdf_path)) as doc:
+        page = doc[0]
+        geo = extract_geometry(page)
+        texts = extract_text(page)
+        return PageExtraction(
+            paths=geo.paths,
+            texts=tuple(texts),
+            page_width=geo.page_width,
+            page_height=geo.page_height,
+        )
+
+
+class TestDetectTitleBlock:
+    """Tests for ``detect_title_block``.
+
+    Each test builds a PageExtraction with ``make_extraction`` and inspects
+    the ``(title_rect, filtered)`` pair returned by ``detect_title_block``.
+    The ``simple_panel_pdf`` and ``all_fixture_pdfs`` fixtures are defined
+    outside this module (presumably in conftest.py).
+    """
+
+    def test_title_block_detected(self, simple_panel_pdf):
+        """Title block should be detected in simple_panel.pdf."""
+        extraction = make_extraction(simple_panel_pdf)
+        title_rect, _ = detect_title_block(extraction)
+        assert title_rect is not None, "Title block not detected"
+
+    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
+        """Title block rect should sit toward the right side of the page.
+
+        Only the horizontal position is checked; the vertical position of
+        the rect is not asserted here.
+        """
+        extraction = make_extraction(simple_panel_pdf)
+        title_rect, _ = detect_title_block(extraction)
+        if title_rect is None:
+            pytest.skip("Title block not detected")
+        x0, _, x1, _ = title_rect
+        cx = (x0 + x1) / 2
+        # Lenient bound: center x must lie beyond 30% of the page width.
+        min_cx = extraction.page_width * 0.3
+        assert cx > min_cx, (
+            f"Title block center x={cx} is not beyond 30% of page width"
+        )
+
+    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
+        """After filtering, extraction should have fewer paths."""
+        extraction = make_extraction(simple_panel_pdf)
+        title_rect, filtered = detect_title_block(extraction)
+        if title_rect is None:
+            pytest.skip("Title block not detected")
+        assert len(filtered.paths) < len(extraction.paths), \
+            "No paths were removed during title block filtering"
+
+    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
+        """All fixture PDFs can be processed without crashing."""
+        for pdf_path in all_fixture_pdfs:
+            extraction = make_extraction(pdf_path)
+            _, filtered = detect_title_block(extraction)
+            # Whether or not a title block is found, the second element
+            # must still be a PageExtraction.
+            assert isinstance(filtered, PageExtraction), (
+                f"Unexpected filtered result type for {pdf_path}"
+            )
+
+    def test_returns_page_extraction_type(self, simple_panel_pdf):
+        """detect_title_block returns PageExtraction for filtered result."""
+        extraction = make_extraction(simple_panel_pdf)
+        _, filtered = detect_title_block(extraction)
+        assert isinstance(filtered, PageExtraction)
+
+
+class TestExtractTitleBlockInfo:
+    """Tests for ``extract_title_block_info``."""
+
+    def test_extracts_info_dict(self, simple_panel_pdf):
+        """The info extracted from a detected title block is a dict with the expected keys."""
+        page_data = make_extraction(simple_panel_pdf)
+        block_rect, _ = detect_title_block(page_data)
+        if block_rect is None:
+            # Nothing to extract from when detection found no title block.
+            pytest.skip("Title block not detected")
+        result = extract_title_block_info(page_data, block_rect)
+        assert isinstance(result, dict)
+        for expected_key in ("part_name", "material", "scale"):
+            assert expected_key in result