# pdf2cad/tests/test_title_block.py

"""Tests for title block detection and exclusion."""
import pytest
import pymupdf
from pathlib import Path
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
from pdf2imos.models import PageExtraction


def make_extraction(pdf_path: Path) -> PageExtraction:
    """Build a PageExtraction from the first page of a PDF.

    Args:
        pdf_path: Location of the PDF file to open.

    Returns:
        A PageExtraction holding the first page's paths, its text items
        (as a tuple) and the page dimensions reported by extract_geometry.
    """
    # Use the document as a context manager so it is closed even if
    # extraction raises; otherwise every test call leaks an open handle.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        # Build the result while the document is still open, in case either
        # extractor yields its items lazily from the page.
        return PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )


class TestDetectTitleBlock:
    """Tests for detect_title_block on the fixture PDFs."""

    def test_title_block_detected(self, simple_panel_pdf):
        """Title block should be detected in simple_panel.pdf."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        assert title_rect is not None, "Title block not detected"

    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
        """Title block rect should sit toward the right side of the page.

        Only the horizontal position is asserted: the rect's center x must
        exceed 30% of the page width. The vertical position is not checked.
        """
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        x0, _y0, x1, _y1 = title_rect
        cx = (x0 + x1) / 2
        # Center x should be > 30% of page width.
        min_cx = extraction.page_width * 0.3
        assert cx > min_cx, (
            f"Title block center x={cx} is not right of 30% of page width"
        )

    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
        """After filtering, extraction should have fewer paths."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, filtered = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        assert len(filtered.paths) < len(extraction.paths), \
            "No paths were removed during title block filtering"

    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
        """All fixture PDFs can be processed without crashing."""
        for pdf_path in all_fixture_pdfs:
            extraction = make_extraction(pdf_path)
            # The rect may be None when no title block is found; only the
            # filtered extraction's type is checked here.
            _, filtered = detect_title_block(extraction)
            assert isinstance(filtered, PageExtraction), (
                f"Unexpected filtered result type for {pdf_path}"
            )

    def test_returns_page_extraction_type(self, simple_panel_pdf):
        """detect_title_block returns PageExtraction for filtered result."""
        extraction = make_extraction(simple_panel_pdf)
        _, filtered = detect_title_block(extraction)
        assert isinstance(filtered, PageExtraction)


class TestExtractTitleBlockInfo:
    """Tests for extract_title_block_info on a detected title block."""

    def test_extracts_info_dict(self, simple_panel_pdf):
        """The extracted info is a dict that carries the expected keys."""
        page_data = make_extraction(simple_panel_pdf)
        rect, _filtered = detect_title_block(page_data)
        if rect is None:
            pytest.skip("Title block not detected")

        info = extract_title_block_info(page_data, rect)

        assert isinstance(info, dict)
        # Same three membership checks, in the same order.
        for expected_key in ("part_name", "material", "scale"):
            assert expected_key in info