80 lines
3.3 KiB
Python
80 lines
3.3 KiB
Python
"""Tests for title block detection and exclusion."""
|
|
import pytest
|
|
import pymupdf
|
|
from pathlib import Path
|
|
from pdf2imos.extract.geometry import extract_geometry
|
|
from pdf2imos.extract.text import extract_text
|
|
from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
|
|
from pdf2imos.models import PageExtraction
|
|
|
|
|
|
def make_extraction(pdf_path: Path) -> PageExtraction:
    """Create a PageExtraction from the first page of a PDF.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        A PageExtraction populated with the paths and page dimensions
        reported by ``extract_geometry`` and the texts returned by
        ``extract_text`` (converted to a tuple), all taken from page 0.
    """
    # Use the document as a context manager so it is closed even when
    # extraction raises; previously the document was never closed.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        # Build the result while the document is still open, in case the
        # extracted objects still depend on live page data.
        return PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )
|
|
|
|
|
|
class TestDetectTitleBlock:
    """Tests for detect_title_block on the fixture PDFs."""

    def test_title_block_detected(self, simple_panel_pdf):
        """Title block should be detected in simple_panel.pdf."""
        extraction = make_extraction(simple_panel_pdf)
        # Only the detected rectangle matters here; the filtered extraction
        # is covered by the other tests.
        title_rect, _ = detect_title_block(extraction)
        assert title_rect is not None, "Title block not detected"

    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
        """Title block rect should sit toward the right side of the page.

        Only the horizontal position is asserted.
        NOTE(review): the test name mentions "bottom", but no vertical check
        is made; the y-axis orientation of the rect should be confirmed
        before adding one.
        """
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        x0, _, x1, _ = title_rect
        cx = (x0 + x1) / 2
        # Center x must lie beyond 30% of the page width. The threshold is
        # unchanged; the comment and message previously disagreed with it.
        assert cx > extraction.page_width * 0.3, (
            f"Title block center x={cx} is not right of 30% of page width"
        )

    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
        """After filtering, extraction should have fewer paths."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, filtered = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        assert len(filtered.paths) < len(extraction.paths), (
            "No paths were removed during title block filtering"
        )

    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
        """All fixture PDFs can be processed without crashing."""
        for pdf_path in all_fixture_pdfs:
            extraction = make_extraction(pdf_path)
            # Whether or not a title block is found, the second element
            # must still be a PageExtraction.
            _, filtered = detect_title_block(extraction)
            assert isinstance(filtered, PageExtraction), (
                f"detect_title_block returned {type(filtered).__name__} "
                f"for {pdf_path}"
            )

    def test_returns_page_extraction_type(self, simple_panel_pdf):
        """detect_title_block returns PageExtraction for filtered result."""
        extraction = make_extraction(simple_panel_pdf)
        _, filtered = detect_title_block(extraction)
        assert isinstance(filtered, PageExtraction)
|
|
|
|
|
|
class TestExtractTitleBlockInfo:
    """Tests for the result of extract_title_block_info."""

    def test_extracts_info_dict(self, simple_panel_pdf):
        """The extracted info is a dict containing the expected keys."""
        page_data = make_extraction(simple_panel_pdf)
        rect, _ = detect_title_block(page_data)

        # Nothing to inspect when no title block was found.
        if rect is None:
            pytest.skip("Title block not detected")

        block_info = extract_title_block_info(page_data, rect)
        assert isinstance(block_info, dict)

        # Keys are checked in the same order as before.
        for required_key in ("part_name", "material", "scale"):
            assert required_key in block_info
|