feat: pdf2cad
This commit is contained in:
79
tests/test_title_block.py
Normal file
79
tests/test_title_block.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Tests for title block detection and exclusion."""
|
||||
import pytest
|
||||
import pymupdf
|
||||
from pathlib import Path
|
||||
from pdf2imos.extract.geometry import extract_geometry
|
||||
from pdf2imos.extract.text import extract_text
|
||||
from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
|
||||
from pdf2imos.models import PageExtraction
|
||||
|
||||
|
||||
def make_extraction(pdf_path: Path) -> PageExtraction:
    """Create a PageExtraction from the first page of a PDF.

    Args:
        pdf_path: Location of the PDF file to open.

    Returns:
        A PageExtraction built from the geometry and text extracted from
        page 0 of the document.
    """
    # Open the document as a context manager so it is closed even if an
    # extraction step raises; the original left the document open.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        # Build the result while the document is still open so that
        # tuple(texts) is materialised before the page goes away.
        return PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )
|
||||
|
||||
|
||||
class TestDetectTitleBlock:
    """Tests for detect_title_block on the fixture PDFs."""

    def test_title_block_detected(self, simple_panel_pdf):
        """Title block should be detected in simple_panel.pdf."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        assert title_rect is not None, "Title block not detected"

    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
        """Title block rect centre should lie right of 30% of the page width.

        Only the horizontal position is asserted; no vertical check is made.
        """
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        x0, _y0, x1, _y1 = title_rect
        cx = (x0 + x1) / 2
        # Threshold kept at 30% of the page width (the value the test has
        # always enforced); the comment and message now match it.
        # NOTE(review): the original comment mentioned CAD coords — confirm
        # the y-axis orientation before adding a vertical assertion.
        assert cx > extraction.page_width * 0.3, (
            f"Title block center x={cx} is not right of 30% of page width"
        )

    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
        """After filtering, extraction should have fewer paths."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, filtered = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        assert len(filtered.paths) < len(extraction.paths), (
            "No paths were removed during title block filtering"
        )

    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
        """All fixture PDFs can be processed without crashing."""
        for pdf_path in all_fixture_pdfs:
            extraction = make_extraction(pdf_path)
            # The detected rect may be None; only the filtered result's
            # type is checked here.
            _, filtered = detect_title_block(extraction)
            assert isinstance(filtered, PageExtraction)

    def test_returns_page_extraction_type(self, simple_panel_pdf):
        """detect_title_block returns PageExtraction for filtered result."""
        extraction = make_extraction(simple_panel_pdf)
        _, filtered = detect_title_block(extraction)
        assert isinstance(filtered, PageExtraction)
|
||||
|
||||
|
||||
class TestExtractTitleBlockInfo:
    """Tests for extract_title_block_info."""

    def test_extracts_info_dict(self, simple_panel_pdf):
        """The extracted info is a dict holding the expected keys.

        Skips when no title block is detected in the fixture.
        """
        page_data = make_extraction(simple_panel_pdf)
        rect, _filtered = detect_title_block(page_data)
        if rect is None:
            pytest.skip("Title block not detected")

        result = extract_title_block_info(page_data, rect)

        assert isinstance(result, dict)
        # Keys are checked in the same order as before: the first missing
        # one fails the test.
        for expected_key in ("part_name", "material", "scale"):
            assert expected_key in result
|
||||
Reference in New Issue
Block a user