feat: pdf2cad

This commit is contained in:
2026-03-03 21:24:02 +00:00
commit 112213da6e
61 changed files with 7290 additions and 0 deletions

79
tests/test_title_block.py Normal file
View File

@@ -0,0 +1,79 @@
"""Tests for title block detection and exclusion."""
import pytest
import pymupdf
from pathlib import Path
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block, extract_title_block_info
from pdf2imos.models import PageExtraction
def make_extraction(pdf_path: Path) -> PageExtraction:
    """Build a PageExtraction from the first page of the PDF at *pdf_path*.

    The document is opened with pymupdf, geometry and text are extracted
    from page 0, and the results are bundled into a PageExtraction.

    Args:
        pdf_path: Location of the PDF file to read.

    Returns:
        A PageExtraction carrying the extracted paths, the extracted texts
        (as a tuple), and the page dimensions reported by extract_geometry.
    """
    # Use the document as a context manager so the file handle is released
    # even when extraction raises; the result is built before the document
    # closes because the page is only valid while the document is open.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        return PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )
class TestDetectTitleBlock:
    """Tests for detect_title_block on the fixture PDFs."""

    def test_title_block_detected(self, simple_panel_pdf):
        """Title block should be detected in simple_panel.pdf."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        assert title_rect is not None, "Title block not detected"

    def test_title_rect_in_bottom_right(self, simple_panel_pdf):
        """Title block rect should sit towards the right side of the page.

        Only the horizontal position is asserted here; the vertical
        position of the rect is not checked by this test.
        """
        extraction = make_extraction(simple_panel_pdf)
        title_rect, _ = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        x0, _, x1, _ = title_rect
        cx = (x0 + x1) / 2
        # Lenient bound: the horizontal center must lie beyond 30% of the
        # page width.
        min_cx = extraction.page_width * 0.3
        assert cx > min_cx, (
            f"Title block center x={cx} is not right of 30% of page width"
        )

    def test_filtered_has_fewer_paths(self, simple_panel_pdf):
        """After filtering, extraction should have fewer paths."""
        extraction = make_extraction(simple_panel_pdf)
        title_rect, filtered = detect_title_block(extraction)
        if title_rect is None:
            pytest.skip("Title block not detected")
        assert len(filtered.paths) < len(extraction.paths), \
            "No paths were removed during title block filtering"

    def test_all_fixtures_process_without_crash(self, all_fixture_pdfs):
        """All fixture PDFs can be processed without crashing."""
        for pdf_path in all_fixture_pdfs:
            extraction = make_extraction(pdf_path)
            _, filtered = detect_title_block(extraction)
            # Whether or not a title block is found, the filtered result
            # must still be a PageExtraction; name the fixture on failure.
            assert isinstance(filtered, PageExtraction), (
                f"Unexpected filtered result type for {pdf_path}"
            )

    def test_returns_page_extraction_type(self, simple_panel_pdf):
        """detect_title_block returns PageExtraction for filtered result."""
        extraction = make_extraction(simple_panel_pdf)
        _, filtered = detect_title_block(extraction)
        assert isinstance(filtered, PageExtraction)
class TestExtractTitleBlockInfo:
    """Tests for extract_title_block_info on the simple panel fixture."""

    def test_extracts_info_dict(self, simple_panel_pdf):
        """The extracted title block info is a dict with the expected keys."""
        page_data = make_extraction(simple_panel_pdf)
        block_rect, _ = detect_title_block(page_data)
        if block_rect is None:
            pytest.skip("Title block not detected")

        info = extract_title_block_info(page_data, block_rect)

        assert isinstance(info, dict)
        # Each of these fields must be present in the returned mapping.
        for expected_key in ("part_name", "material", "scale"):
            assert expected_key in info