feat: pdf2cad

This commit is contained in:
2026-03-03 21:24:02 +00:00
commit 112213da6e
61 changed files with 7290 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
"""Tests for PDF vector geometry extraction."""
import pytest
import pymupdf
from pathlib import Path
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.models import PageExtraction, RawPath
# Path to the ``fixtures/input`` directory that sits next to this test module.
FIXTURES_DIR = Path(__file__).parent.joinpath("fixtures", "input")
class TestExtractGeometry:
    """Exercise ``extract_geometry`` on the first page of fixture PDFs.

    The ``simple_panel_pdf`` and ``all_fixture_pdfs`` arguments are pytest
    fixtures defined outside this module (presumably in ``conftest.py`` --
    TODO confirm). Every document is opened in a ``with`` block so it is
    closed even when an assertion fails, instead of leaking the handle.
    """

    def test_returns_page_extraction(self, simple_panel_pdf):
        """The return value is a ``PageExtraction`` instance."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            assert isinstance(result, PageExtraction)

    def test_paths_are_raw_path_objects(self, simple_panel_pdf):
        """Every entry in ``result.paths`` is a ``RawPath``."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            assert all(isinstance(p, RawPath) for p in result.paths)

    def test_extracts_sufficient_paths(self, simple_panel_pdf):
        """simple_panel.pdf should have >10 paths."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            assert len(result.paths) > 10, f"Expected >10 paths, got {len(result.paths)}"

    def test_dashes_extracted_correctly(self, simple_panel_pdf):
        """At least one path has a falsy ``dashes`` value (a solid line).

        Only the presence of solid paths is asserted here; dashed paths are
        not checked by this test.
        """
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            solid = [p for p in result.paths if not p.dashes]
            # Should have at least some solid lines (geometry outline)
            assert len(solid) > 0, "No solid lines found"

    def test_y_coordinates_flipped(self, simple_panel_pdf):
        """After y-flip, rect y0 should be >= 0 and y1 <= page_height."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            page_h = result.page_height
            for p in result.paths:
                # Only the vertical extent matters here; a 0.1 slack is
                # allowed on both bounds.
                _x0, y0, _x1, y1 = p.rect
                assert y0 >= -0.1, f"y0 negative: {y0}"
                assert y1 <= page_h + 0.1, f"y1 > page_height: {y1}"

    def test_texts_empty_in_result(self, simple_panel_pdf):
        """extract_geometry returns empty texts (text extracted separately)."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            assert result.texts == (), "extract_geometry should return empty texts"

    def test_page_dimensions_stored(self, simple_panel_pdf):
        """Page width and height match the dimensions of ``page.rect``."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            # The page must be read while the document is still open.
            page = doc[0]
            result = extract_geometry(page)
            assert result.page_width == pytest.approx(page.rect.width)
            assert result.page_height == pytest.approx(page.rect.height)

    def test_all_fixtures_extractable(self, all_fixture_pdfs):
        """All fixture PDFs can be extracted without error."""
        for pdf_path in all_fixture_pdfs:
            # One context manager per file so each handle is released before
            # the next document is opened.
            with pymupdf.open(str(pdf_path)) as doc:
                result = extract_geometry(doc[0])
                assert len(result.paths) > 0, f"No paths in {pdf_path.name}"

    def test_width_stored_in_rawpath(self, simple_panel_pdf):
        """``RawPath.width`` takes more than one distinct value across paths."""
        with pymupdf.open(str(simple_panel_pdf)) as doc:
            result = extract_geometry(doc[0])
            widths = {p.width for p in result.paths}
            assert len(widths) > 1, "Expected multiple distinct line widths"