feat: pdf2cad

This commit is contained in:
2026-03-03 21:24:02 +00:00
commit 112213da6e
61 changed files with 7290 additions and 0 deletions

View File

@@ -0,0 +1,189 @@
"""Tests for pdf2imos custom exception hierarchy and error handling."""
from pathlib import Path
import pymupdf
import pytest
from typer.testing import CliRunner
from pdf2imos.cli import app, process_pdf
from pdf2imos.errors import (
DimensionExtractionError,
OutputWriteError,
Pdf2ImosError,
PdfExtractionError,
ViewSegmentationError,
)
# Shared in-process CLI runner; the CLI tests in this module call
# runner.invoke(app, [...]) and inspect the returned result object.
runner = CliRunner()
# ---------------------------------------------------------------------------
# Helpers: create broken/edge-case PDFs on disk
# ---------------------------------------------------------------------------
def _create_non_pdf(path: Path) -> Path:
    """Write plain text to *path* so that it is a PDF in name only.

    Args:
        path: Destination file; callers in this module give it a ``.pdf``
            suffix.

    Returns:
        The same *path* that was passed in.
    """
    bogus_content = "This is not a PDF file at all."
    path.write_text(bogus_content)
    return path
def _create_empty_pdf(path: Path) -> Path:
    """Write a minimal, structurally valid PDF with zero pages to *path*.

    The file contains a catalog (object 1) and an empty page tree
    (object 2). The cross-reference offsets and the ``startxref`` value are
    computed from the actual byte layout instead of being hard-coded, so
    each xref entry points exactly at the object it describes.

    Args:
        path: Destination file to (over)write.

    Returns:
        The same *path* that was passed in.
    """
    header = b"%PDF-1.4\n"
    objects = (
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    )

    # Lay out the body while recording the byte offset of each object.
    body = bytearray(header)
    offsets = []
    for obj in objects:
        offsets.append(len(body))
        body += obj
    xref_offset = len(body)

    # Entry 0 is the mandatory free-list head, hence the +1.
    entry_count = len(objects) + 1
    # Every xref entry is exactly 20 bytes: 10-digit offset, space,
    # 5-digit generation, space, type flag, and a two-byte " \n" terminator.
    xref_lines = [f"xref\n0 {entry_count}\n", "0000000000 65535 f \n"]
    xref_lines.extend(f"{offset:010d} 00000 n \n" for offset in offsets)
    trailer = (
        f"trailer\n<< /Size {entry_count} /Root 1 0 R >>\n"
        f"startxref\n{xref_offset}\n%%EOF"
    )

    pdf_bytes = (
        bytes(body)
        + "".join(xref_lines).encode("ascii")
        + trailer.encode("ascii")
    )
    path.write_bytes(pdf_bytes)
    return path
def _create_text_only_pdf(path: Path) -> Path:
    """Create a one-page PDF at *path* that contains only a text string.

    No drawing operations are issued, so the page is intended to carry no
    vector paths; the tests in this module use it to exercise the
    "No vector content" error path.

    Args:
        path: Destination file for the saved document.

    Returns:
        The same *path* that was passed in.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()
        page.insert_text((100, 100), "Hello world", fontsize=12)
        doc.save(str(path))
    finally:
        # Release the document even when page creation or saving fails.
        doc.close()
    return path
# ---------------------------------------------------------------------------
# Test: Exception Hierarchy
# ---------------------------------------------------------------------------
class TestExceptionHierarchy:
    """Check that every custom error type descends from Pdf2ImosError."""

    def test_pdf2imos_error_is_base(self):
        """The root error type is itself an Exception subclass."""
        assert issubclass(Pdf2ImosError, Exception)

    def test_pdf_extraction_error_inherits(self):
        """PdfExtractionError derives from the package base error."""
        assert issubclass(PdfExtractionError, Pdf2ImosError)

    def test_view_segmentation_error_inherits(self):
        """ViewSegmentationError derives from the package base error."""
        assert issubclass(ViewSegmentationError, Pdf2ImosError)

    def test_dimension_extraction_error_inherits(self):
        """DimensionExtractionError derives from the package base error."""
        assert issubclass(DimensionExtractionError, Pdf2ImosError)

    def test_output_write_error_inherits(self):
        """OutputWriteError derives from the package base error."""
        assert issubclass(OutputWriteError, Pdf2ImosError)

    def test_all_catchable_as_pdf2imos_error(self):
        """Raising any concrete error satisfies an expectation on the base."""
        concrete_errors = (
            PdfExtractionError,
            ViewSegmentationError,
            DimensionExtractionError,
            OutputWriteError,
        )
        for error_type in concrete_errors:
            with pytest.raises(Pdf2ImosError):
                raise error_type("test")

    def test_output_write_error_can_be_raised(self):
        """OutputWriteError can be raised and matched on its own message."""
        with pytest.raises(OutputWriteError, match="disk full"):
            raise OutputWriteError("disk full")
# ---------------------------------------------------------------------------
# Test: process_pdf error paths
# ---------------------------------------------------------------------------
class TestProcessPdfErrors:
    """Check which custom exception process_pdf raises for bad inputs."""

    def test_non_pdf_raises_extraction_error(self, tmp_path):
        """A text file with a .pdf name fails with a "Cannot open" error."""
        source = _create_non_pdf(tmp_path / "fake.pdf")
        output_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Cannot open"):
            process_pdf(source, output_dir)

    def test_empty_pdf_raises_extraction_error(self, tmp_path):
        """A zero-page PDF fails with an "Empty PDF" error."""
        source = _create_empty_pdf(tmp_path / "empty.pdf")
        output_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Empty PDF"):
            process_pdf(source, output_dir)

    def test_text_only_pdf_raises_no_vector_content(self, tmp_path):
        """A text-only PDF fails with a "No vector content" error."""
        source = _create_text_only_pdf(tmp_path / "text_only.pdf")
        output_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="No vector content"):
            process_pdf(source, output_dir)
# ---------------------------------------------------------------------------
# Test: CLI handles errors gracefully (no crash/traceback to user)
# ---------------------------------------------------------------------------
class TestCliErrorHandling:
    """The CLI must turn failures into exit codes instead of crashing."""

    @staticmethod
    def _run_cli(input_dir, output_dir):
        """Invoke the CLI app with the two directory arguments as strings."""
        cli_args = [str(input_dir), str(output_dir)]
        return runner.invoke(app, cli_args)

    def test_non_pdf_file_exits_nonzero(self, tmp_path):
        """Non-PDF file → exit code 1 or 2, no unhandled crash."""
        input_dir = tmp_path / "in"
        input_dir.mkdir()
        _create_non_pdf(input_dir / "bad.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code in (1, 2)
        # Only a SystemExit (or nothing at all) may have escaped the app.
        escaped = outcome.exception
        assert escaped is None or isinstance(escaped, SystemExit)

    def test_empty_pdf_exits_nonzero(self, tmp_path):
        """Empty PDF → exit code 1 or 2."""
        input_dir = tmp_path / "in"
        input_dir.mkdir()
        _create_empty_pdf(input_dir / "empty.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code in (1, 2)

    def test_empty_input_dir_exits_2(self, tmp_path):
        """No PDF files in input dir → exit code 2."""
        input_dir = tmp_path / "in"
        input_dir.mkdir()

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code == 2

    def test_nonexistent_input_dir_exits_2(self, tmp_path):
        """Nonexistent input dir → exit code 2."""
        outcome = self._run_cli(tmp_path / "nope", tmp_path / "out")

        assert outcome.exit_code == 2

    def test_mixed_good_and_bad_exits_1(self, tmp_path):
        """Mix of valid + invalid PDFs → exit code 1 (partial)."""
        input_dir = tmp_path / "in"
        input_dir.mkdir()

        # One real fixture copied in as the "good" input...
        fixtures_root = Path(__file__).parent / "fixtures" / "input"
        good_source = fixtures_root / "simple_panel.pdf"
        (input_dir / "good.pdf").write_bytes(good_source.read_bytes())
        # ...next to one file that is not a PDF at all.
        _create_non_pdf(input_dir / "bad.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code == 1