Files
pdf2cad/tests/test_error_handling.py
2026-03-03 21:24:02 +00:00

190 lines
6.4 KiB
Python

"""Tests for pdf2imos custom exception hierarchy and error handling."""
from pathlib import Path
import pymupdf
import pytest
from typer.testing import CliRunner
from pdf2imos.cli import app, process_pdf
from pdf2imos.errors import (
DimensionExtractionError,
OutputWriteError,
Pdf2ImosError,
PdfExtractionError,
ViewSegmentationError,
)
# Shared runner that the CLI tests in this module use to invoke ``app``.
runner = CliRunner()
# ---------------------------------------------------------------------------
# Helpers: create broken/edge-case PDFs on disk
# ---------------------------------------------------------------------------
def _create_non_pdf(path: Path) -> Path:
    """Write plain text to *path* so it can pose as a broken PDF.

    Callers in this module pass a ``.pdf`` file name; the content is
    ordinary text, not PDF data.

    Args:
        path: Destination file; created or overwritten.

    Returns:
        The same ``path`` object, so the call can be used inline.
    """
    # Pin the encoding so the bytes on disk never depend on the
    # platform's locale default.
    path.write_text("This is not a PDF file at all.", encoding="utf-8")
    return path
def _create_empty_pdf(path: Path) -> Path:
    """Write a minimal PDF whose page tree is empty (0 pages).

    The file consists of a catalog object, a ``/Pages`` node with no
    kids, a cross-reference table and a trailer. Cross-reference offsets
    are derived from the real byte positions of the objects, so the
    table always agrees with the body.

    Args:
        path: Destination file; created or overwritten.

    Returns:
        The same ``path`` object, so the call can be used inline.
    """
    header = b"%PDF-1.4\n"
    objects = (
        b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n",
    )
    # Entry 0 is the mandatory free-list head, hence the extra slot.
    size = len(objects) + 1

    body = bytearray(header)
    xref_rows = [b"0000000000 65535 f \n"]
    for obj in objects:
        # Each in-use entry records where its object starts in the file.
        xref_rows.append(b"%010d 00000 n \n" % len(body))
        body += obj

    xref_offset = len(body)
    body += b"xref\n0 %d\n" % size
    body += b"".join(xref_rows)
    body += b"trailer\n<< /Size %d /Root 1 0 R >>\n" % size
    body += b"startxref\n%d\n" % xref_offset
    body += b"%%EOF"

    path.write_bytes(bytes(body))
    return path
def _create_text_only_pdf(path: Path) -> Path:
    """Create a one-page PDF that holds text but no drawn vector paths.

    Only a single text string is inserted on the page; nothing is drawn.

    Args:
        path: Destination file; created or overwritten.

    Returns:
        The same ``path`` object, so the call can be used inline.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()
        page.insert_text((100, 100), "Hello world", fontsize=12)
        doc.save(str(path))
    finally:
        # Release the document even when building or saving it fails.
        doc.close()
    return path
# ---------------------------------------------------------------------------
# Test: Exception Hierarchy
# ---------------------------------------------------------------------------
class TestExceptionHierarchy:
    """Check that every custom exception derives from Pdf2ImosError."""

    def test_pdf2imos_error_is_base(self):
        """The root error is itself an ordinary ``Exception``."""
        root = Pdf2ImosError
        assert issubclass(root, Exception)

    def test_pdf_extraction_error_inherits(self):
        """PdfExtractionError sits below the root error."""
        root = Pdf2ImosError
        assert issubclass(PdfExtractionError, root)

    def test_view_segmentation_error_inherits(self):
        """ViewSegmentationError sits below the root error."""
        root = Pdf2ImosError
        assert issubclass(ViewSegmentationError, root)

    def test_dimension_extraction_error_inherits(self):
        """DimensionExtractionError sits below the root error."""
        root = Pdf2ImosError
        assert issubclass(DimensionExtractionError, root)

    def test_output_write_error_inherits(self):
        """OutputWriteError sits below the root error."""
        root = Pdf2ImosError
        assert issubclass(OutputWriteError, root)

    def test_all_catchable_as_pdf2imos_error(self):
        """Raising any concrete error is caught as Pdf2ImosError."""
        concrete_errors = (
            PdfExtractionError,
            ViewSegmentationError,
            DimensionExtractionError,
            OutputWriteError,
        )
        for error_type in concrete_errors:
            with pytest.raises(Pdf2ImosError):
                raise error_type("test")

    def test_output_write_error_can_be_raised(self):
        """OutputWriteError is raisable on its own and keeps its message."""
        message = "disk full"
        with pytest.raises(OutputWriteError, match="disk full"):
            raise OutputWriteError(message)
# ---------------------------------------------------------------------------
# Test: process_pdf error paths
# ---------------------------------------------------------------------------
class TestProcessPdfErrors:
    """process_pdf must report bad inputs with the matching custom error."""

    def test_non_pdf_raises_extraction_error(self, tmp_path):
        """A text file posing as a PDF yields a "Cannot open" error."""
        source = _create_non_pdf(tmp_path / "fake.pdf")
        destination = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Cannot open"):
            process_pdf(source, destination)

    def test_empty_pdf_raises_extraction_error(self, tmp_path):
        """A PDF without pages yields an "Empty PDF" error."""
        source = _create_empty_pdf(tmp_path / "empty.pdf")
        destination = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Empty PDF"):
            process_pdf(source, destination)

    def test_text_only_pdf_raises_no_vector_content(self, tmp_path):
        """A PDF holding only text yields a "No vector content" error."""
        source = _create_text_only_pdf(tmp_path / "text_only.pdf")
        destination = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="No vector content"):
            process_pdf(source, destination)
# ---------------------------------------------------------------------------
# Test: CLI handles errors gracefully (no crash/traceback to user)
# ---------------------------------------------------------------------------
class TestCliErrorHandling:
    """The CLI should trap failures and report them via its exit code."""

    @staticmethod
    def _make_input_dir(tmp_path):
        """Create and return the ``in`` directory under *tmp_path*."""
        input_dir = tmp_path / "in"
        input_dir.mkdir()
        return input_dir

    @staticmethod
    def _run_cli(input_dir, output_dir):
        """Invoke the app with the two directories as positional args."""
        return runner.invoke(app, [str(input_dir), str(output_dir)])

    def test_non_pdf_file_exits_nonzero(self, tmp_path):
        """A non-PDF input gives exit code 1 or 2 without a stray crash."""
        input_dir = self._make_input_dir(tmp_path)
        _create_non_pdf(input_dir / "bad.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code in (1, 2)
        # Nothing other than a SystemExit may have escaped the CLI.
        escaped = outcome.exception
        assert escaped is None or isinstance(escaped, SystemExit)

    def test_empty_pdf_exits_nonzero(self, tmp_path):
        """A zero-page PDF gives exit code 1 or 2."""
        input_dir = self._make_input_dir(tmp_path)
        _create_empty_pdf(input_dir / "empty.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code in (1, 2)

    def test_empty_input_dir_exits_2(self, tmp_path):
        """An input directory without any PDF gives exit code 2."""
        input_dir = self._make_input_dir(tmp_path)

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code == 2

    def test_nonexistent_input_dir_exits_2(self, tmp_path):
        """A missing input directory gives exit code 2."""
        outcome = self._run_cli(tmp_path / "nope", tmp_path / "out")

        assert outcome.exit_code == 2

    def test_mixed_good_and_bad_exits_1(self, tmp_path):
        """One valid plus one invalid PDF gives exit code 1 (partial)."""
        input_dir = self._make_input_dir(tmp_path)

        # Bring in a real fixture as the valid document ...
        fixtures_root = Path(__file__).parent / "fixtures" / "input"
        sample = fixtures_root / "simple_panel.pdf"
        (input_dir / "good.pdf").write_bytes(sample.read_bytes())
        # ... and pair it with a file that is not a PDF at all.
        _create_non_pdf(input_dir / "bad.pdf")

        outcome = self._run_cli(input_dir, tmp_path / "out")

        assert outcome.exit_code == 1