190 lines
6.4 KiB
Python
190 lines
6.4 KiB
Python
"""Tests for pdf2imos custom exception hierarchy and error handling."""
|
|
|
|
from pathlib import Path
|
|
|
|
import pymupdf
|
|
import pytest
|
|
from typer.testing import CliRunner
|
|
|
|
from pdf2imos.cli import app, process_pdf
|
|
from pdf2imos.errors import (
|
|
DimensionExtractionError,
|
|
OutputWriteError,
|
|
Pdf2ImosError,
|
|
PdfExtractionError,
|
|
ViewSegmentationError,
|
|
)
|
|
|
|
# Shared CliRunner instance; the CLI tests in this module call
# runner.invoke(app, [...]) on it and inspect the returned result.
runner = CliRunner()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers: create broken/edge-case PDFs on disk
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _create_non_pdf(path: Path) -> Path:
    """Create a plain-text file at *path* to stand in for a corrupt PDF.

    The content is ordinary text, so the file is only "a PDF" by whatever
    name the caller chose for *path*.

    Returns:
        The same *path*, so the helper can be used inline.
    """
    bogus_content = "This is not a PDF file at all."
    with path.open("w") as handle:
        handle.write(bogus_content)
    return path
|
|
|
|
|
|
def _create_empty_pdf(path: Path) -> Path:
    """Write a hand-assembled PDF with zero pages to *path*.

    The file consists of a header, a catalog object, a page tree with
    ``/Kids []`` and ``/Count 0``, a cross-reference table and a trailer.

    Returns:
        The same *path*, so the helper can be used inline.
    """
    # NOTE(review): the two in-use xref entries (10 and 59) look one byte
    # past the actual object starts (9 and 58), while startxref (110) matches.
    # The bytes are kept verbatim on purpose -- confirm how the PDF reader
    # treats this before correcting them.
    header = b"%PDF-1.4\n"
    catalog_obj = b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
    page_tree_obj = b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n"
    xref_table = b"".join(
        (
            b"xref\n0 3\n",
            b"0000000000 65535 f \n",
            b"0000000010 00000 n \n",
            b"0000000059 00000 n \n",
        )
    )
    trailer = b"trailer\n<< /Size 3 /Root 1 0 R >>\nstartxref\n110\n%%EOF"

    sections = (header, catalog_obj, page_tree_obj, xref_table, trailer)
    path.write_bytes(b"".join(sections))
    return path
|
|
|
|
|
|
def _create_text_only_pdf(path: Path) -> Path:
    """Create a one-page PDF at *path* that holds text but no vector paths.

    A new document is opened, one page is added, the string
    ``"Hello world"`` is inserted at point (100, 100) with font size 12,
    and the document is saved to *path*.

    Returns:
        The same *path*, so the helper can be used inline.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()
        page.insert_text((100, 100), "Hello world", fontsize=12)
        doc.save(str(path))
    finally:
        # Close the document even when building or saving it fails, so a
        # broken fixture does not leave an open document behind.
        doc.close()
    return path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test: Exception Hierarchy
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExceptionHierarchy:
    """Check that each pdf2imos exception derives from Pdf2ImosError."""

    def test_pdf2imos_error_is_base(self):
        """The root error is itself a subclass of Exception."""
        assert issubclass(Pdf2ImosError, Exception)

    def test_pdf_extraction_error_inherits(self):
        """PdfExtractionError is a Pdf2ImosError."""
        assert issubclass(PdfExtractionError, Pdf2ImosError)

    def test_view_segmentation_error_inherits(self):
        """ViewSegmentationError is a Pdf2ImosError."""
        assert issubclass(ViewSegmentationError, Pdf2ImosError)

    def test_dimension_extraction_error_inherits(self):
        """DimensionExtractionError is a Pdf2ImosError."""
        assert issubclass(DimensionExtractionError, Pdf2ImosError)

    def test_output_write_error_inherits(self):
        """OutputWriteError is a Pdf2ImosError."""
        assert issubclass(OutputWriteError, Pdf2ImosError)

    def test_all_catchable_as_pdf2imos_error(self):
        """Raising any specific error satisfies a check for the base class."""
        specific_errors = (
            PdfExtractionError,
            ViewSegmentationError,
            DimensionExtractionError,
            OutputWriteError,
        )
        for error_type in specific_errors:
            with pytest.raises(Pdf2ImosError):
                raise error_type("test")

    def test_output_write_error_can_be_raised(self):
        """OutputWriteError is catchable by its own type and keeps its message."""
        with pytest.raises(OutputWriteError, match="disk full"):
            raise OutputWriteError("disk full")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test: process_pdf error paths
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestProcessPdfErrors:
    """Check the exception type and message process_pdf raises for bad input."""

    def test_non_pdf_raises_extraction_error(self, tmp_path):
        """A text file posing as a PDF raises a "Cannot open" error."""
        bogus_pdf = _create_non_pdf(tmp_path / "fake.pdf")
        out_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Cannot open"):
            process_pdf(bogus_pdf, out_dir)

    def test_empty_pdf_raises_extraction_error(self, tmp_path):
        """A PDF with zero pages raises an "Empty PDF" error."""
        zero_page_pdf = _create_empty_pdf(tmp_path / "empty.pdf")
        out_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="Empty PDF"):
            process_pdf(zero_page_pdf, out_dir)

    def test_text_only_pdf_raises_no_vector_content(self, tmp_path):
        """A PDF holding only text raises a "No vector content" error."""
        text_pdf = _create_text_only_pdf(tmp_path / "text_only.pdf")
        out_dir = tmp_path / "out"
        with pytest.raises(PdfExtractionError, match="No vector content"):
            process_pdf(text_pdf, out_dir)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test: CLI handles errors gracefully (no crash/traceback to user)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCliErrorHandling:
    """CLI should catch errors and exit with proper codes.

    Every test invokes ``app`` through the module-level ``runner`` with an
    input directory and an output directory as its two arguments.
    """

    @staticmethod
    def _assert_no_unhandled_exception(result):
        """Fail if anything other than SystemExit escaped the CLI.

        The test runner reports an uncaught exception as exit code 1, so an
        exit-code assertion that accepts 1 cannot tell a handled failure
        from a crash; this check closes that gap.
        """
        assert result.exception is None or isinstance(
            result.exception, SystemExit,
        )

    def test_non_pdf_file_exits_nonzero(self, tmp_path):
        """Non-PDF file → exit code 1 or 2, no unhandled crash."""
        in_dir = tmp_path / "in"
        in_dir.mkdir()
        _create_non_pdf(in_dir / "bad.pdf")
        out_dir = tmp_path / "out"

        result = runner.invoke(app, [str(in_dir), str(out_dir)])

        assert result.exit_code in (1, 2)
        self._assert_no_unhandled_exception(result)

    def test_empty_pdf_exits_nonzero(self, tmp_path):
        """Empty PDF → exit code 1 or 2, no unhandled crash."""
        in_dir = tmp_path / "in"
        in_dir.mkdir()
        _create_empty_pdf(in_dir / "empty.pdf")
        out_dir = tmp_path / "out"

        result = runner.invoke(app, [str(in_dir), str(out_dir)])

        assert result.exit_code in (1, 2)
        self._assert_no_unhandled_exception(result)

    def test_empty_input_dir_exits_2(self, tmp_path):
        """No PDF files in input dir → exit code 2."""
        in_dir = tmp_path / "in"
        in_dir.mkdir()
        out_dir = tmp_path / "out"

        result = runner.invoke(app, [str(in_dir), str(out_dir)])

        assert result.exit_code == 2

    def test_nonexistent_input_dir_exits_2(self, tmp_path):
        """Nonexistent input dir → exit code 2."""
        result = runner.invoke(
            app,
            [str(tmp_path / "nope"), str(tmp_path / "out")],
        )

        assert result.exit_code == 2

    def test_mixed_good_and_bad_exits_1(self, tmp_path):
        """Mix of valid + invalid PDFs → exit code 1 (partial), no crash."""
        in_dir = tmp_path / "in"
        in_dir.mkdir()
        # Copy a real fixture stored next to this test module.
        fixture = (
            Path(__file__).parent
            / "fixtures" / "input" / "simple_panel.pdf"
        )
        (in_dir / "good.pdf").write_bytes(fixture.read_bytes())
        # Add a bad PDF alongside the good one.
        _create_non_pdf(in_dir / "bad.pdf")
        out_dir = tmp_path / "out"

        result = runner.invoke(app, [str(in_dir), str(out_dir)])

        assert result.exit_code == 1
        # Exit code 1 alone would also be produced by an uncaught exception.
        self._assert_no_unhandled_exception(result)
|