"""Tests for pdf2imos custom exception hierarchy and error handling.""" from pathlib import Path import pymupdf import pytest from typer.testing import CliRunner from pdf2imos.cli import app, process_pdf from pdf2imos.errors import ( DimensionExtractionError, OutputWriteError, Pdf2ImosError, PdfExtractionError, ViewSegmentationError, ) runner = CliRunner() # --------------------------------------------------------------------------- # Helpers: create broken/edge-case PDFs on disk # --------------------------------------------------------------------------- def _create_non_pdf(path: Path) -> Path: """Write a plain-text file with .pdf extension.""" path.write_text("This is not a PDF file at all.") return path def _create_empty_pdf(path: Path) -> Path: """Write a minimal valid PDF structure with 0 pages.""" pdf_bytes = ( b"%PDF-1.4\n" b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n" b"2 0 obj\n<< /Type /Pages /Kids [] /Count 0 >>\nendobj\n" b"xref\n0 3\n" b"0000000000 65535 f \n" b"0000000010 00000 n \n" b"0000000059 00000 n \n" b"trailer\n<< /Size 3 /Root 1 0 R >>\n" b"startxref\n110\n%%EOF" ) path.write_bytes(pdf_bytes) return path def _create_text_only_pdf(path: Path) -> Path: """Create a PDF with text but zero vector paths (raster-like).""" doc = pymupdf.open() page = doc.new_page() page.insert_text((100, 100), "Hello world", fontsize=12) doc.save(str(path)) doc.close() return path # --------------------------------------------------------------------------- # Test: Exception Hierarchy # --------------------------------------------------------------------------- class TestExceptionHierarchy: """Verify all custom exceptions inherit from Pdf2ImosError.""" def test_pdf2imos_error_is_base(self): assert issubclass(Pdf2ImosError, Exception) def test_pdf_extraction_error_inherits(self): assert issubclass(PdfExtractionError, Pdf2ImosError) def test_view_segmentation_error_inherits(self): assert issubclass(ViewSegmentationError, Pdf2ImosError) def test_dimension_extraction_error_inherits(self): assert issubclass(DimensionExtractionError, Pdf2ImosError) def test_output_write_error_inherits(self): assert issubclass(OutputWriteError, Pdf2ImosError) def test_all_catchable_as_pdf2imos_error(self): """All custom exceptions can be caught via Pdf2ImosError.""" for exc_class in ( PdfExtractionError, ViewSegmentationError, DimensionExtractionError, OutputWriteError, ): with pytest.raises(Pdf2ImosError): raise exc_class("test") def test_output_write_error_can_be_raised(self): """OutputWriteError can be raised and caught independently.""" with pytest.raises(OutputWriteError, match="disk full"): raise OutputWriteError("disk full") # --------------------------------------------------------------------------- # Test: process_pdf error paths # --------------------------------------------------------------------------- class TestProcessPdfErrors: """Verify process_pdf raises correct custom exceptions.""" def test_non_pdf_raises_extraction_error(self, tmp_path): fake = _create_non_pdf(tmp_path / "fake.pdf") with pytest.raises(PdfExtractionError, match="Cannot open"): process_pdf(fake, tmp_path / "out") def test_empty_pdf_raises_extraction_error(self, tmp_path): empty = _create_empty_pdf(tmp_path / "empty.pdf") with pytest.raises(PdfExtractionError, match="Empty PDF"): process_pdf(empty, tmp_path / "out") def test_text_only_pdf_raises_no_vector_content(self, tmp_path): txt_pdf = _create_text_only_pdf(tmp_path / "text_only.pdf") with pytest.raises( PdfExtractionError, match="No vector content", ): process_pdf(txt_pdf, tmp_path / "out") # --------------------------------------------------------------------------- # Test: CLI handles errors gracefully (no crash/traceback to user) # --------------------------------------------------------------------------- class TestCliErrorHandling: """CLI should catch errors and exit with proper codes.""" def test_non_pdf_file_exits_nonzero(self, tmp_path): """Non-PDF file → exit code 1 or 2, no unhandled crash.""" in_dir = tmp_path / "in" in_dir.mkdir() _create_non_pdf(in_dir / "bad.pdf") out_dir = tmp_path / "out" result = runner.invoke( app, [str(in_dir), str(out_dir)], ) assert result.exit_code in (1, 2) # No unhandled traceback in output assert result.exception is None or isinstance( result.exception, SystemExit, ) def test_empty_pdf_exits_nonzero(self, tmp_path): """Empty PDF → exit code 1 or 2.""" in_dir = tmp_path / "in" in_dir.mkdir() _create_empty_pdf(in_dir / "empty.pdf") out_dir = tmp_path / "out" result = runner.invoke( app, [str(in_dir), str(out_dir)], ) assert result.exit_code in (1, 2) def test_empty_input_dir_exits_2(self, tmp_path): """No PDF files in input dir → exit code 2.""" in_dir = tmp_path / "in" in_dir.mkdir() out_dir = tmp_path / "out" result = runner.invoke( app, [str(in_dir), str(out_dir)], ) assert result.exit_code == 2 def test_nonexistent_input_dir_exits_2(self, tmp_path): """Nonexistent input dir → exit code 2.""" result = runner.invoke( app, [str(tmp_path / "nope"), str(tmp_path / "out")], ) assert result.exit_code == 2 def test_mixed_good_and_bad_exits_1(self, tmp_path): """Mix of valid + invalid PDFs → exit code 1 (partial).""" in_dir = tmp_path / "in" in_dir.mkdir() # Copy a real fixture fixture = ( Path(__file__).parent / "fixtures" / "input" / "simple_panel.pdf" ) (in_dir / "good.pdf").write_bytes(fixture.read_bytes()) # Add a bad PDF _create_non_pdf(in_dir / "bad.pdf") out_dir = tmp_path / "out" result = runner.invoke( app, [str(in_dir), str(out_dir)], ) assert result.exit_code == 1