pdf2cad/tests/test_view_segmenter.py

"""Tests for view boundary segmentation."""

import pymupdf
import pytest

from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block
from pdf2imos.interpret.view_segmenter import (
    _cluster_area,
    _cluster_bbox,
    _cluster_paths,
    _clusters_are_close,
    segment_views,
)
from pdf2imos.models import PageExtraction, RawPath, RawText, ViewRegion, ViewType


def make_filtered_extraction(pdf_path):
    """Run full pre-processing: extract → filter title block.

    Opens ``pdf_path``, takes its first page, extracts geometry and text,
    wraps them in a ``PageExtraction`` and passes that through
    ``detect_title_block``.

    Args:
        pdf_path: Location of the PDF to load; converted with ``str()``
            before being handed to ``pymupdf.open``.

    Returns:
        The second element of the ``detect_title_block`` result
        (presumably the extraction with title-block content removed).
    """
    # Open the document as a context manager so it is closed once the
    # extraction is finished, including when any extraction step raises.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        extraction = PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )
        _, filtered = detect_title_block(extraction)
    return filtered


# ---------------------------------------------------------------------------
# Helper to build synthetic RawPath for unit tests
# ---------------------------------------------------------------------------

def _make_path(x0, y0, x1, y1, width=1.0):
    """Build a bare-bones RawPath whose ``rect`` is ``(x0, y0, x1, y1)``.

    The path carries a single ``"l"`` item running from ``(x0, y0)`` to
    ``(x1, y1)``, colour ``(0.0, 0.0, 0.0)``, no fill and an empty dash
    pattern.
    """
    start = (x0, y0)
    end = (x1, y1)
    segment = ("l", start, end)
    bounding_rect = (x0, y0, x1, y1)
    return RawPath(
        items=(segment,),
        color=(0.0, 0.0, 0.0),
        fill=None,
        dashes="",
        width=width,
        rect=bounding_rect,
    )


# ===========================================================================
# Unit tests for clustering helpers
# ===========================================================================


class TestClusterPaths:
    """Unit tests for ``_cluster_paths`` using synthetic paths."""

    def test_empty_input(self):
        """No paths in → empty list out."""
        clusters = _cluster_paths([])
        assert clusters == []

    def test_single_path(self):
        """A lone path forms exactly one cluster containing only itself."""
        lone = _make_path(0, 0, 10, 10)
        clusters = _cluster_paths([lone])
        assert len(clusters) == 1
        assert clusters[0] == [lone]

    def test_close_paths_merge(self):
        """Paths within gap_threshold merge into one cluster."""
        left = _make_path(0, 0, 10, 10)
        # Starts 5pt to the right of where ``left`` ends.
        right = _make_path(15, 0, 25, 10)
        clusters = _cluster_paths([left, right], gap_threshold=10.0)
        assert len(clusters) == 1

    def test_far_paths_separate(self):
        """Paths beyond gap_threshold stay as separate clusters."""
        near = _make_path(0, 0, 10, 10)
        # Starts 90pt to the right of where ``near`` ends.
        far = _make_path(100, 0, 110, 10)
        clusters = _cluster_paths([near, far], gap_threshold=25.0)
        assert len(clusters) == 2

    def test_chain_merge(self):
        """A-close-to-B and B-close-to-C → all in one cluster."""
        # Three boxes in a row, each 10pt from its neighbour.
        chain = [
            _make_path(0, 0, 10, 10),
            _make_path(20, 0, 30, 10),
            _make_path(40, 0, 50, 10),
        ]
        clusters = _cluster_paths(chain, gap_threshold=15.0)
        assert len(clusters) == 1

    def test_two_separate_clusters(self):
        """Two groups far apart → two clusters."""
        first_group = [_make_path(0, 0, 10, 10), _make_path(5, 5, 15, 15)]
        second_group = [
            _make_path(200, 200, 210, 210),
            _make_path(205, 205, 215, 215),
        ]
        clusters = _cluster_paths(
            [*first_group, *second_group], gap_threshold=25.0
        )
        assert len(clusters) == 2


class TestClusterBbox:
    """Unit tests for ``_cluster_bbox``."""

    def test_single_path(self):
        """A one-path cluster reports that path's own box."""
        bbox = _cluster_bbox([_make_path(5, 10, 20, 30)])
        assert bbox == (5, 10, 20, 30)

    def test_multiple_paths(self):
        """The box of several paths spans all of them."""
        cluster = [
            _make_path(0, 0, 10, 10),
            _make_path(20, 20, 30, 30),
        ]
        bbox = _cluster_bbox(cluster)
        assert bbox == (0, 0, 30, 30)


class TestClusterArea:
    """Unit tests for ``_cluster_area``."""

    def test_area_computation(self):
        """A 10 x 20 box has area 200."""
        area = _cluster_area([_make_path(0, 0, 10, 20)])
        assert area == pytest.approx(200.0)

    def test_zero_area(self):
        """A degenerate (point-sized) box has area 0."""
        area = _cluster_area([_make_path(5, 5, 5, 5)])
        assert area == pytest.approx(0.0)


class TestClustersAreClose:
    """Unit tests for ``_clusters_are_close``."""

    def test_overlapping(self):
        """Overlapping boxes count as close."""
        first = [_make_path(0, 0, 20, 20)]
        second = [_make_path(10, 10, 30, 30)]
        close = _clusters_are_close(first, second, 5.0)
        assert close

    def test_adjacent(self):
        """Boxes sharing an edge (0 gap) count as close."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(10, 0, 20, 10)]
        close = _clusters_are_close(first, second, 5.0)
        assert close

    def test_small_gap(self):
        """A 3pt gap is within a 5pt threshold."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(13, 0, 23, 10)]
        close = _clusters_are_close(first, second, 5.0)
        assert close

    def test_large_gap(self):
        """A 40pt gap exceeds a 25pt threshold."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(50, 0, 60, 10)]
        close = _clusters_are_close(first, second, 25.0)
        assert not close


# ===========================================================================
# Integration tests with real PDFs
# ===========================================================================


class TestSegmentViews:
    """Integration tests that run ``segment_views`` on fixture PDFs."""

    @staticmethod
    def _segment(pdf_path):
        """Pre-process ``pdf_path`` and return ``(filtered, views)``."""
        filtered = make_filtered_extraction(pdf_path)
        return filtered, segment_views(filtered)

    def test_returns_list(self, simple_panel_pdf):
        _, result = self._segment(simple_panel_pdf)
        assert isinstance(result, list)

    def test_views_are_view_regions(self, simple_panel_pdf):
        _, result = self._segment(simple_panel_pdf)
        for candidate in result:
            assert isinstance(candidate, ViewRegion)

    def test_detects_at_least_two_views(self, simple_panel_pdf):
        """Must detect at least 2 views (FRONT + one more)."""
        _, result = self._segment(simple_panel_pdf)
        assert len(result) >= 2, f"Expected >=2 views, got {len(result)}"

    def test_front_view_present(self, simple_panel_pdf):
        """FRONT view must always be detected."""
        _, result = self._segment(simple_panel_pdf)
        view_types = {v.view_type for v in result}
        assert ViewType.FRONT in view_types, f"No FRONT view. Got: {view_types}"

    def test_front_view_is_lowest(self, simple_panel_pdf):
        """FRONT view should have the lowest y-center (bottom of page in CAD)."""
        _, result = self._segment(simple_panel_pdf)
        if len(result) < 2:
            pytest.skip("Less than 2 views detected")

        front = None
        for candidate in result:
            if candidate.view_type == ViewType.FRONT:
                front = candidate
                break
        assert front is not None

        front_cy = (front.bounds[1] + front.bounds[3]) / 2
        for v in result:
            if v.view_type == ViewType.FRONT:
                continue
            other_cy = (v.bounds[1] + v.bounds[3]) / 2
            # Only TOP is compared strictly; other view types (e.g. SIDE)
            # may sit at a similar height to FRONT, so they are not checked.
            if v.view_type != ViewType.TOP:
                continue
            assert front_cy < other_cy, (
                f"FRONT cy={front_cy} should be below TOP cy={other_cy}"
            )

    def test_each_view_has_paths(self, simple_panel_pdf):
        """Each detected view has at least one path."""
        _, result = self._segment(simple_panel_pdf)
        for view in result:
            path_count = len(view.paths)
            assert path_count > 0, f"{view.view_type} has no paths"

    def test_all_fixtures_segmentable(self, all_fixture_pdfs):
        """All fixture PDFs can be segmented without crashing."""
        for pdf_path in all_fixture_pdfs:
            _, result = self._segment(pdf_path)
            assert isinstance(result, list)

    def test_cabinet_has_multiple_views(self, cabinet_basic_pdf):
        """Cabinet drawing should detect multiple views."""
        _, result = self._segment(cabinet_basic_pdf)
        assert len(result) >= 2

    def test_view_bounds_are_reasonable(self, simple_panel_pdf):
        """View bounds should be within page dimensions."""
        filtered, result = self._segment(simple_panel_pdf)
        for view in result:
            x0, y0, x1, y1 = view.bounds
            # 5-unit slack on every side of the page rectangle.
            assert x0 >= -5, f"x0 out of range: {x0}"
            assert y0 >= -5, f"y0 out of range: {y0}"
            assert x1 <= filtered.page_width + 5, f"x1 out of range: {x1}"
            assert y1 <= filtered.page_height + 5, f"y1 out of range: {y1}"

    def test_views_dont_overlap_much(self, simple_panel_pdf):
        """Distinct views should not overlap significantly."""
        _, result = self._segment(simple_panel_pdf)
        if len(result) < 2:
            pytest.skip("Less than 2 views")

        # Every unordered pair of views, in list order.
        view_pairs = [
            (earlier, later)
            for index, earlier in enumerate(result)
            for later in result[index + 1 :]
        ]
        for v1, v2 in view_pairs:
            overlap = _bbox_overlap_area(v1.bounds, v2.bounds)
            smaller_area = min(_bbox_area(v1.bounds), _bbox_area(v2.bounds))
            # Fall back to 1 so a zero-area view cannot cause a division by zero.
            min_area = smaller_area if smaller_area > 0 else 1
            # Overlap should be < 20% of smaller view
            assert overlap / min_area < 0.2, (
                f"{v1.view_type} and {v2.view_type} overlap "
                f"{overlap / min_area:.1%}"
            )


class TestSegmentViewsEmpty:
    """Behaviour of ``segment_views`` when there is nothing to segment."""

    def test_empty_extraction(self):
        """Empty extraction returns empty list."""
        blank_page = PageExtraction(
            paths=(),
            texts=(),
            page_width=595,
            page_height=842,
        )
        views = segment_views(blank_page)
        assert views == []


class TestSegmentViewsSynthetic:
    """Test with synthetic data mimicking third-angle projection layout."""

    @staticmethod
    def _find_view(views, view_type):
        """Return the first view of ``view_type`` in ``views``, or None."""
        return next((v for v in views if v.view_type == view_type), None)

    @staticmethod
    def _center(bounds, lo_index, hi_index):
        """Return the midpoint of two entries of ``bounds``."""
        return (bounds[lo_index] + bounds[hi_index]) / 2

    def _make_three_view_extraction(self):
        """Create extraction with clear front/top/side layout.

        Layout (CAD coords, y-up):
          Top view:  x=100-300, y=400-450  (above front)
          Front view: x=100-300, y=100-350  (bottom-left)
          Side view:  x=350-400, y=100-350  (right of front)
        """
        # Two boxes per view, listed in front → top → side order.
        boxes = (
            # Front view paths (large rectangle)
            (100, 100, 300, 350),
            (120, 120, 280, 330),
            # Top view paths (above front)
            (100, 400, 300, 450),
            (120, 410, 280, 440),
            # Side view paths (right of front)
            (350, 100, 400, 350),
            (355, 120, 395, 330),
        )
        return PageExtraction(
            paths=tuple(_make_path(*box) for box in boxes),
            texts=(),
            page_width=595,
            page_height=842,
        )

    def test_detects_three_views(self):
        views = segment_views(self._make_three_view_extraction())
        assert len(views) == 3

    def test_front_is_bottom_left(self):
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        assert front is not None
        # Front should be around y=100-350
        assert front.bounds[1] < 200, f"Front y0={front.bounds[1]} too high"

    def test_top_is_above_front(self):
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        top = self._find_view(views, ViewType.TOP)
        assert front is not None
        assert top is not None
        front_cy = self._center(front.bounds, 1, 3)
        top_cy = self._center(top.bounds, 1, 3)
        assert top_cy > front_cy, "TOP should be above FRONT"

    def test_side_is_right_of_front(self):
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        side = self._find_view(views, ViewType.SIDE)
        assert front is not None
        assert side is not None
        front_cx = self._center(front.bounds, 0, 2)
        side_cx = self._center(side.bounds, 0, 2)
        assert side_cx > front_cx, "SIDE should be right of FRONT"

    def test_text_assignment_with_coord_conversion(self):
        """Texts in PDF coords should be assigned to correct views."""
        base = self._make_three_view_extraction()

        def pdf_text(value, bbox):
            # Both labels share font, size and colour; only text/bbox vary.
            return RawText(
                text=value,
                bbox=bbox,
                font="Helvetica",
                size=10.0,
                color=0,
            )

        # Front view in CAD: y=100-350
        # In PDF coords: y = page_h - cad_y, so y = 842-350=492 to 842-100=742
        label_in_front = pdf_text("600", (150.0, 600.0, 170.0, 612.0))
        # Top in CAD: y=400-450
        # In PDF coords: y = 842-450=392 to 842-400=442
        label_in_top = pdf_text("720", (150.0, 400.0, 170.0, 412.0))

        views = segment_views(
            PageExtraction(
                paths=base.paths,
                texts=(label_in_front, label_in_top),
                page_width=595,
                page_height=842,
            )
        )

        front = self._find_view(views, ViewType.FRONT)
        top = self._find_view(views, ViewType.TOP)
        assert front is not None

        # "600" should be assigned to front view
        front_text_vals = [t.text for t in front.texts]
        assert "600" in front_text_vals, (
            f"Text '600' not in front view. Front texts: {front_text_vals}"
        )

        # The TOP check only applies when a TOP view was detected.
        if top is None:
            return
        top_text_vals = [t.text for t in top.texts]
        assert "720" in top_text_vals, (
            f"Text '720' not in top view. Top texts: {top_text_vals}"
        )


# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------


def _bbox_overlap_area(a, b):
    """Compute overlap area of two bounding boxes."""
    x0 = max(a[0], b[0])
    y0 = max(a[1], b[1])
    x1 = min(a[2], b[2])
    y1 = min(a[3], b[3])
    if x1 <= x0 or y1 <= y0:
        return 0.0
    return (x1 - x0) * (y1 - y0)


def _bbox_area(bbox):
    """Compute area of a bounding box."""
    return abs(bbox[2] - bbox[0]) * abs(bbox[3] - bbox[1])