"""Tests for view boundary segmentation.""" import pymupdf import pytest from pdf2imos.extract.geometry import extract_geometry from pdf2imos.extract.text import extract_text from pdf2imos.interpret.title_block import detect_title_block from pdf2imos.interpret.view_segmenter import ( _cluster_area, _cluster_bbox, _cluster_paths, _clusters_are_close, segment_views, ) from pdf2imos.models import PageExtraction, RawPath, RawText, ViewRegion, ViewType def make_filtered_extraction(pdf_path): """Run full pre-processing: extract → filter title block.""" doc = pymupdf.open(str(pdf_path)) page = doc[0] geo = extract_geometry(page) texts = extract_text(page) extraction = PageExtraction( paths=geo.paths, texts=tuple(texts), page_width=geo.page_width, page_height=geo.page_height, ) _, filtered = detect_title_block(extraction) return filtered # --------------------------------------------------------------------------- # Helper to build synthetic RawPath for unit tests # --------------------------------------------------------------------------- def _make_path(x0, y0, x1, y1, width=1.0): """Create a minimal RawPath with given bounding box.""" return RawPath( items=(("l", (x0, y0), (x1, y1)),), color=(0.0, 0.0, 0.0), fill=None, dashes="", width=width, rect=(x0, y0, x1, y1), ) # =========================================================================== # Unit tests for clustering helpers # =========================================================================== class TestClusterPaths: def test_empty_input(self): assert _cluster_paths([]) == [] def test_single_path(self): p = _make_path(0, 0, 10, 10) result = _cluster_paths([p]) assert len(result) == 1 assert result[0] == [p] def test_close_paths_merge(self): """Paths within gap_threshold merge into one cluster.""" p1 = _make_path(0, 0, 10, 10) p2 = _make_path(15, 0, 25, 10) # 5pt gap from p1 result = _cluster_paths([p1, p2], gap_threshold=10.0) assert len(result) == 1 def test_far_paths_separate(self): """Paths beyond 
gap_threshold stay as separate clusters.""" p1 = _make_path(0, 0, 10, 10) p2 = _make_path(100, 0, 110, 10) # 90pt gap from p1 result = _cluster_paths([p1, p2], gap_threshold=25.0) assert len(result) == 2 def test_chain_merge(self): """A-close-to-B and B-close-to-C → all in one cluster.""" p1 = _make_path(0, 0, 10, 10) p2 = _make_path(20, 0, 30, 10) # 10pt from p1 p3 = _make_path(40, 0, 50, 10) # 10pt from p2 result = _cluster_paths([p1, p2, p3], gap_threshold=15.0) assert len(result) == 1 def test_two_separate_clusters(self): """Two groups far apart → two clusters.""" group_a = [_make_path(0, 0, 10, 10), _make_path(5, 5, 15, 15)] group_b = [_make_path(200, 200, 210, 210), _make_path(205, 205, 215, 215)] result = _cluster_paths(group_a + group_b, gap_threshold=25.0) assert len(result) == 2 class TestClusterBbox: def test_single_path(self): p = _make_path(5, 10, 20, 30) assert _cluster_bbox([p]) == (5, 10, 20, 30) def test_multiple_paths(self): p1 = _make_path(0, 0, 10, 10) p2 = _make_path(20, 20, 30, 30) assert _cluster_bbox([p1, p2]) == (0, 0, 30, 30) class TestClusterArea: def test_area_computation(self): cluster = [_make_path(0, 0, 10, 20)] assert _cluster_area(cluster) == pytest.approx(200.0) def test_zero_area(self): cluster = [_make_path(5, 5, 5, 5)] assert _cluster_area(cluster) == pytest.approx(0.0) class TestClustersAreClose: def test_overlapping(self): a = [_make_path(0, 0, 20, 20)] b = [_make_path(10, 10, 30, 30)] assert _clusters_are_close(a, b, 5.0) def test_adjacent(self): a = [_make_path(0, 0, 10, 10)] b = [_make_path(10, 0, 20, 10)] # 0 gap assert _clusters_are_close(a, b, 5.0) def test_small_gap(self): a = [_make_path(0, 0, 10, 10)] b = [_make_path(13, 0, 23, 10)] # 3pt gap assert _clusters_are_close(a, b, 5.0) def test_large_gap(self): a = [_make_path(0, 0, 10, 10)] b = [_make_path(50, 0, 60, 10)] # 40pt gap assert not _clusters_are_close(a, b, 25.0) # =========================================================================== # Integration tests 
# with real PDFs
# ===========================================================================


class TestSegmentViews:
    """Run ``segment_views`` on title-block-filtered fixture PDFs."""

    def test_returns_list(self, simple_panel_pdf):
        """``segment_views`` returns a list."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        assert isinstance(result, list)

    def test_views_are_view_regions(self, simple_panel_pdf):
        """Every returned element is a ``ViewRegion``."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        assert all(isinstance(v, ViewRegion) for v in result)

    def test_detects_at_least_two_views(self, simple_panel_pdf):
        """Must detect at least 2 views (FRONT + one more)."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        assert len(result) >= 2, f"Expected >=2 views, got {len(result)}"

    def test_front_view_present(self, simple_panel_pdf):
        """FRONT view must always be detected."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        view_types = {v.view_type for v in result}
        assert ViewType.FRONT in view_types, f"No FRONT view. Got: {view_types}"

    def test_front_view_is_lowest(self, simple_panel_pdf):
        """FRONT view should have the lowest y-center (bottom of page in CAD)."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        if len(result) < 2:
            pytest.skip("Less than 2 views detected")
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        assert front is not None
        front_cy = (front.bounds[1] + front.bounds[3]) / 2
        for v in result:
            # Only TOP is compared against FRONT: SIDE may sit at a similar y,
            # so no ordering is asserted for it.
            if v.view_type != ViewType.TOP:
                continue
            other_cy = (v.bounds[1] + v.bounds[3]) / 2
            assert front_cy < other_cy, (
                f"FRONT cy={front_cy} should be below TOP cy={other_cy}"
            )

    def test_each_view_has_paths(self, simple_panel_pdf):
        """Each detected view has at least one path."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        for view in result:
            assert len(view.paths) > 0, f"{view.view_type} has no paths"

    def test_all_fixtures_segmentable(self, all_fixture_pdfs):
        """All fixture PDFs can be segmented without crashing."""
        for pdf_path in all_fixture_pdfs:
            filtered = make_filtered_extraction(pdf_path)
            result = segment_views(filtered)
            assert isinstance(result, list)

    def test_cabinet_has_multiple_views(self, cabinet_basic_pdf):
        """Cabinet drawing should detect multiple views."""
        filtered = make_filtered_extraction(cabinet_basic_pdf)
        result = segment_views(filtered)
        assert len(result) >= 2

    def test_view_bounds_are_reasonable(self, simple_panel_pdf):
        """View bounds should be within page dimensions (±5 tolerance)."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        for view in result:
            x0, y0, x1, y1 = view.bounds
            assert x0 >= -5, f"x0 out of range: {x0}"
            assert y0 >= -5, f"y0 out of range: {y0}"
            assert x1 <= filtered.page_width + 5, f"x1 out of range: {x1}"
            assert y1 <= filtered.page_height + 5, f"y1 out of range: {y1}"

    def test_views_dont_overlap_much(self, simple_panel_pdf):
        """Distinct views should not overlap significantly."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        result = segment_views(filtered)
        if len(result) < 2:
            pytest.skip("Less than 2 views")
        for i, v1 in enumerate(result):
            for v2 in result[i + 1 :]:
                overlap = _bbox_overlap_area(v1.bounds, v2.bounds)
                smaller = min(_bbox_area(v1.bounds), _bbox_area(v2.bounds))
                # Guard against division by zero for degenerate bounds.
                min_area = smaller if smaller > 0 else 1
                ratio = overlap / min_area
                # Overlap should be < 20% of smaller view
                assert ratio < 0.2, (
                    f"{v1.view_type} and {v2.view_type} overlap "
                    f"{ratio:.1%}"
                )


class TestSegmentViewsEmpty:
    """Behaviour of ``segment_views`` when there is nothing to segment."""

    def test_empty_extraction(self):
        """Empty extraction returns empty list."""
        extraction = PageExtraction(
            paths=(), texts=(), page_width=595, page_height=842
        )
        result = segment_views(extraction)
        assert result == []


class TestSegmentViewsSynthetic:
    """Test with synthetic data mimicking third-angle projection layout."""

    def _make_three_view_extraction(self):
        """Create extraction with clear front/top/side layout.

        Layout (CAD coords, y-up):
            Top view:   x=100-300, y=400-450 (above front)
            Front view: x=100-300, y=100-350 (bottom-left)
            Side view:  x=350-400, y=100-350 (right of front)
        """
        # Front view paths (large rectangle)
        front_paths = [
            _make_path(100, 100, 300, 350),
            _make_path(120, 120, 280, 330),
        ]
        # Top view paths (above front)
        top_paths = [
            _make_path(100, 400, 300, 450),
            _make_path(120, 410, 280, 440),
        ]
        # Side view paths (right of front)
        side_paths = [
            _make_path(350, 100, 400, 350),
            _make_path(355, 120, 395, 330),
        ]
        all_paths = tuple(front_paths + top_paths + side_paths)
        return PageExtraction(
            paths=all_paths,
            texts=(),
            page_width=595,
            page_height=842,
        )

    def test_detects_three_views(self):
        """The three synthetic path groups yield exactly three views."""
        extraction = self._make_three_view_extraction()
        result = segment_views(extraction)
        assert len(result) == 3

    def test_front_is_bottom_left(self):
        """The FRONT view's lower y bound lies below 200."""
        extraction = self._make_three_view_extraction()
        result = segment_views(extraction)
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        assert front is not None
        # Front should be around y=100-350
        assert front.bounds[1] < 200, f"Front y0={front.bounds[1]} too high"

    def test_top_is_above_front(self):
        """TOP's y-center is greater than FRONT's."""
        extraction = self._make_three_view_extraction()
        result = segment_views(extraction)
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        top = next((v for v in result if v.view_type == ViewType.TOP), None)
        assert front is not None
        assert top is not None
        front_cy = (front.bounds[1] + front.bounds[3]) / 2
        top_cy = (top.bounds[1] + top.bounds[3]) / 2
        assert top_cy > front_cy, "TOP should be above FRONT"

    def test_side_is_right_of_front(self):
        """SIDE's x-center is greater than FRONT's."""
        extraction = self._make_three_view_extraction()
        result = segment_views(extraction)
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        side = next((v for v in result if v.view_type == ViewType.SIDE), None)
        assert front is not None
        assert side is not None
        front_cx = (front.bounds[0] + front.bounds[2]) / 2
        side_cx = (side.bounds[0] + side.bounds[2]) / 2
        assert side_cx > front_cx, "SIDE should be right of FRONT"

    def test_text_assignment_with_coord_conversion(self):
        """Texts in PDF coords should be assigned to correct views."""
        extraction = self._make_three_view_extraction()

        # Add a text that (in PDF coords) lands in the front view area
        # Front view in CAD: y=100-350
        # In PDF coords: y = page_h - cad_y, so y = 842-350=492 to 842-100=742
        text_in_front = RawText(
            text="600",
            bbox=(150.0, 600.0, 170.0, 612.0),  # PDF coords
            font="Helvetica",
            size=10.0,
            color=0,
        )
        # Text in top view area
        # Top in CAD: y=400-450
        # In PDF coords: y = 842-450=392 to 842-400=442
        text_in_top = RawText(
            text="720",
            bbox=(150.0, 400.0, 170.0, 412.0),  # PDF coords
            font="Helvetica",
            size=10.0,
            color=0,
        )
        extraction_with_text = PageExtraction(
            paths=extraction.paths,
            texts=(text_in_front, text_in_top),
            page_width=595,
            page_height=842,
        )

        result = segment_views(extraction_with_text)
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        top = next((v for v in result if v.view_type == ViewType.TOP), None)
        assert front is not None

        # "600" should be assigned to front view
        front_text_vals = [t.text for t in front.texts]
        assert "600" in front_text_vals, (
            f"Text '600' not in front view. Front texts: {front_text_vals}"
        )

        # The TOP check is only made when a TOP view was detected.
        if top is not None:
            top_text_vals = [t.text for t in top.texts]
            assert "720" in top_text_vals, (
                f"Text '720' not in top view. Top texts: {top_text_vals}"
            )


# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------


def _bbox_overlap_area(a, b):
    """Compute overlap area of two bounding boxes.

    Both boxes are ``(x0, y0, x1, y1)`` sequences. Returns ``0.0`` when the
    boxes do not intersect or only touch along an edge or corner.
    """
    x0 = max(a[0], b[0])
    y0 = max(a[1], b[1])
    x1 = min(a[2], b[2])
    y1 = min(a[3], b[3])
    if x1 <= x0 or y1 <= y0:
        return 0.0
    return (x1 - x0) * (y1 - y0)


def _bbox_area(bbox):
    """Compute area of a bounding box.

    ``bbox`` is ``(x0, y0, x1, y1)``; absolute extents are used, so the
    result is non-negative regardless of corner order.
    """
    return abs(bbox[2] - bbox[0]) * abs(bbox[3] - bbox[1])