Files
pdf2cad/tests/test_view_segmenter.py
2026-03-03 21:24:02 +00:00

386 lines
14 KiB
Python

"""Tests for view boundary segmentation."""
import pymupdf
import pytest
from pdf2imos.extract.geometry import extract_geometry
from pdf2imos.extract.text import extract_text
from pdf2imos.interpret.title_block import detect_title_block
from pdf2imos.interpret.view_segmenter import (
_cluster_area,
_cluster_bbox,
_cluster_paths,
_clusters_are_close,
segment_views,
)
from pdf2imos.models import PageExtraction, RawPath, RawText, ViewRegion, ViewType
def make_filtered_extraction(pdf_path):
    """Run full pre-processing on the first page: extract → filter title block.

    Opens the PDF, extracts geometry and text from page 0, bundles them into
    a ``PageExtraction`` and passes that through ``detect_title_block``.

    Args:
        pdf_path: Location of the PDF; converted with ``str()`` before opening.

    Returns:
        The second element of ``detect_title_block``'s result, i.e. the
        extraction after title-block filtering.
    """
    # The context manager closes the document even when extraction raises,
    # so repeated calls across many tests do not leak open file handles.
    with pymupdf.open(str(pdf_path)) as doc:
        page = doc[0]
        geo = extract_geometry(page)
        texts = extract_text(page)
        extraction = PageExtraction(
            paths=geo.paths,
            texts=tuple(texts),
            page_width=geo.page_width,
            page_height=geo.page_height,
        )
        # Filtering stays inside the ``with`` so nothing touches the page
        # after the document has been closed.
        _, filtered = detect_title_block(extraction)
    return filtered
# ---------------------------------------------------------------------------
# Helper to build synthetic RawPath for unit tests
# ---------------------------------------------------------------------------
def _make_path(x0, y0, x1, y1, width=1.0):
    """Build a minimal RawPath whose ``rect`` is ``(x0, y0, x1, y1)``.

    The path holds a single ``"l"`` item running from ``(x0, y0)`` to
    ``(x1, y1)``, with color ``(0.0, 0.0, 0.0)``, no fill and empty dashes.
    """
    start = (x0, y0)
    end = (x1, y1)
    segment = ("l", start, end)
    return RawPath(
        items=(segment,),
        color=(0.0, 0.0, 0.0),
        fill=None,
        dashes="",
        width=width,
        rect=(x0, y0, x1, y1),
    )
# ===========================================================================
# Unit tests for clustering helpers
# ===========================================================================
class TestClusterPaths:
    """Checks for ``_cluster_paths`` on small hand-built inputs."""

    def test_empty_input(self):
        """No paths in, no clusters out."""
        clusters = _cluster_paths([])
        assert clusters == []

    def test_single_path(self):
        """A lone path forms exactly one cluster containing only itself."""
        lone = _make_path(0, 0, 10, 10)
        clusters = _cluster_paths([lone])
        assert len(clusters) == 1
        assert clusters[0] == [lone]

    def test_close_paths_merge(self):
        """Paths within gap_threshold merge into one cluster."""
        left = _make_path(0, 0, 10, 10)
        right = _make_path(15, 0, 25, 10)  # 5pt gap from left
        clusters = _cluster_paths([left, right], gap_threshold=10.0)
        assert len(clusters) == 1

    def test_far_paths_separate(self):
        """Paths beyond gap_threshold stay as separate clusters."""
        near = _make_path(0, 0, 10, 10)
        far = _make_path(100, 0, 110, 10)  # 90pt gap from near
        clusters = _cluster_paths([near, far], gap_threshold=25.0)
        assert len(clusters) == 2

    def test_chain_merge(self):
        """A-close-to-B and B-close-to-C → all in one cluster."""
        chain = [
            _make_path(0, 0, 10, 10),
            _make_path(20, 0, 30, 10),  # 10pt from the first
            _make_path(40, 0, 50, 10),  # 10pt from the second
        ]
        clusters = _cluster_paths(chain, gap_threshold=15.0)
        assert len(clusters) == 1

    def test_two_separate_clusters(self):
        """Two groups far apart → two clusters."""
        near_origin = [_make_path(0, 0, 10, 10), _make_path(5, 5, 15, 15)]
        far_away = [
            _make_path(200, 200, 210, 210),
            _make_path(205, 205, 215, 215),
        ]
        clusters = _cluster_paths(near_origin + far_away, gap_threshold=25.0)
        assert len(clusters) == 2
class TestClusterBbox:
    """Checks for ``_cluster_bbox``."""

    def test_single_path(self):
        """With one path, the cluster bbox equals that path's rect."""
        only = _make_path(5, 10, 20, 30)
        bbox = _cluster_bbox([only])
        assert bbox == (5, 10, 20, 30)

    def test_multiple_paths(self):
        """The cluster bbox spans every path in the cluster."""
        members = [_make_path(0, 0, 10, 10), _make_path(20, 20, 30, 30)]
        bbox = _cluster_bbox(members)
        assert bbox == (0, 0, 30, 30)
class TestClusterArea:
    """Checks for ``_cluster_area``."""

    def test_area_computation(self):
        """A 10 x 20 box yields an area of 200."""
        area = _cluster_area([_make_path(0, 0, 10, 20)])
        assert area == pytest.approx(200.0)

    def test_zero_area(self):
        """A degenerate (point-sized) box yields zero area."""
        area = _cluster_area([_make_path(5, 5, 5, 5)])
        assert area == pytest.approx(0.0)
class TestClustersAreClose:
    """Checks for ``_clusters_are_close`` at various separations."""

    def test_overlapping(self):
        """Overlapping boxes count as close."""
        first = [_make_path(0, 0, 20, 20)]
        second = [_make_path(10, 10, 30, 30)]
        assert _clusters_are_close(first, second, 5.0)

    def test_adjacent(self):
        """Boxes sharing an edge (0 gap) count as close."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(10, 0, 20, 10)]
        assert _clusters_are_close(first, second, 5.0)

    def test_small_gap(self):
        """A 3pt gap is within a 5pt threshold."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(13, 0, 23, 10)]
        assert _clusters_are_close(first, second, 5.0)

    def test_large_gap(self):
        """A 40pt gap exceeds a 25pt threshold."""
        first = [_make_path(0, 0, 10, 10)]
        second = [_make_path(50, 0, 60, 10)]
        assert not _clusters_are_close(first, second, 25.0)
# ===========================================================================
# Integration tests with real PDFs
# ===========================================================================
class TestSegmentViews:
    """Run ``segment_views`` on fixture PDFs after title-block filtering."""

    @staticmethod
    def _segment(pdf_path):
        """Pre-process *pdf_path* and return the views ``segment_views`` finds."""
        return segment_views(make_filtered_extraction(pdf_path))

    def test_returns_list(self, simple_panel_pdf):
        """The segmenter hands back a list."""
        assert isinstance(self._segment(simple_panel_pdf), list)

    def test_views_are_view_regions(self, simple_panel_pdf):
        """Every element of the result is a ViewRegion."""
        for region in self._segment(simple_panel_pdf):
            assert isinstance(region, ViewRegion)

    def test_detects_at_least_two_views(self, simple_panel_pdf):
        """Must detect at least 2 views (FRONT + one more)."""
        result = self._segment(simple_panel_pdf)
        assert len(result) >= 2, f"Expected >=2 views, got {len(result)}"

    def test_front_view_present(self, simple_panel_pdf):
        """FRONT view must always be detected."""
        view_types = {v.view_type for v in self._segment(simple_panel_pdf)}
        assert ViewType.FRONT in view_types, f"No FRONT view. Got: {view_types}"

    def test_front_view_is_lowest(self, simple_panel_pdf):
        """FRONT view should have the lowest y-center (bottom of page in CAD)."""
        result = self._segment(simple_panel_pdf)
        if len(result) < 2:
            pytest.skip("Less than 2 views detected")
        front = next((v for v in result if v.view_type == ViewType.FRONT), None)
        assert front is not None
        front_cy = (front.bounds[1] + front.bounds[3]) / 2
        for v in result:
            if v.view_type == ViewType.FRONT:
                continue
            other_cy = (v.bounds[1] + v.bounds[3]) / 2
            # Only TOP is compared strictly: SIDE may sit at a similar y to
            # FRONT, so it is deliberately left unchecked.
            if v.view_type != ViewType.TOP:
                continue
            assert front_cy < other_cy, (
                f"FRONT cy={front_cy} should be below TOP cy={other_cy}"
            )

    def test_each_view_has_paths(self, simple_panel_pdf):
        """Each detected view has at least one path."""
        for view in self._segment(simple_panel_pdf):
            assert len(view.paths) > 0, f"{view.view_type} has no paths"

    def test_all_fixtures_segmentable(self, all_fixture_pdfs):
        """All fixture PDFs can be segmented without crashing."""
        for pdf_path in all_fixture_pdfs:
            assert isinstance(self._segment(pdf_path), list)

    def test_cabinet_has_multiple_views(self, cabinet_basic_pdf):
        """Cabinet drawing should detect multiple views."""
        assert len(self._segment(cabinet_basic_pdf)) >= 2

    def test_view_bounds_are_reasonable(self, simple_panel_pdf):
        """View bounds should be within page dimensions."""
        filtered = make_filtered_extraction(simple_panel_pdf)
        # 5-unit slack on every side of the page rectangle.
        for view in segment_views(filtered):
            x0, y0, x1, y1 = view.bounds
            assert x0 >= -5, f"x0 out of range: {x0}"
            assert y0 >= -5, f"y0 out of range: {y0}"
            assert x1 <= filtered.page_width + 5, f"x1 out of range: {x1}"
            assert y1 <= filtered.page_height + 5, f"y1 out of range: {y1}"

    def test_views_dont_overlap_much(self, simple_panel_pdf):
        """Distinct views should not overlap significantly."""
        result = self._segment(simple_panel_pdf)
        if len(result) < 2:
            pytest.skip("Less than 2 views")
        for i, v1 in enumerate(result):
            a1 = _bbox_area(v1.bounds)
            for v2 in result[i + 1 :]:
                overlap = _bbox_overlap_area(v1.bounds, v2.bounds)
                smaller = min(a1, _bbox_area(v2.bounds))
                # Fall back to 1 so a degenerate view cannot divide by zero.
                min_area = smaller if smaller > 0 else 1
                # Overlap should be < 20% of smaller view
                assert overlap / min_area < 0.2, (
                    f"{v1.view_type} and {v2.view_type} overlap "
                    f"{overlap / min_area:.1%}"
                )
class TestSegmentViewsEmpty:
    """Behaviour of ``segment_views`` when there is nothing to segment."""

    def test_empty_extraction(self):
        """Empty extraction returns empty list."""
        blank_page = PageExtraction(
            paths=(),
            texts=(),
            page_width=595,
            page_height=842,
        )
        assert segment_views(blank_page) == []
class TestSegmentViewsSynthetic:
    """Test with synthetic data mimicking third-angle projection layout."""

    @staticmethod
    def _find_view(views, view_type):
        """Return the first view of *view_type* in *views*, or None."""
        for candidate in views:
            if candidate.view_type == view_type:
                return candidate
        return None

    def _make_three_view_extraction(self):
        """Create extraction with clear front/top/side layout.

        Layout (CAD coords, y-up):
          Top view:   x=100-300, y=400-450 (above front)
          Front view: x=100-300, y=100-350 (bottom-left)
          Side view:  x=350-400, y=100-350 (right of front)
        """
        boxes = [
            # Front view (large rectangle plus an inner one)
            (100, 100, 300, 350),
            (120, 120, 280, 330),
            # Top view (above front)
            (100, 400, 300, 450),
            (120, 410, 280, 440),
            # Side view (right of front)
            (350, 100, 400, 350),
            (355, 120, 395, 330),
        ]
        return PageExtraction(
            paths=tuple(_make_path(*box) for box in boxes),
            texts=(),
            page_width=595,
            page_height=842,
        )

    def test_detects_three_views(self):
        """The synthetic layout yields exactly three views."""
        views = segment_views(self._make_three_view_extraction())
        assert len(views) == 3

    def test_front_is_bottom_left(self):
        """FRONT is the view whose lower edge sits near y=100."""
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        assert front is not None
        # Front should be around y=100-350
        assert front.bounds[1] < 200, f"Front y0={front.bounds[1]} too high"

    def test_top_is_above_front(self):
        """TOP has a larger y-center than FRONT."""
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        top = self._find_view(views, ViewType.TOP)
        assert front is not None
        assert top is not None
        front_cy = (front.bounds[1] + front.bounds[3]) / 2
        top_cy = (top.bounds[1] + top.bounds[3]) / 2
        assert top_cy > front_cy, "TOP should be above FRONT"

    def test_side_is_right_of_front(self):
        """SIDE has a larger x-center than FRONT."""
        views = segment_views(self._make_three_view_extraction())
        front = self._find_view(views, ViewType.FRONT)
        side = self._find_view(views, ViewType.SIDE)
        assert front is not None
        assert side is not None
        front_cx = (front.bounds[0] + front.bounds[2]) / 2
        side_cx = (side.bounds[0] + side.bounds[2]) / 2
        assert side_cx > front_cx, "SIDE should be right of FRONT"

    def test_text_assignment_with_coord_conversion(self):
        """Texts in PDF coords should be assigned to correct views."""
        base = self._make_three_view_extraction()

        def pdf_text(label, bbox):
            # Both labels share font, size and color; only text/bbox differ.
            return RawText(
                text=label,
                bbox=bbox,
                font="Helvetica",
                size=10.0,
                color=0,
            )

        # Front view in CAD: y=100-350.
        # In PDF coords (y = page_h - cad_y): y = 842-350=492 to 842-100=742
        text_in_front = pdf_text("600", (150.0, 600.0, 170.0, 612.0))
        # Top view in CAD: y=400-450.
        # In PDF coords: y = 842-450=392 to 842-400=442
        text_in_top = pdf_text("720", (150.0, 400.0, 170.0, 412.0))

        extraction_with_text = PageExtraction(
            paths=base.paths,
            texts=(text_in_front, text_in_top),
            page_width=595,
            page_height=842,
        )
        views = segment_views(extraction_with_text)
        front = self._find_view(views, ViewType.FRONT)
        top = self._find_view(views, ViewType.TOP)

        assert front is not None
        # "600" should be assigned to front view
        front_text_vals = [t.text for t in front.texts]
        assert "600" in front_text_vals, (
            f"Text '600' not in front view. Front texts: {front_text_vals}"
        )
        # The TOP check only runs when a TOP view was detected.
        if top is None:
            return
        top_text_vals = [t.text for t in top.texts]
        assert "720" in top_text_vals, (
            f"Text '720' not in top view. Top texts: {top_text_vals}"
        )
# ---------------------------------------------------------------------------
# Test helpers
# ---------------------------------------------------------------------------
def _bbox_overlap_area(a, b):
"""Compute overlap area of two bounding boxes."""
x0 = max(a[0], b[0])
y0 = max(a[1], b[1])
x1 = min(a[2], b[2])
y1 = min(a[3], b[3])
if x1 <= x0 or y1 <= y0:
return 0.0
return (x1 - x0) * (y1 - y0)
def _bbox_area(bbox):
"""Compute area of a bounding box."""
return abs(bbox[2] - bbox[0]) * abs(bbox[3] - bbox[1])