feat: pdf2cad

This commit is contained in:
2026-03-03 21:24:02 +00:00
commit 112213da6e
61 changed files with 7290 additions and 0 deletions

469
tests/generate_fixtures.py Normal file
View File

@@ -0,0 +1,469 @@
#!/usr/bin/env python3
"""Generate synthetic test PDF fixtures for pdf2imos tests.
Creates 4 realistic AutoCAD-like technical drawing PDFs with vector geometry
and dimension text. All content is vector-based (no raster, no OCR needed).
PDF page coordinate system: origin TOP-LEFT, y increases DOWNWARD.
"""
import pymupdf
from pathlib import Path
# Output directory for the generated PDFs: <directory of this file>/fixtures/input
FIXTURES_DIR = Path(__file__).parent / "fixtures" / "input"
# A4 portrait dimensions in points (width, height); used for every page and for the border
A4_W, A4_H = 595, 842
# ---------------------------------------------------------------------------
# Drawing helpers
# ---------------------------------------------------------------------------
def _draw_arrowhead(shape, tip_x: float, tip_y: float, direction: str, size: float = 4) -> None:
    """Add a solid black triangular arrowhead to *shape*.

    The apex of the triangle is placed at (tip_x, tip_y) and points toward
    ``direction`` ('right', 'left', 'up' or 'down'). The base lies ``size``
    points behind the apex and is ``0.8 * size`` wide.

    Any other ``direction`` value is ignored: nothing is added to *shape*.
    The shape is not committed here; that is left to the caller.
    """
    half = size * 0.4
    # (dx, dy) of the two base corners relative to the apex.
    # Page y grows downward, so an arrow pointing 'up' has its base at +y.
    base_offsets = {
        "right": ((-size, -half), (-size, half)),
        "left": ((size, -half), (size, half)),
        "down": ((-half, -size), (half, -size)),
        "up": ((-half, size), (half, size)),
    }
    corners = base_offsets.get(direction)
    if corners is None:
        return

    apex = pymupdf.Point(tip_x, tip_y)
    outline = [apex]
    outline.extend(pymupdf.Point(tip_x + dx, tip_y + dy) for dx, dy in corners)
    outline.append(apex)  # repeat the apex so the outline is a closed triangle

    shape.draw_polyline(outline)
    shape.finish(color=(0, 0, 0), fill=(0, 0, 0), width=0)
def _draw_hdim(page, x1: float, x2: float, y_obj: float, y_dim: float,
               text: str, fontsize: float = 8) -> None:
    """Draw a horizontal dimension annotation on *page*.

    Drawn in this order: two vertical extension lines, the dimension line,
    two filled arrowheads, then the label text.

    x1, x2:   x positions of the two ends of the measured edge
    y_obj:    y of the object edge the extension lines grow away from
    y_dim:    y of the dimension line (may be above or below the object)
    text:     label to print next to the dimension line
    fontsize: label font size in points
    """
    black = (0, 0, 0)
    hairline = 0.25
    gap = 2        # clearance between the object edge and the extension line
    overshoot = 3  # how far the extension line runs past the dimension line

    # +1 when the dimension line has the larger y (lower on the page), else -1.
    away = 1 if y_dim > y_obj else -1

    ext_from = y_obj + away * gap
    ext_to = y_dim + away * overshoot
    for x in (x1, x2):
        page.draw_line((x, ext_from), (x, ext_to), color=black, width=hairline)

    page.draw_line((x1, y_dim), (x2, y_dim), color=black, width=hairline)

    # Arrow tips sit exactly at x1 and x2; the "right"-pointing head at x1 has
    # its base at x1 - size and the "left"-pointing head at x2 at x2 + size.
    arrows = page.new_shape()
    _draw_arrowhead(arrows, x1, y_dim, "right")
    _draw_arrowhead(arrows, x2, y_dim, "left")
    arrows.commit()

    # Label: origin shifted left of the midpoint by a rough per-character
    # estimate, and placed (fontsize + 2) points from the dimension line on
    # the side facing away from the object.
    label_x = (x1 + x2) / 2 - len(text) * fontsize * 0.15
    label_y = y_dim + away * (fontsize + 2)
    page.insert_text((label_x, label_y), text, fontsize=fontsize, color=black)
def _draw_vdim(page, y1: float, y2: float, x_obj: float, x_dim: float,
               text: str, fontsize: float = 8) -> None:
    """Draw a vertical dimension annotation on *page*.

    Drawn in this order: two horizontal extension lines, the dimension line,
    two filled arrowheads, then the label text.

    y1, y2:   y positions of the two ends of the measured edge
    x_obj:    x of the object edge the extension lines grow away from
    x_dim:    x of the dimension line (may be left or right of the object)
    text:     label to print next to the dimension line
    fontsize: label font size in points
    """
    black = (0, 0, 0)
    hairline = 0.25
    gap = 2        # clearance between the object edge and the extension line
    overshoot = 3  # how far the extension line runs past the dimension line

    # +1 when the dimension line is to the right of the object, else -1.
    away = 1 if x_dim > x_obj else -1

    ext_from = x_obj + away * gap
    ext_to = x_dim + away * overshoot
    for y in (y1, y2):
        page.draw_line((ext_from, y), (ext_to, y), color=black, width=hairline)

    page.draw_line((x_dim, y1), (x_dim, y2), color=black, width=hairline)

    # Arrow tips sit exactly at y1 and y2; the "down"-pointing head at y1 has
    # its base at y1 - size and the "up"-pointing head at y2 at y2 + size.
    arrows = page.new_shape()
    _draw_arrowhead(arrows, x_dim, y1, "down")
    _draw_arrowhead(arrows, x_dim, y2, "up")
    arrows.commit()

    # Label origin: 4 pt from the dimension line on the side away from the
    # object, slightly below the vertical midpoint of the span.
    # NOTE(review): when the dimension is left of the object (away == -1) the
    # origin is only 4 pt left of the line, so text running rightward from it
    # presumably crosses the dimension line — confirm whether that is intended.
    label_x = x_dim + away * 4
    label_y = (y1 + y2) / 2 + fontsize * 0.3
    page.insert_text((label_x, label_y), text, fontsize=fontsize, color=black)
def _draw_title_block(page, x0: float, y0: float, x1: float, y1: float,
                      lines: list[str]) -> None:
    """Draw a framed title block with one text row per entry of *lines*.

    The rectangle (x0, y0)-(x1, y1) is split into equally tall rows. Each row
    gets its caption 5 pt in from the left edge, and every row except the
    first gets a horizontal divider along its top. With an empty *lines* only
    the outer frame is drawn.
    """
    black = (0, 0, 0)
    page.draw_rect(pymupdf.Rect(x0, y0, x1, y1), color=black, width=1.0)

    # Treat an empty list as a single row so the division below is defined.
    row_h = (y1 - y0) / (len(lines) or 1)

    for row, caption in enumerate(lines):
        row_top = y0 + row_h * row
        # Text origin sits 60% of the way down the row.
        page.insert_text((x0 + 5, row_top + row_h * 0.6), caption,
                         fontsize=7, color=black)
        if row:
            # Divider between this row and the previous one.
            page.draw_line((x0, row_top), (x1, row_top), color=black, width=0.5)
def _draw_border(page) -> None:
    """Outline the drawing area: a 1 pt black frame inset 20 pt from the A4 page edges."""
    inset = 20
    frame = pymupdf.Rect(inset, inset, A4_W - inset, A4_H - inset)
    page.draw_rect(frame, color=(0, 0, 0), width=1.0)
# ---------------------------------------------------------------------------
# PDF generators
# ---------------------------------------------------------------------------
def create_simple_panel() -> None:
    """Create simple_panel.pdf: 600×720×18mm flat panel with 3 orthographic views.

    Layout: front view (W×H), top view (W×D) 10 pt above it and side view
    (D×H) 10 pt to its right. Geometry is drawn at 0.3 pt per mm.

    The file is written to ``FIXTURES_DIR / "simple_panel.pdf"``. The output
    directory is created if missing, so the function also works when called
    outside the ``__main__`` entry point. The document is closed even if
    drawing or saving fails.
    """
    scale = 0.3
    w_pt = 600 * scale  # 180
    h_pt = 720 * scale  # 216
    d_pt = 18 * scale   # 5.4

    # View origins (top-left corners)
    front_x, front_y = 80, 350
    top_x, top_y = 80, front_y - 10 - d_pt          # above front, 10pt gap
    side_x, side_y = front_x + w_pt + 10, front_y   # right of front, 10pt gap

    out = FIXTURES_DIR / "simple_panel.pdf"
    # Do not rely on the caller having created the fixtures directory.
    out.parent.mkdir(parents=True, exist_ok=True)

    doc = pymupdf.open()
    try:
        page = doc.new_page(width=A4_W, height=A4_H)
        _draw_border(page)

        # --- Front view (W × H) ---
        fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt)
        page.draw_rect(fr, color=(0, 0, 0), width=0.5)
        # Hidden line (dashed), vertical through the middle of the front view
        mid_x = front_x + w_pt / 2
        page.draw_line((mid_x, front_y), (mid_x, front_y + h_pt),
                       color=(0, 0, 0), width=0.3, dashes="[3 2] 0")
        # Centerline (dash-dot), horizontal through the middle of the front view
        page.draw_line((front_x, front_y + h_pt / 2),
                       (front_x + w_pt, front_y + h_pt / 2),
                       color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0")

        # --- Top view (W × D) ---
        tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt)
        page.draw_rect(tr, color=(0, 0, 0), width=0.5)

        # --- Side view (D × H) ---
        sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt)
        page.draw_rect(sr, color=(0, 0, 0), width=0.5)

        # --- Dimensions ---
        # Width dimension below front view
        _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600")
        # Height dimension left of front view
        _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720")
        # Depth dimension below side view
        _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "18")
        # Depth dimension right of top view (vertical, showing D)
        _draw_vdim(page, top_y, top_y + d_pt, top_x + w_pt, top_x + w_pt + 15, "18")
        # Width dimension above top view (redundant, as in real drawings)
        _draw_hdim(page, top_x, top_x + w_pt, top_y, top_y - 15, "600")
        # Height dimension right of side view
        _draw_vdim(page, side_y, side_y + h_pt, side_x + d_pt, side_x + d_pt + 15, "720")

        # --- Title block ---
        _draw_title_block(page, 370, 730, 565, 820, [
            "Part Name: side_panel",
            "Material: 18mm MDF",
            "Scale: 1:1",
            "Drawing: simple_panel",
        ])

        doc.save(str(out))
    finally:
        # Release the document even when drawing or saving raised.
        doc.close()
    print(f" Created {out}")
def create_cabinet_basic() -> None:
    """Create cabinet_basic.pdf: 600×720×400mm cabinet with material/edgebanding.

    Same three-view layout as the panel fixtures (top view above the front
    view, side view to its right) but with a much larger depth. Geometry is
    drawn at 0.25 pt per mm. Dashed lines mark three shelves and the back
    panel inset; free-text notes below the front view carry material,
    edgebanding and back-panel information.

    The file is written to ``FIXTURES_DIR / "cabinet_basic.pdf"``. The output
    directory is created if missing, and the document is closed even if
    drawing or saving fails.
    """
    scale = 0.25
    w_pt = 600 * scale  # 150
    h_pt = 720 * scale  # 180
    d_pt = 400 * scale  # 100

    front_x, front_y = 80, 380
    top_x, top_y = 80, front_y - 10 - d_pt          # 270
    side_x, side_y = front_x + w_pt + 10, front_y   # 240, 380

    out = FIXTURES_DIR / "cabinet_basic.pdf"
    # Do not rely on the caller having created the fixtures directory.
    out.parent.mkdir(parents=True, exist_ok=True)

    doc = pymupdf.open()
    try:
        page = doc.new_page(width=A4_W, height=A4_H)
        _draw_border(page)

        # --- Front view (W × H) ---
        fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt)
        page.draw_rect(fr, color=(0, 0, 0), width=0.5)
        # Internal shelves (hidden lines) at 1/4, 1/2 and 3/4 of the height
        for i in range(1, 4):
            sy = front_y + h_pt * i / 4
            page.draw_line((front_x, sy), (front_x + w_pt, sy),
                           color=(0, 0, 0), width=0.3, dashes="[3 2] 0")
        # Vertical centerline (dash-dot)
        page.draw_line((front_x + w_pt / 2, front_y),
                       (front_x + w_pt / 2, front_y + h_pt),
                       color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0")

        # --- Top view (W × D) ---
        tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt)
        page.draw_rect(tr, color=(0, 0, 0), width=0.5)
        # Back panel offset (dashed)
        inset = 18 * scale  # 18mm back panel inset
        page.draw_line((top_x, top_y + inset), (top_x + w_pt, top_y + inset),
                       color=(0, 0, 0), width=0.3, dashes="[3 2] 0")

        # --- Side view (D × H) ---
        sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt)
        page.draw_rect(sr, color=(0, 0, 0), width=0.5)
        # Internal shelves (hidden)
        for i in range(1, 4):
            sy = side_y + h_pt * i / 4
            page.draw_line((side_x, sy), (side_x + d_pt, sy),
                           color=(0, 0, 0), width=0.3, dashes="[3 2] 0")
        # Back panel line
        page.draw_line((side_x + d_pt - inset, side_y),
                       (side_x + d_pt - inset, side_y + h_pt),
                       color=(0, 0, 0), width=0.3, dashes="[3 2] 0")

        # --- Dimensions ---
        _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 25, "600")
        _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 25, "720")
        _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 25, "400")

        # --- Material & edgebanding annotations ---
        page.insert_text((80, front_y + h_pt + 55), "Material: 18mm white melamine MDF",
                         fontsize=8, color=(0, 0, 0))
        page.insert_text((80, front_y + h_pt + 68),
                         "EB: 2mm ABS white (top, bottom, left, right)",
                         fontsize=8, color=(0, 0, 0))
        page.insert_text((80, front_y + h_pt + 81), "Back Panel: 3mm HDF",
                         fontsize=8, color=(0, 0, 0))

        # --- Title block ---
        _draw_title_block(page, 370, 730, 565, 820, [
            "Part Name: cabinet_carcass",
            "Material: 18mm melamine MDF",
            "Edgebanding: 2mm ABS white",
            "Scale: 1:1",
        ])

        doc.save(str(out))
    finally:
        # Release the document even when drawing or saving raised.
        doc.close()
    print(f" Created {out}")
def create_panel_with_drilling() -> None:
    """Create panel_with_drilling.pdf: 600×720×18mm panel with shelf pin holes.

    Same layout and 0.3 pt/mm scale as simple_panel, plus shelf-pin holes in
    the front view, a leader with drilling annotation text, a hole-spacing
    dimension and an edge-offset dimension.

    Holes are drawn at four heights in two columns (37mm in from the left and
    right edges), i.e. eight circles in total.
    NOTE(review): the annotation and title block both say "4x" — confirm
    whether that is meant per column or whether the expected count is eight.

    The file is written to ``FIXTURES_DIR / "panel_with_drilling.pdf"``. The
    output directory is created if missing, and the document is closed even
    if drawing or saving fails.
    """
    scale = 0.3
    w_pt = 600 * scale  # 180
    h_pt = 720 * scale  # 216
    d_pt = 18 * scale   # 5.4

    front_x, front_y = 80, 350
    top_x, top_y = 80, front_y - 10 - d_pt
    side_x, side_y = front_x + w_pt + 10, front_y

    out = FIXTURES_DIR / "panel_with_drilling.pdf"
    # Do not rely on the caller having created the fixtures directory.
    out.parent.mkdir(parents=True, exist_ok=True)

    doc = pymupdf.open()
    try:
        page = doc.new_page(width=A4_W, height=A4_H)
        _draw_border(page)

        # --- Front view ---
        fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt)
        page.draw_rect(fr, color=(0, 0, 0), width=0.5)
        # Centerlines (vertical, then horizontal)
        page.draw_line((front_x + w_pt / 2, front_y),
                       (front_x + w_pt / 2, front_y + h_pt),
                       color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0")
        page.draw_line((front_x, front_y + h_pt / 2),
                       (front_x + w_pt, front_y + h_pt / 2),
                       color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0")

        # --- Shelf pin holes (in front view) ---
        # Two columns, 37mm from each side edge, at four heights measured
        # from the top edge: 180, 360, 540 and 640mm (the last is near the bottom).
        hole_x_left = front_x + 37 * scale            # 37mm from left
        hole_x_right = front_x + (600 - 37) * scale   # 37mm from right
        hole_positions_y = [
            front_y + 180 * scale,  # 180mm from top
            front_y + 360 * scale,  # 360mm from top
            front_y + 540 * scale,  # 540mm from top
            front_y + 640 * scale,  # 640mm from top (near bottom)
        ]
        hole_radius = 5 * scale / 2  # 5mm diameter → 2.5mm radius → 0.75pt
        for hy in hole_positions_y:
            page.draw_circle((hole_x_left, hy), hole_radius, color=(0, 0, 0), width=0.3)
            page.draw_circle((hole_x_right, hy), hole_radius, color=(0, 0, 0), width=0.3)

        # --- Top view ---
        tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt)
        page.draw_rect(tr, color=(0, 0, 0), width=0.5)

        # --- Side view ---
        sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt)
        page.draw_rect(sr, color=(0, 0, 0), width=0.5)

        # --- Dimensions ---
        _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600")
        _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720")
        _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "18")

        # --- Drilling annotation ---
        # Leader line from just right of the second right-hand hole to the
        # annotation text outside the front view
        leader_start_x = hole_x_right + 5
        leader_start_y = hole_positions_y[1]
        leader_end_x = front_x + w_pt + 40
        leader_end_y = hole_positions_y[1] - 30
        page.draw_line((leader_start_x, leader_start_y), (leader_end_x, leader_end_y),
                       color=(0, 0, 0), width=0.25)
        page.insert_text((leader_end_x + 3, leader_end_y), "4x",
                         fontsize=8, color=(0, 0, 0))
        page.insert_text((leader_end_x + 3, leader_end_y + 11), "D5mm",
                         fontsize=8, color=(0, 0, 0))
        page.insert_text((leader_end_x + 3, leader_end_y + 22), "12mm deep",
                         fontsize=8, color=(0, 0, 0))

        # Hole spacing dimension (vertical between first two holes)
        _draw_vdim(page, hole_positions_y[0], hole_positions_y[1],
                   hole_x_left, hole_x_left - 15, "180")
        # Edge offset dimension (horizontal from left edge to hole center)
        _draw_hdim(page, front_x, hole_x_left, front_y - 10, front_y - 25, "37")

        # --- Title block ---
        _draw_title_block(page, 370, 730, 565, 820, [
            "Part Name: shelf_side",
            "Material: 18mm MDF",
            "Drilling: 4x shelf pins",
            "Scale: 1:1",
        ])

        doc.save(str(out))
    finally:
        # Release the document even when drawing or saving raised.
        doc.close()
    print(f" Created {out}")
def create_edge_cases() -> None:
    """Create edge_cases.pdf: 600×720×3mm back panel (very thin) with closely spaced dims.

    Tests edge cases:
    - Very thin panel (3mm depth → 0.9pt, nearly a line in side/top views)
    - Closely spaced dimension text
    - Multiple redundant dimensions

    Geometry is drawn at 0.3 pt per mm with the same three-view layout as
    the other panel fixtures.

    The file is written to ``FIXTURES_DIR / "edge_cases.pdf"``. The output
    directory is created if missing, and the document is closed even if
    drawing or saving fails.
    """
    scale = 0.3
    w_pt = 600 * scale  # 180
    h_pt = 720 * scale  # 216
    d_pt = 3 * scale    # 0.9 — nearly a line!

    front_x, front_y = 80, 350
    top_x, top_y = 80, front_y - 10 - d_pt
    side_x, side_y = front_x + w_pt + 10, front_y

    out = FIXTURES_DIR / "edge_cases.pdf"
    # Do not rely on the caller having created the fixtures directory.
    out.parent.mkdir(parents=True, exist_ok=True)

    doc = pymupdf.open()
    try:
        page = doc.new_page(width=A4_W, height=A4_H)
        _draw_border(page)

        # --- Front view (W × H) — looks the same as any panel from the front ---
        fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt)
        page.draw_rect(fr, color=(0, 0, 0), width=0.5)
        # Short grey diagonal hatch strokes (10pt × 10pt, every 15pt) along the
        # top edge of the front view, used here to indicate thin material
        for i in range(0, int(w_pt), 15):
            page.draw_line((front_x + i, front_y), (front_x + i + 10, front_y + 10),
                           color=(0.6, 0.6, 0.6), width=0.15)

        # --- Top view (W × D = 600 × 3mm → 180pt × 0.9pt) ---
        # This is almost a single line — the edge case!
        tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt)
        page.draw_rect(tr, color=(0, 0, 0), width=0.5)

        # --- Side view (D × H = 3mm × 720mm → 0.9pt × 216pt) ---
        sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt)
        page.draw_rect(sr, color=(0, 0, 0), width=0.5)

        # --- Primary dimensions ---
        _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600")
        _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720")
        _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "3")

        # --- Closely spaced redundant dimensions (edge case: overlapping text) ---
        # Second set of dimensions slightly offset from the primary ones
        _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt,
                   front_y + h_pt + 35, "600.0")
        _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 40, "720.0")
        # Half-dimension (partial measurement)
        _draw_hdim(page, front_x, front_x + w_pt / 2, front_y + h_pt,
                   front_y + h_pt + 50, "300")

        # --- Material annotation ---
        page.insert_text((80, front_y + h_pt + 70), "Material: 3mm HDF back panel",
                         fontsize=8, color=(0, 0, 0))
        page.insert_text((80, front_y + h_pt + 83), "Note: Thin panel, handle with care",
                         fontsize=8, color=(0, 0, 0))

        # --- Title block ---
        _draw_title_block(page, 370, 730, 565, 820, [
            "Part Name: back_panel",
            "Material: 3mm HDF",
            "Scale: 1:1",
            "Drawing: edge_cases",
        ])

        doc.save(str(out))
    finally:
        # Release the document even when drawing or saving raised.
        doc.close()
    print(f" Created {out}")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: ensure the output directory exists, then build
    # every fixture in a fixed order.
    FIXTURES_DIR.mkdir(parents=True, exist_ok=True)
    print("Generating test fixture PDFs...")
    for build_fixture in (
        create_simple_panel,
        create_cabinet_basic,
        create_panel_with_drilling,
        create_edge_cases,
    ):
        build_fixture()
    print("Fixtures generated successfully")