#!/usr/bin/env python3 """Generate synthetic test PDF fixtures for pdf2imos tests. Creates 4 realistic AutoCAD-like technical drawing PDFs with vector geometry and dimension text. All content is vector-based (no raster, no OCR needed). PDF page coordinate system: origin TOP-LEFT, y increases DOWNWARD. """ import pymupdf from pathlib import Path FIXTURES_DIR = Path(__file__).parent / "fixtures" / "input" # A4 portrait dimensions in points A4_W, A4_H = 595, 842 # --------------------------------------------------------------------------- # Drawing helpers # --------------------------------------------------------------------------- def _draw_arrowhead(shape, tip_x: float, tip_y: float, direction: str, size: float = 4) -> None: """Draw a filled triangular arrowhead. direction: 'right', 'left', 'up', 'down' """ p = pymupdf.Point half = size * 0.4 if direction == "right": pts = [p(tip_x, tip_y), p(tip_x - size, tip_y - half), p(tip_x - size, tip_y + half)] elif direction == "left": pts = [p(tip_x, tip_y), p(tip_x + size, tip_y - half), p(tip_x + size, tip_y + half)] elif direction == "down": pts = [p(tip_x, tip_y), p(tip_x - half, tip_y - size), p(tip_x + half, tip_y - size)] elif direction == "up": pts = [p(tip_x, tip_y), p(tip_x - half, tip_y + size), p(tip_x + half, tip_y + size)] else: return pts.append(pts[0]) # close triangle shape.draw_polyline(pts) shape.finish(color=(0, 0, 0), fill=(0, 0, 0), width=0) def _draw_hdim(page, x1: float, x2: float, y_obj: float, y_dim: float, text: str, fontsize: float = 8) -> None: """Draw a horizontal dimension (extension lines + dim line + arrows + text). x1, x2: horizontal extents on the object edge y_obj: y of the object edge (where extension lines start) y_dim: y of the dimension line (below/above the object) """ ext_gap = 2 # small gap between object and extension line start ext_overshoot = 3 # extension line extends past dim line sign = 1 if y_dim > y_obj else -1 # direction of extension # Extension lines page.draw_line((x1, y_obj + sign * ext_gap), (x1, y_dim + sign * ext_overshoot), color=(0, 0, 0), width=0.25) page.draw_line((x2, y_obj + sign * ext_gap), (x2, y_dim + sign * ext_overshoot), color=(0, 0, 0), width=0.25) # Dimension line page.draw_line((x1, y_dim), (x2, y_dim), color=(0, 0, 0), width=0.25) # Arrowheads shape = page.new_shape() _draw_arrowhead(shape, x1, y_dim, "right") _draw_arrowhead(shape, x2, y_dim, "left") shape.commit() # Dimension text — centered above the dimension line text_x = (x1 + x2) / 2 - len(text) * fontsize * 0.15 text_y = y_dim + sign * (fontsize + 2) page.insert_text((text_x, text_y), text, fontsize=fontsize, color=(0, 0, 0)) def _draw_vdim(page, y1: float, y2: float, x_obj: float, x_dim: float, text: str, fontsize: float = 8) -> None: """Draw a vertical dimension (extension lines + dim line + arrows + text). y1, y2: vertical extents on the object edge x_obj: x of the object edge (where extension lines start) x_dim: x of the dimension line (left/right of the object) """ ext_gap = 2 ext_overshoot = 3 sign = 1 if x_dim > x_obj else -1 # Extension lines page.draw_line((x_obj + sign * ext_gap, y1), (x_dim + sign * ext_overshoot, y1), color=(0, 0, 0), width=0.25) page.draw_line((x_obj + sign * ext_gap, y2), (x_dim + sign * ext_overshoot, y2), color=(0, 0, 0), width=0.25) # Dimension line page.draw_line((x_dim, y1), (x_dim, y2), color=(0, 0, 0), width=0.25) # Arrowheads shape = page.new_shape() _draw_arrowhead(shape, x_dim, y1, "down") _draw_arrowhead(shape, x_dim, y2, "up") shape.commit() # Dimension text — to the side of the dim line text_x = x_dim + sign * 4 text_y = (y1 + y2) / 2 + fontsize * 0.3 page.insert_text((text_x, text_y), text, fontsize=fontsize, color=(0, 0, 0)) def _draw_title_block(page, x0: float, y0: float, x1: float, y1: float, lines: list[str]) -> None: """Draw a title block rectangle with text lines.""" page.draw_rect(pymupdf.Rect(x0, y0, x1, y1), color=(0, 0, 0), width=1.0) # Horizontal divider row_h = (y1 - y0) / max(len(lines), 1) for i, text in enumerate(lines): ty = y0 + row_h * i + row_h * 0.6 page.insert_text((x0 + 5, ty), text, fontsize=7, color=(0, 0, 0)) if i > 0: page.draw_line((x0, y0 + row_h * i), (x1, y0 + row_h * i), color=(0, 0, 0), width=0.5) def _draw_border(page) -> None: """Draw a standard drawing border with margin.""" margin = 20 page.draw_rect(pymupdf.Rect(margin, margin, A4_W - margin, A4_H - margin), color=(0, 0, 0), width=1.0) # --------------------------------------------------------------------------- # PDF generators # --------------------------------------------------------------------------- def create_simple_panel() -> None: """Create simple_panel.pdf: 600×720×18mm flat panel with 3 orthographic views. Third-angle projection: front (W×H), top (W×D), side (D×H). Scale: 0.3 pt/mm. """ scale = 0.3 w_pt = 600 * scale # 180 h_pt = 720 * scale # 216 d_pt = 18 * scale # 5.4 # View origins (top-left corners) front_x, front_y = 80, 350 top_x, top_y = 80, front_y - 10 - d_pt # above front, 10pt gap side_x, side_y = front_x + w_pt + 10, front_y # right of front, 10pt gap doc = pymupdf.open() page = doc.new_page(width=A4_W, height=A4_H) _draw_border(page) # --- Front view (W × H) --- fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt) page.draw_rect(fr, color=(0, 0, 0), width=0.5) # Hidden lines (dashed) — simulate back edges mid_x = front_x + w_pt / 2 page.draw_line((mid_x, front_y), (mid_x, front_y + h_pt), color=(0, 0, 0), width=0.3, dashes="[3 2] 0") # Centerlines (dash-dot) page.draw_line((front_x, front_y + h_pt / 2), (front_x + w_pt, front_y + h_pt / 2), color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0") # --- Top view (W × D) --- tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt) page.draw_rect(tr, color=(0, 0, 0), width=0.5) # --- Side view (D × H) --- sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt) page.draw_rect(sr, color=(0, 0, 0), width=0.5) # --- Dimensions --- # Width dimension below front view _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600") # Height dimension left of front view _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720") # Depth dimension below side view _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "18") # Depth dimension right of top view (vertical, showing D) _draw_vdim(page, top_y, top_y + d_pt, top_x + w_pt, top_x + w_pt + 15, "18") # Width dimension above top view (redundant, as in real drawings) _draw_hdim(page, top_x, top_x + w_pt, top_y, top_y - 15, "600") # Height dimension right of side view _draw_vdim(page, side_y, side_y + h_pt, side_x + d_pt, side_x + d_pt + 15, "720") # --- Title block --- _draw_title_block(page, 370, 730, 565, 820, [ "Part Name: side_panel", "Material: 18mm MDF", "Scale: 1:1", "Drawing: simple_panel", ]) out = FIXTURES_DIR / "simple_panel.pdf" doc.save(str(out)) doc.close() print(f" Created {out}") def create_cabinet_basic() -> None: """Create cabinet_basic.pdf: 600×720×400mm cabinet with material/edgebanding. Third-angle projection with larger depth. Scale: 0.25 pt/mm. """ scale = 0.25 w_pt = 600 * scale # 150 h_pt = 720 * scale # 180 d_pt = 400 * scale # 100 front_x, front_y = 80, 380 top_x, top_y = 80, front_y - 10 - d_pt # 270 side_x, side_y = front_x + w_pt + 10, front_y # 240, 380 doc = pymupdf.open() page = doc.new_page(width=A4_W, height=A4_H) _draw_border(page) # --- Front view (W × H) --- fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt) page.draw_rect(fr, color=(0, 0, 0), width=0.5) # Internal shelves (hidden lines) for i in range(1, 4): sy = front_y + h_pt * i / 4 page.draw_line((front_x, sy), (front_x + w_pt, sy), color=(0, 0, 0), width=0.3, dashes="[3 2] 0") # Centerlines page.draw_line((front_x + w_pt / 2, front_y), (front_x + w_pt / 2, front_y + h_pt), color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0") # --- Top view (W × D) --- tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt) page.draw_rect(tr, color=(0, 0, 0), width=0.5) # Back panel offset (dashed) inset = 18 * scale # 18mm back panel inset page.draw_line((top_x, top_y + inset), (top_x + w_pt, top_y + inset), color=(0, 0, 0), width=0.3, dashes="[3 2] 0") # --- Side view (D × H) --- sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt) page.draw_rect(sr, color=(0, 0, 0), width=0.5) # Internal shelves (hidden) for i in range(1, 4): sy = side_y + h_pt * i / 4 page.draw_line((side_x, sy), (side_x + d_pt, sy), color=(0, 0, 0), width=0.3, dashes="[3 2] 0") # Back panel line page.draw_line((side_x + d_pt - inset, side_y), (side_x + d_pt - inset, side_y + h_pt), color=(0, 0, 0), width=0.3, dashes="[3 2] 0") # --- Dimensions --- _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 25, "600") _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 25, "720") _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 25, "400") # --- Material & edgebanding annotations --- page.insert_text((80, front_y + h_pt + 55), "Material: 18mm white melamine MDF", fontsize=8, color=(0, 0, 0)) page.insert_text((80, front_y + h_pt + 68), "EB: 2mm ABS white (top, bottom, left, right)", fontsize=8, color=(0, 0, 0)) page.insert_text((80, front_y + h_pt + 81), "Back Panel: 3mm HDF", fontsize=8, color=(0, 0, 0)) # --- Title block --- _draw_title_block(page, 370, 730, 565, 820, [ "Part Name: cabinet_carcass", "Material: 18mm melamine MDF", "Edgebanding: 2mm ABS white", "Scale: 1:1", ]) out = FIXTURES_DIR / "cabinet_basic.pdf" doc.save(str(out)) doc.close() print(f" Created {out}") def create_panel_with_drilling() -> None: """Create panel_with_drilling.pdf: 600×720×18mm panel with shelf pin holes. Same layout as simple_panel but with 4 shelf pin drilling circles and drilling annotation text. """ scale = 0.3 w_pt = 600 * scale # 180 h_pt = 720 * scale # 216 d_pt = 18 * scale # 5.4 front_x, front_y = 80, 350 top_x, top_y = 80, front_y - 10 - d_pt side_x, side_y = front_x + w_pt + 10, front_y doc = pymupdf.open() page = doc.new_page(width=A4_W, height=A4_H) _draw_border(page) # --- Front view --- fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt) page.draw_rect(fr, color=(0, 0, 0), width=0.5) # Centerlines page.draw_line((front_x + w_pt / 2, front_y), (front_x + w_pt / 2, front_y + h_pt), color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0") page.draw_line((front_x, front_y + h_pt / 2), (front_x + w_pt, front_y + h_pt / 2), color=(0, 0, 0), width=0.25, dashes="[6 2 2 2] 0") # --- 4 shelf pin holes (in front view) --- # Positions: 37mm from each side edge, at 1/4, 1/2, 3/4, and near-top heights hole_x_left = front_x + 37 * scale # 37mm from left hole_x_right = front_x + (600 - 37) * scale # 37mm from right hole_positions_y = [ front_y + 180 * scale, # 180mm from top front_y + 360 * scale, # 360mm from top front_y + 540 * scale, # 540mm from top front_y + 640 * scale, # 640mm from top (near bottom) ] hole_radius = 5 * scale / 2 # 5mm diameter → 2.5mm radius → 0.75pt for hy in hole_positions_y: page.draw_circle((hole_x_left, hy), hole_radius, color=(0, 0, 0), width=0.3) page.draw_circle((hole_x_right, hy), hole_radius, color=(0, 0, 0), width=0.3) # --- Top view --- tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt) page.draw_rect(tr, color=(0, 0, 0), width=0.5) # --- Side view --- sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt) page.draw_rect(sr, color=(0, 0, 0), width=0.5) # --- Dimensions --- _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600") _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720") _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "18") # --- Drilling annotation --- # Leader line from hole cluster to annotation text leader_start_x = hole_x_right + 5 leader_start_y = hole_positions_y[1] leader_end_x = front_x + w_pt + 40 leader_end_y = hole_positions_y[1] - 30 page.draw_line((leader_start_x, leader_start_y), (leader_end_x, leader_end_y), color=(0, 0, 0), width=0.25) page.insert_text((leader_end_x + 3, leader_end_y), "4x", fontsize=8, color=(0, 0, 0)) page.insert_text((leader_end_x + 3, leader_end_y + 11), "D5mm", fontsize=8, color=(0, 0, 0)) page.insert_text((leader_end_x + 3, leader_end_y + 22), "12mm deep", fontsize=8, color=(0, 0, 0)) # Hole spacing dimension (vertical between first two holes) _draw_vdim(page, hole_positions_y[0], hole_positions_y[1], hole_x_left, hole_x_left - 15, "180") # Edge offset dimension (horizontal from left edge to hole center) _draw_hdim(page, front_x, hole_x_left, front_y - 10, front_y - 25, "37") # --- Title block --- _draw_title_block(page, 370, 730, 565, 820, [ "Part Name: shelf_side", "Material: 18mm MDF", "Drilling: 4x shelf pins", "Scale: 1:1", ]) out = FIXTURES_DIR / "panel_with_drilling.pdf" doc.save(str(out)) doc.close() print(f" Created {out}") def create_edge_cases() -> None: """Create edge_cases.pdf: 600×720×3mm back panel (very thin) with closely spaced dims. Tests edge cases: - Very thin panel (3mm depth → nearly invisible in side/top views) - Closely spaced dimension text - Multiple redundant dimensions """ scale = 0.3 w_pt = 600 * scale # 180 h_pt = 720 * scale # 216 d_pt = 3 * scale # 0.9 — nearly a line! front_x, front_y = 80, 350 top_x, top_y = 80, front_y - 10 - d_pt side_x, side_y = front_x + w_pt + 10, front_y doc = pymupdf.open() page = doc.new_page(width=A4_W, height=A4_H) _draw_border(page) # --- Front view (W × H) — looks the same as any panel from the front --- fr = pymupdf.Rect(front_x, front_y, front_x + w_pt, front_y + h_pt) page.draw_rect(fr, color=(0, 0, 0), width=0.5) # Cross-hatch pattern to indicate thin material for i in range(0, int(w_pt), 15): page.draw_line((front_x + i, front_y), (front_x + i + 10, front_y + 10), color=(0.6, 0.6, 0.6), width=0.15) # --- Top view (W × D = 600 × 3mm → 180pt × 0.9pt) --- # This is almost a single line — the edge case! tr = pymupdf.Rect(top_x, top_y, top_x + w_pt, top_y + d_pt) page.draw_rect(tr, color=(0, 0, 0), width=0.5) # --- Side view (D × H = 3mm × 720mm → 0.9pt × 216pt) --- sr = pymupdf.Rect(side_x, side_y, side_x + d_pt, side_y + h_pt) page.draw_rect(sr, color=(0, 0, 0), width=0.5) # --- Primary dimensions --- _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 20, "600") _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 20, "720") _draw_hdim(page, side_x, side_x + d_pt, side_y + h_pt, side_y + h_pt + 20, "3") # --- Closely spaced redundant dimensions (edge case: overlapping text) --- # Second set of dimensions slightly offset _draw_hdim(page, front_x, front_x + w_pt, front_y + h_pt, front_y + h_pt + 35, "600.0") _draw_vdim(page, front_y, front_y + h_pt, front_x, front_x - 40, "720.0") # Half-dimension (partial measurement) _draw_hdim(page, front_x, front_x + w_pt / 2, front_y + h_pt, front_y + h_pt + 50, "300") # --- Material annotation --- page.insert_text((80, front_y + h_pt + 70), "Material: 3mm HDF back panel", fontsize=8, color=(0, 0, 0)) page.insert_text((80, front_y + h_pt + 83), "Note: Thin panel, handle with care", fontsize=8, color=(0, 0, 0)) # --- Title block --- _draw_title_block(page, 370, 730, 565, 820, [ "Part Name: back_panel", "Material: 3mm HDF", "Scale: 1:1", "Drawing: edge_cases", ]) out = FIXTURES_DIR / "edge_cases.pdf" doc.save(str(out)) doc.close() print(f" Created {out}") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- if __name__ == "__main__": FIXTURES_DIR.mkdir(parents=True, exist_ok=True) print("Generating test fixture PDFs...") create_simple_panel() create_cabinet_basic() create_panel_with_drilling() create_edge_cases() print("Fixtures generated successfully")