import type { CodeStructure, GeneratedDocs, FileNode } from "@codeboard/shared";
import type { LLMProvider } from "./providers/base.js";
import { buildArchitecturePrompt } from "./prompts/architecture-overview.js";
import { buildModuleSummaryPrompt } from "./prompts/module-summary.js";
import { buildPatternsPrompt } from "./prompts/patterns-detection.js";
import { buildGettingStartedPrompt } from "./prompts/getting-started.js";

/** Maximum number of modules that receive an LLM-written summary. */
const MAX_SUMMARIZED_MODULES = 10;

/** Maximum number of entries kept in a module's `dependsOn` / `dependedBy`. */
const MAX_DEPENDENCY_ENTRIES = 10;

/**
 * Leading Markdown list marker: `-`, `*`, `+`, `1.` or `1)`, followed by
 * whitespace or end of line. Requiring the separator keeps `**bold**` and
 * `--flag` lines intact.
 */
const LIST_MARKER = /^(?:[-*+]|\d+[.)])(?:\s+|$)/;

/** Escapes every regular-expression metacharacter in `literal`. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Extracts the body of the `## <header>` section from Markdown text.
 *
 * The section runs until the next line that starts with `## ` or the end of
 * the text; deeper headings (`###`) stay part of the body.
 *
 * @param text - Markdown to search.
 * @param header - Heading text, matched literally.
 * @returns The trimmed section body, or `""` when the section is missing or empty.
 */
function parseSection(text: string, header: string): string {
  // The newline after the heading is deliberately left unconsumed so that an
  // empty section still hits the `\n## ` terminator instead of swallowing the
  // section that follows it.
  const regex = new RegExp(
    `## ${escapeRegExp(header)}[^\\S\\n]*(?=\\n|$)([\\s\\S]*?)(?=\\n## |$)`
  );
  return regex.exec(text)?.[1]?.trim() ?? "";
}

/**
 * Extracts the contents of the first ```mermaid fenced block.
 *
 * @param text - Markdown to search.
 * @returns The trimmed diagram source, or a placeholder flowchart when no
 *   mermaid block is present.
 */
function parseMermaid(text: string): string {
  const match = /```mermaid\s*\n([\s\S]*?)```/.exec(text);
  return match?.[1]?.trim() ?? "flowchart TD\n A[No diagram generated]";
}

/**
 * Splits text into non-empty lines and strips a leading list marker from each.
 *
 * @param text - Section body, typically a Markdown list.
 * @returns One trimmed entry per non-empty line.
 */
function parseList(text: string): string[] {
  return text
    .split("\n")
    // Trim first so indented (nested) bullets are recognised as bullets.
    .map((line) => line.trim().replace(LIST_MARKER, "").trim())
    .filter(Boolean);
}

/**
 * Builds the documentation entry for a single module.
 *
 * Modules with no matching file nodes get a fixed placeholder and skip the
 * LLM call. Otherwise the summary, key files and public API come from the
 * model response, while the dependency lists are derived from `codeStructure`.
 */
async function summarizeModule(
  mod: CodeStructure["modules"][number],
  codeStructure: CodeStructure,
  provider: LLMProvider
) {
  // Set lookup keeps the membership checks below linear in the input size.
  const memberPaths = new Set(mod.files);
  const moduleFiles: FileNode[] = codeStructure.files.filter((f) =>
    memberPaths.has(f.path)
  );

  if (moduleFiles.length === 0) {
    return {
      name: mod.name,
      path: mod.path,
      summary: "Empty module — no parseable files found.",
      keyFiles: [],
      publicApi: [],
      dependsOn: [],
      dependedBy: [],
    };
  }

  const messages = buildModuleSummaryPrompt(mod, moduleFiles);
  // NOTE(review): `model: undefined` is passed explicitly; whether that differs
  // from omitting the options depends on the provider — confirm before removing.
  const response = await provider.chat(messages, { model: undefined });

  const summary = parseSection(response, "Summary");
  const keyFilesRaw = parseList(parseSection(response, "Key Files"));
  const publicApi = parseList(parseSection(response, "Public API"));

  // Import sources that do not start with "." (i.e. non-relative), de-duplicated.
  const dependsOn = [
    ...new Set(
      moduleFiles.flatMap((f) =>
        f.imports.map((imp) => imp.source).filter((s) => !s.startsWith("."))
      )
    ),
  ].slice(0, MAX_DEPENDENCY_ENTRIES);

  // Sources outside this module that target one of its files. De-duplicated so
  // an importer touching several files of the module is listed once.
  const dependedBy = [
    ...new Set(
      codeStructure.dependencies
        .filter((d) => memberPaths.has(d.target))
        .map((d) => d.source)
        .filter((s) => !memberPaths.has(s))
    ),
  ].slice(0, MAX_DEPENDENCY_ENTRIES);

  return {
    name: mod.name,
    path: mod.path,
    summary: summary || "Module analyzed but no summary generated.",
    keyFiles: keyFilesRaw.map((kf) => ({ path: kf, purpose: "" })),
    publicApi,
    dependsOn,
    dependedBy,
  };
}

/**
 * Generates documentation for an analyzed code base by prompting an LLM.
 *
 * Runs four stages — architecture overview, per-module summaries (first
 * {@link MAX_SUMMARIZED_MODULES} modules, requested concurrently), patterns
 * detection, and getting-started guide — and parses the Markdown responses
 * into a structured result.
 *
 * @param codeStructure - Parsed files, modules and dependencies to document.
 * @param provider - LLM backend used for every prompt.
 * @param onProgress - Optional callback invoked with the stage name and a
 *   percentage: "architecture" 10, "modules" 30, "patterns" 60,
 *   "getting-started" 80, "complete" 100.
 * @returns The generated docs. `id`, `repoUrl` and `repoName` are returned as
 *   empty strings (presumably filled in by the caller — confirm).
 * @throws Rejects with whatever error `provider.chat` rejects with; a single
 *   failed module request rejects the whole run.
 */
export async function generateDocumentation(
  codeStructure: CodeStructure,
  provider: LLMProvider,
  onProgress?: (stage: string, progress: number) => void
): Promise<GeneratedDocs> {
  onProgress?.("architecture", 10);

  const archMessages = buildArchitecturePrompt(codeStructure);
  const archResponse = await provider.chat(archMessages);

  const architectureOverview = parseSection(archResponse, "Architecture Overview");
  const techStackRaw = parseSection(archResponse, "Tech Stack");
  const architectureDiagram = parseMermaid(archResponse);
  // Accept both "a, b, c" and one-item-per-line (bulleted) answers.
  const techStack = parseList(techStackRaw)
    .flatMap((line) => line.split(","))
    .map((s) => s.trim())
    .filter(Boolean);

  onProgress?.("modules", 30);

  const moduleSummaries = await Promise.all(
    codeStructure.modules
      .slice(0, MAX_SUMMARIZED_MODULES)
      .map((mod) => summarizeModule(mod, codeStructure, provider))
  );

  onProgress?.("patterns", 60);

  const patternsMessages = buildPatternsPrompt(codeStructure);
  const patternsResponse = await provider.chat(patternsMessages);

  const conventions = parseList(parseSection(patternsResponse, "Coding Conventions"));
  const designPatterns = parseList(parseSection(patternsResponse, "Design Patterns"));
  const architecturalDecisions = parseList(
    parseSection(patternsResponse, "Architectural Decisions")
  );

  onProgress?.("getting-started", 80);

  const gsMessages = buildGettingStartedPrompt(codeStructure, architectureOverview);
  const gsResponse = await provider.chat(gsMessages);

  const prerequisites = parseList(parseSection(gsResponse, "Prerequisites"));
  const setupSteps = parseList(parseSection(gsResponse, "Setup Steps"));
  const firstTask = parseSection(gsResponse, "Your First Task");

  onProgress?.("complete", 100);

  const languages = [...new Set(codeStructure.files.map((f) => f.language))];

  return {
    id: "",
    repoUrl: "",
    repoName: "",
    generatedAt: new Date().toISOString(),
    sections: {
      overview: {
        title: "Architecture Overview",
        description: architectureOverview,
        architectureDiagram,
        techStack,
        keyMetrics: {
          files: codeStructure.files.length,
          modules: codeStructure.modules.length,
          languages,
        },
      },
      modules: moduleSummaries,
      patterns: {
        conventions,
        designPatterns,
        architecturalDecisions,
      },
      gettingStarted: {
        prerequisites,
        setupSteps,
        firstTask,
      },
      // The architecture diagram is reused here; no separate graph is generated.
      dependencyGraph: architectureDiagram,
    },
  };
}