feat: initial CodeBoard monorepo scaffold

Turborepo monorepo with npm workspaces:
- apps/web: Next.js 14 frontend with Tailwind v4, SSE progress, doc viewer
- apps/worker: BullMQ job processor (clone → parse → LLM generate)
- packages/shared: TypeScript types
- packages/parser: Babel-based AST parser (JS/TS) + regex (Python)
- packages/llm: OpenAI/Anthropic provider abstraction + prompt pipeline
- packages/diagrams: Mermaid architecture & dependency graph generators
- packages/database: Prisma schema (PostgreSQL)
- Docker multi-stage build (web + worker targets)

All packages compile successfully with tsc and next build.
This commit is contained in:
Vectry
2026-02-09 15:22:50 +00:00
parent efdc282da5
commit 79dad6124f
72 changed files with 10132 additions and 136 deletions

26
packages/llm/package.json Normal file
View File

@@ -0,0 +1,26 @@
{
"name": "@codeboard/llm",
"version": "0.0.1",
"private": true,
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
},
"scripts": {
"build": "tsc",
"clean": "rm -rf dist",
"dev": "tsc --watch"
},
"dependencies": {
"@codeboard/shared": "*",
"openai": "^4.77.0",
"@anthropic-ai/sdk": "^0.36.0"
},
"devDependencies": {
"typescript": "^5.7"
}
}

View File

@@ -0,0 +1,72 @@
import type { FileNode } from "@codeboard/shared";
// Rough heuristic: ~4 characters of source code per LLM token.
const APPROX_CHARS_PER_TOKEN = 4;

/**
 * Splits `content` into line-aligned chunks, each staying within roughly
 * `maxTokens` tokens (per the chars-per-token heuristic). A single line that
 * alone exceeds the budget still becomes its own (oversized) chunk — lines
 * are never split mid-way.
 */
export function chunkCode(content: string, maxTokens: number): string[] {
  const charBudget = maxTokens * APPROX_CHARS_PER_TOKEN;
  // Fast path: the whole input fits in one chunk.
  if (content.length <= charBudget) return [content];

  const chunks: string[] = [];
  let pending: string[] = []; // lines accumulated for the chunk in progress
  let pendingChars = 0; // chars in `pending`, counting one "\n" per line

  for (const line of content.split("\n")) {
    // Flush before a line that would push the chunk over budget — unless the
    // chunk is empty, in which case the line is taken regardless of size.
    if (pending.length > 0 && pendingChars + line.length > charBudget) {
      chunks.push(pending.join("\n"));
      pending = [];
      pendingChars = 0;
    }
    pending.push(line);
    pendingChars += line.length + 1;
  }
  if (pending.length > 0) {
    chunks.push(pending.join("\n"));
  }
  return chunks;
}
/**
 * Flattens a parsed file into a compact, human-readable signature listing
 * (imports, exports, function and class signatures) suitable for embedding
 * in an LLM prompt without shipping the full source text.
 */
export function extractSignatures(fileNode: FileNode): string {
  const parts: string[] = [];
  parts.push(`File: ${fileNode.path} (${fileNode.language})`);
  if (fileNode.imports.length > 0) {
    parts.push("Imports:");
    for (const imp of fileNode.imports) {
      parts.push(` from "${imp.source}" import {${imp.specifiers.join(", ")}}`);
    }
  }
  if (fileNode.exports.length > 0) {
    parts.push("Exports:");
    for (const exp of fileNode.exports) {
      parts.push(` ${exp.isDefault ? "default " : ""}${exp.name}`);
    }
  }
  for (const fn of fileNode.functions) {
    const params = fn.params.join(", ");
    const ret = fn.returnType ? `: ${fn.returnType}` : "";
    // Fix: separate the docstring excerpt from the signature — previously it
    // was concatenated directly after the return type with no delimiter,
    // producing mashed lines like "run(x): numberRuns the thing.".
    const doc = fn.docstring ? ` // ${fn.docstring.slice(0, 100)}` : "";
    parts.push(`function ${fn.name}(${params})${ret}${doc}`);
  }
  for (const cls of fileNode.classes) {
    parts.push(`class ${cls.name}`);
    for (const method of cls.methods) {
      parts.push(` method ${method.name}(${method.params.join(", ")})`);
    }
    for (const prop of cls.properties) {
      parts.push(` property ${prop.name}${prop.type ? `: ${prop.type}` : ""}`);
    }
  }
  return parts.join("\n");
}
/**
 * Cheap token-count estimate for prompt budgeting: character count divided
 * by the chars-per-token heuristic, rounded up so budgets stay conservative.
 */
export function estimateTokens(text: string): number {
  const approxTokens = text.length / APPROX_CHARS_PER_TOKEN;
  return Math.ceil(approxTokens);
}

View File

@@ -0,0 +1,4 @@
// Public surface of @codeboard/llm: provider construction, the documentation
// generation pipeline, prompt-sizing helpers, and the provider contract type.
export { createProvider } from "./providers/factory.js";
export { generateDocumentation } from "./pipeline.js";
export { chunkCode, extractSignatures } from "./chunker.js";
export type { LLMProvider } from "./providers/base.js";

View File

@@ -0,0 +1,153 @@
import type { CodeStructure, GeneratedDocs, FileNode } from "@codeboard/shared";
import type { LLMProvider } from "./providers/base.js";
import { buildArchitecturePrompt } from "./prompts/architecture-overview.js";
import { buildModuleSummaryPrompt } from "./prompts/module-summary.js";
import { buildPatternsPrompt } from "./prompts/patterns-detection.js";
import { buildGettingStartedPrompt } from "./prompts/getting-started.js";
/**
 * Extracts the body of a `## <header>` markdown section from an LLM response,
 * up to the next `## ` heading or end of text. Returns "" when absent.
 *
 * Fix: `header` is interpolated into a RegExp, so metacharacters in a header
 * (e.g. parentheses, "+", "?") previously corrupted the pattern and made the
 * section silently unmatchable. Escape them first.
 */
function parseSection(text: string, header: string): string {
  const escaped = header.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(`## ${escaped}\\s*\\n([\\s\\S]*?)(?=\\n## |$)`);
  const match = regex.exec(text);
  return match?.[1]?.trim() ?? "";
}
/**
 * Pulls the first fenced ```mermaid block out of an LLM response; falls back
 * to a trivial placeholder diagram when none is present.
 */
function parseMermaid(text: string): string {
  const fenced = text.match(/```mermaid\s*\n([\s\S]*?)```/);
  const body = fenced?.[1]?.trim();
  return body ?? "flowchart TD\n A[No diagram generated]";
}
/**
 * Converts a markdown bullet list into plain strings: strips leading "-"/"*"
 * markers, trims whitespace, and drops empty lines.
 */
function parseList(text: string): string[] {
  const items: string[] = [];
  for (const raw of text.split("\n")) {
    const cleaned = raw.replace(/^[-*]\s*/, "").trim();
    if (cleaned) {
      items.push(cleaned);
    }
  }
  return items;
}
/**
 * Orchestrates the full documentation-generation pipeline against an LLM
 * provider: architecture overview → per-module summaries → pattern analysis
 * → getting-started guide. Reports coarse progress through `onProgress`.
 *
 * @param codeStructure Parsed repository structure (files, modules, deps).
 * @param provider      LLM backend used for every chat call.
 * @param onProgress    Optional callback invoked with (stage, percent).
 * @returns Assembled GeneratedDocs; id/repoUrl/repoName are left blank for
 *          the caller to fill in (they are not known at this layer).
 */
export async function generateDocumentation(
  codeStructure: CodeStructure,
  provider: LLMProvider,
  onProgress?: (stage: string, progress: number) => void
): Promise<GeneratedDocs> {
  // --- Stage 1: architecture overview + Mermaid diagram ------------------
  onProgress?.("architecture", 10);
  const archMessages = buildArchitecturePrompt(codeStructure);
  const archResponse = await provider.chat(archMessages);
  const architectureOverview = parseSection(archResponse, "Architecture Overview");
  const techStackRaw = parseSection(archResponse, "Tech Stack");
  const architectureDiagram = parseMermaid(archResponse);
  // The prompt requests the tech stack as a comma-separated list.
  const techStack = techStackRaw.split(",").map((s) => s.trim()).filter(Boolean);

  // --- Stage 2: per-module summaries (capped at 10 modules) ---------------
  onProgress?.("modules", 30);
  const moduleLimit = Math.min(codeStructure.modules.length, 10);
  // All module summaries are requested concurrently.
  const moduleSummaries = await Promise.all(
    codeStructure.modules.slice(0, moduleLimit).map(async (mod) => {
      const moduleFiles: FileNode[] = codeStructure.files.filter((f) =>
        mod.files.includes(f.path)
      );
      // Skip the LLM call entirely for modules with no parseable files.
      if (moduleFiles.length === 0) {
        return {
          name: mod.name,
          path: mod.path,
          summary: "Empty module — no parseable files found.",
          keyFiles: [],
          publicApi: [],
          dependsOn: [],
          dependedBy: [],
        };
      }
      const messages = buildModuleSummaryPrompt(mod, moduleFiles);
      // NOTE(review): `model: undefined` is a no-op override — the provider
      // falls back to its default model; confirm this was intentional.
      const response = await provider.chat(messages, { model: undefined });
      const summary = parseSection(response, "Summary");
      const keyFilesRaw = parseList(parseSection(response, "Key Files"));
      const publicApi = parseList(parseSection(response, "Public API"));
      // External packages this module imports: deduped, non-relative sources
      // only, capped at 10.
      const dependsOn = [
        ...new Set(
          moduleFiles.flatMap((f) =>
            f.imports
              .map((imp) => imp.source)
              .filter((s) => !s.startsWith("."))
          )
        ),
      ].slice(0, 10);
      // Files elsewhere in the repo whose imports target this module's files.
      const dependedBy = codeStructure.dependencies
        .filter((d) => mod.files.includes(d.target))
        .map((d) => d.source)
        .filter((s) => !mod.files.includes(s))
        .slice(0, 10);
      return {
        name: mod.name,
        path: mod.path,
        summary: summary || "Module analyzed but no summary generated.",
        // Per-file purposes are not parsed out of the response yet.
        keyFiles: keyFilesRaw.map((kf) => ({ path: kf, purpose: "" })),
        publicApi,
        dependsOn,
        dependedBy,
      };
    })
  );

  // --- Stage 3: conventions / design patterns / decisions -----------------
  onProgress?.("patterns", 60);
  const patternsMessages = buildPatternsPrompt(codeStructure);
  const patternsResponse = await provider.chat(patternsMessages);
  const conventions = parseList(parseSection(patternsResponse, "Coding Conventions"));
  const designPatterns = parseList(parseSection(patternsResponse, "Design Patterns"));
  const architecturalDecisions = parseList(parseSection(patternsResponse, "Architectural Decisions"));

  // --- Stage 4: onboarding guide (uses the stage-1 overview as context) ---
  onProgress?.("getting-started", 80);
  const gsMessages = buildGettingStartedPrompt(
    codeStructure,
    architectureOverview
  );
  const gsResponse = await provider.chat(gsMessages);
  const prerequisites = parseList(parseSection(gsResponse, "Prerequisites"));
  const setupSteps = parseList(parseSection(gsResponse, "Setup Steps"));
  const firstTask = parseSection(gsResponse, "Your First Task");

  onProgress?.("complete", 100);
  const languages = [...new Set(codeStructure.files.map((f) => f.language))];
  return {
    // Identity fields are populated by the caller / persistence layer.
    id: "",
    repoUrl: "",
    repoName: "",
    generatedAt: new Date().toISOString(),
    sections: {
      overview: {
        title: "Architecture Overview",
        description: architectureOverview,
        architectureDiagram,
        techStack,
        keyMetrics: {
          files: codeStructure.files.length,
          modules: codeStructure.modules.length,
          languages,
        },
      },
      modules: moduleSummaries,
      patterns: {
        conventions,
        designPatterns,
        architecturalDecisions,
      },
      gettingStarted: {
        prerequisites,
        setupSteps,
        firstTask,
      },
      // No dedicated dependency-graph generation here; the architecture
      // diagram is reused as a stand-in.
      dependencyGraph: architectureDiagram,
    },
  };
}

View File

@@ -0,0 +1,51 @@
import type { LLMMessage, CodeStructure } from "@codeboard/shared";
/**
 * Builds the system/user message pair that asks the model for a high-level
 * architecture overview, a tech-stack list, and a Mermaid flowchart.
 */
export function buildArchitecturePrompt(
  structure: CodeStructure
): LLMMessage[] {
  // One line per parsed file: path, language, and rough size indicators.
  const treeLines = structure.files
    .map((f) => ` ${f.path} (${f.language}, ${f.functions.length} functions, ${f.classes.length} classes)`)
    .join("\n");
  // One line per module with its file count.
  const moduleLines = structure.modules
    .map((m) => ` ${m.name}/ (${m.files.length} files)`)
    .join("\n");
  const entryPointList = structure.entryPoints.join(", ") || "none detected";
  // Cap the import-edge listing at 50 edges to keep the prompt bounded.
  const edgeLines = structure.dependencies
    .slice(0, 50)
    .map((d) => ` ${d.source} -> ${d.target}`)
    .join("\n");

  const systemMessage: LLMMessage = {
    role: "system",
    content: `You are an expert software architect analyzing a codebase. Generate a concise architecture overview and a Mermaid flowchart diagram.
Output format (use exactly these headers):
## Architecture Overview
[2-4 paragraphs describing the high-level architecture, key design decisions, and how components interact]
## Tech Stack
[comma-separated list of technologies detected]
## Mermaid Diagram
\`\`\`mermaid
[flowchart TD diagram showing modules and their relationships]
\`\`\``,
  };
  const userMessage: LLMMessage = {
    role: "user",
    content: `Analyze this codebase structure:
FILE TREE:
${treeLines}
MODULES:
${moduleLines}
ENTRY POINTS: ${entryPointList}
DEPENDENCIES (import edges):
${edgeLines}
Generate the architecture overview with a Mermaid diagram.`,
  };
  return [systemMessage, userMessage];
}

View File

@@ -0,0 +1,43 @@
import type { LLMMessage, CodeStructure } from "@codeboard/shared";
/**
 * Builds the prompt for the onboarding ("getting started") guide. README and
 * package.json excerpts are optional and truncated to keep the prompt small.
 */
export function buildGettingStartedPrompt(
  structure: CodeStructure,
  architectureOverview: string,
  readmeContent?: string,
  packageJsonContent?: string
): LLMMessage[] {
  // Optional context blocks: truncated excerpts, or explicit placeholders.
  const readmeBlock = readmeContent
    ? `README:\n${readmeContent.slice(0, 3000)}`
    : "README: not available";
  const packageBlock = packageJsonContent
    ? `PACKAGE.JSON:\n${packageJsonContent.slice(0, 2000)}`
    : "";
  const languageList = [...new Set(structure.files.map((f) => f.language))].join(", ");
  const entryPointList = structure.entryPoints.join(", ") || "none detected";

  const systemMessage: LLMMessage = {
    role: "system",
    content: `You are writing an onboarding guide for a new developer joining this project. Be specific and actionable.
Output format:
## Prerequisites
[list required tools, runtimes, and their versions]
## Setup Steps
[numbered list of concrete commands and actions to get the project running locally]
## Your First Task
[suggest a good first contribution — something small but meaningful that touches multiple parts of the codebase]`,
  };
  const userMessage: LLMMessage = {
    role: "user",
    content: `Create an onboarding guide for this project.
ARCHITECTURE OVERVIEW:
${architectureOverview}
${readmeBlock}
${packageBlock}
LANGUAGES: ${languageList}
ENTRY POINTS: ${entryPointList}
TOTAL FILES: ${structure.files.length}
TOTAL MODULES: ${structure.modules.length}
Write a concrete, actionable onboarding guide.`,
  };
  return [systemMessage, userMessage];
}

View File

@@ -0,0 +1,42 @@
import type { LLMMessage, ModuleNode, FileNode } from "@codeboard/shared";
/**
 * Builds the prompt asking the model to summarize a single module from the
 * signatures of its files (functions, classes, exports).
 */
export function buildModuleSummaryPrompt(
  module: ModuleNode,
  files: FileNode[]
): LLMMessage[] {
  // Signature outline per file: functions, then classes, then exports.
  const perFile: string[] = [];
  for (const f of files) {
    const fnLines = f.functions.map((fn) => ` ${fn.name}(${fn.params.join(", ")})`).join("\n");
    const classLines = f.classes.map((c) => ` class ${c.name}`).join("\n");
    const exportLines = f.exports.map((e) => ` export ${e.isDefault ? "default " : ""}${e.name}`).join("\n");
    perFile.push(` ${f.path}:\n${fnLines}\n${classLines}\n${exportLines}`);
  }
  const fileDetails = perFile.join("\n\n");

  const systemMessage: LLMMessage = {
    role: "system",
    content: `You are analyzing a code module. Provide a concise summary.
Output format:
## Summary
[1-2 paragraphs explaining what this module does and its role in the project]
## Key Files
[list each important file with a one-line description]
## Public API
[list the main exported functions/classes and what they do]`,
  };
  const userMessage: LLMMessage = {
    role: "user",
    content: `Module: ${module.name} (${module.path})
Files: ${module.files.length}
FILE DETAILS:
${fileDetails}
Summarize this module.`,
  };
  return [systemMessage, userMessage];
}

View File

@@ -0,0 +1,55 @@
import type { LLMMessage, CodeStructure } from "@codeboard/shared";
export function buildPatternsPrompt(structure: CodeStructure): LLMMessage[] {
const sampleFunctions = structure.files
.flatMap((f) => f.functions.map((fn) => `${f.path}: ${fn.name}(${fn.params.join(", ")})`))
.slice(0, 40)
.join("\n");
const sampleClasses = structure.files
.flatMap((f) => f.classes.map((c) => `${f.path}: class ${c.name} [${c.methods.map((m) => m.name).join(", ")}]`))
.slice(0, 20)
.join("\n");
const importSources = new Set<string>();
for (const f of structure.files) {
for (const imp of f.imports) {
importSources.add(imp.source);
}
}
return [
{
role: "system",
content: `You are a code reviewer identifying patterns and conventions in a codebase.
Output format:
## Coding Conventions
[list conventions like naming patterns, file organization, error handling approach]
## Design Patterns
[list design patterns detected: factory, singleton, observer, repository, etc.]
## Architectural Decisions
[list key architectural decisions: monorepo vs polyrepo, framework choices, state management, etc.]`,
},
{
role: "user",
content: `Analyze these code patterns:
FUNCTION SIGNATURES:
${sampleFunctions}
CLASS DEFINITIONS:
${sampleClasses}
EXTERNAL DEPENDENCIES:
${Array.from(importSources).filter((s) => !s.startsWith(".")).slice(0, 30).join(", ")}
DETECTED PATTERNS FROM AST:
${structure.patterns.map((p) => ` ${p.name}: ${p.description}`).join("\n") || " (none pre-detected)"}
Identify coding conventions, design patterns, and architectural decisions.`,
},
];
}

View File

@@ -0,0 +1,34 @@
import Anthropic from "@anthropic-ai/sdk";
import type { LLMMessage, LLMOptions } from "@codeboard/shared";
import type { LLMProvider } from "./base.js";
/**
 * LLMProvider backed by the Anthropic Messages API.
 *
 * Anthropic takes the system prompt as a dedicated `system` field rather than
 * a role:"system" entry in `messages`, so the message list is split here.
 */
export class AnthropicProvider implements LLMProvider {
  name = "anthropic";
  private client: Anthropic;
  private defaultModel: string;

  /**
   * @param apiKey Anthropic API key.
   * @param model  Default model id; falls back to "claude-sonnet-4-20250514".
   */
  constructor(apiKey: string, model?: string) {
    this.client = new Anthropic({ apiKey });
    this.defaultModel = model ?? "claude-sonnet-4-20250514";
  }

  /**
   * Sends a chat request and returns the text of the first text block in the
   * response (or "" if the response has no text block).
   *
   * Fix: `options.temperature` was previously ignored here while the OpenAI
   * provider honors it, so sampling settings silently differed by backend.
   * It is now forwarded when supplied.
   */
  async chat(messages: LLMMessage[], options?: LLMOptions): Promise<string> {
    // Hoist the (first) system message into the dedicated `system` field;
    // everything else stays in the messages array.
    const systemMessage = messages.find((m) => m.role === "system");
    const nonSystemMessages = messages
      .filter((m) => m.role !== "system")
      .map((m) => ({
        role: m.role as "user" | "assistant",
        content: m.content,
      }));
    const response = await this.client.messages.create({
      model: options?.model ?? this.defaultModel,
      max_tokens: options?.maxTokens ?? 4096,
      system: systemMessage?.content,
      // Forward sampling temperature only when the caller provided one, so
      // the API default is preserved otherwise.
      ...(options?.temperature !== undefined
        ? { temperature: options.temperature }
        : {}),
      messages: nonSystemMessages,
    });
    // Response content is a list of typed blocks; only text blocks carry prose.
    const textBlock = response.content.find((b) => b.type === "text");
    return textBlock?.type === "text" ? textBlock.text : "";
  }
}

View File

@@ -0,0 +1,6 @@
import type { LLMMessage, LLMOptions } from "@codeboard/shared";
/**
 * Minimal chat-completion abstraction implemented by each LLM backend.
 * Implementations receive the full message history (including any system
 * message) and resolve with the assistant's text reply.
 */
export interface LLMProvider {
  /** Provider identifier, e.g. "openai" or "anthropic". */
  name: string;
  /**
   * Sends `messages` to the model and resolves with the generated text.
   * `options` may override model/limits; exact semantics are provider-specific.
   */
  chat(messages: LLMMessage[], options?: LLMOptions): Promise<string>;
}

View File

@@ -0,0 +1,15 @@
import type { LLMProviderConfig } from "@codeboard/shared";
import type { LLMProvider } from "./base.js";
import { OpenAIProvider } from "./openai.js";
import { AnthropicProvider } from "./anthropic.js";
/**
 * Instantiates the LLM provider named in `config`.
 *
 * @throws Error for provider ids this package does not implement.
 */
export function createProvider(config: LLMProviderConfig): LLMProvider {
  if (config.provider === "openai") {
    return new OpenAIProvider(config.apiKey, config.model, config.baseUrl);
  }
  if (config.provider === "anthropic") {
    return new AnthropicProvider(config.apiKey, config.model);
  }
  throw new Error(`Unknown LLM provider: ${config.provider}`);
}

View File

@@ -0,0 +1,28 @@
import OpenAI from "openai";
import type { LLMMessage, LLMOptions } from "@codeboard/shared";
import type { LLMProvider } from "./base.js";
/**
 * LLMProvider backed by the OpenAI chat-completions API, or any
 * OpenAI-compatible endpoint when `baseUrl` is supplied.
 */
export class OpenAIProvider implements LLMProvider {
  name = "openai";
  private client: OpenAI;
  private defaultModel: string;

  /**
   * @param apiKey  OpenAI (or compatible) API key.
   * @param model   Default model id; falls back to "gpt-4o".
   * @param baseUrl Optional alternate endpoint; `undefined` uses the SDK default.
   */
  constructor(apiKey: string, model?: string, baseUrl?: string) {
    this.client = new OpenAI({
      apiKey,
      baseURL: baseUrl,
    });
    this.defaultModel = model ?? "gpt-4o";
  }

  /** Sends the chat request and returns the first choice's text, or "". */
  async chat(messages: LLMMessage[], options?: LLMOptions): Promise<string> {
    const completion = await this.client.chat.completions.create({
      model: options?.model ?? this.defaultModel,
      messages: messages.map((m) => ({ role: m.role, content: m.content })),
      temperature: options?.temperature ?? 0.3,
      max_tokens: options?.maxTokens ?? 4096,
    });
    const [firstChoice] = completion.choices;
    return firstChoice?.message?.content ?? "";
  }
}

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src"]
}