// Provenance (reconstructed from a whitespace-collapsed patch):
//   From b0065b60c49f4badc946ef80dc976f2374143bfe Mon Sep 17 00:00:00 2001
//   From: repi
//   Date: Mon, 1 Jan 2001 00:00:00 +0000
//   Subject: [PATCH] feat: add repository analyzer
//   File: packages/parser/src/analyzer.ts (new file)

import { readFile } from "node:fs/promises";
import { basename, dirname, posix } from "node:path";
import type {
  CodeStructure,
  FileNode,
  ModuleNode,
  DependencyEdge,
  ExportNode,
} from "@codeboard/shared";
import { walkFiles } from "./file-walker.js";
import { typescriptParser } from "./languages/typescript.js";
import { pythonParser } from "./languages/python.js";
import type { LanguageParser } from "./languages/base.js";

/** Upper bound on how many walked files are considered in one analysis run. */
const MAX_FILES = 200;

const parsers: LanguageParser[] = [typescriptParser, pythonParser];

/**
 * File extensions associated with each language name.
 *
 * A `Map` is used instead of an object literal so that arbitrary language
 * strings (e.g. "constructor") can never hit `Object.prototype` members.
 */
const LANGUAGE_EXTENSIONS = new Map<string, readonly string[]>([
  ["typescript", [".ts", ".tsx"]],
  ["javascript", [".js", ".jsx", ".mjs", ".cjs"]],
  ["python", [".py"]],
]);

/**
 * Suffixes appended to a relative import when looking it up among the parsed
 * files. The empty suffix covers imports that already carry an extension.
 */
const RESOLVE_EXTENSIONS: readonly string[] = [
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".py",
  "",
];

/**
 * ESM-style TypeScript sources import `./foo.js` while the file on disk is
 * `foo.ts`; this maps an emitted extension back to its possible sources.
 */
const JS_TO_TS_EXTENSIONS = new Map<string, readonly string[]>([
  [".js", [".ts", ".tsx"]],
  [".jsx", [".tsx"]],
  [".mjs", [".mts"]],
  [".cjs", [".cts"]],
]);

/** Extension-less file names that mark a file as an entry point. */
const ENTRY_POINT_NAMES = new Set<string>([
  "index",
  "main",
  "app",
  "server",
  "mod",
  "lib",
  "__init__",
]);

/**
 * Finds the first registered parser that handles at least one extension of
 * the given language.
 *
 * @param language - Language name as reported by the file walker.
 * @returns The matching parser, or `null` when the language is unknown or no
 *   registered parser covers it.
 */
function getParser(language: string): LanguageParser | null {
  const languageExtensions = LANGUAGE_EXTENSIONS.get(language);
  if (!languageExtensions) return null;

  return (
    parsers.find((p) =>
      p.extensions.some((ext) => languageExtensions.includes(ext))
    ) ?? null
  );
}

/**
 * Groups files by their containing directory, one module per directory.
 *
 * @param files - Parsed files to group.
 * @returns Modules in first-seen directory order. Files that live directly in
 *   the root directory are grouped under the name "root".
 */
function buildModules(files: FileNode[]): ModuleNode[] {
  const dirMap = new Map<string, string[]>();

  for (const file of files) {
    const dir = dirname(file.path);
    const existing = dirMap.get(dir);
    if (existing) {
      existing.push(file.path);
    } else {
      dirMap.set(dir, [file.path]);
    }
  }

  return Array.from(dirMap, ([dirPath, filePaths]) => ({
    // dirname() yields "." for root-level files and basename(".") is ".",
    // so "." has to be mapped to "root" explicitly.
    name: dirPath === "." ? "root" : basename(dirPath) || "root",
    path: dirPath,
    files: filePaths,
  }));
}

/**
 * Resolves a relative import specifier to the path of a known file.
 *
 * @param importerPath - Path of the file containing the import.
 * @param source - Import specifier; expected to start with ".".
 * @param knownPaths - Paths of all parsed files.
 * @returns The matching known path, or `undefined` when nothing matches.
 */
function resolveRelativeImport(
  importerPath: string,
  source: string,
  knownPaths: ReadonlySet<string>
): string | undefined {
  const raw = `${dirname(importerPath)}/${source.replace(/^\.\//, "")}`;
  const normalized = posix.normalize(raw);
  // The un-normalized form is tried first so that anything that resolved
  // before still resolves to the same path. The normalized forms add support
  // for "../" segments and for importers located in the root directory.
  const candidates = [...new Set([raw, normalized, `./${normalized}`])];

  for (const candidate of candidates) {
    for (const ext of RESOLVE_EXTENSIONS) {
      const asFile = candidate + ext;
      if (knownPaths.has(asFile)) return asFile;

      const asIndex = `${candidate}/index${ext}`;
      if (knownPaths.has(asIndex)) return asIndex;
    }
  }

  // Last resort: "./foo.js" written in a TypeScript source refers to "foo.ts".
  for (const candidate of candidates) {
    const emittedExt = posix.extname(candidate);
    const stem = candidate.slice(0, candidate.length - emittedExt.length);
    for (const sourceExt of JS_TO_TS_EXTENSIONS.get(emittedExt) ?? []) {
      if (knownPaths.has(stem + sourceExt)) return stem + sourceExt;
    }
  }

  return undefined;
}

/**
 * Produces one "import" edge per import statement of every file.
 *
 * Specifiers starting with "." are resolved against the parsed files; when
 * resolution fails, and for all other specifiers, the edge target is the
 * specifier exactly as written.
 *
 * @param files - Parsed files whose imports are turned into edges.
 * @returns Edges in file order, then import order.
 */
function buildDependencies(files: FileNode[]): DependencyEdge[] {
  const edges: DependencyEdge[] = [];
  const filePathSet = new Set(files.map((f) => f.path));

  for (const file of files) {
    for (const imp of file.imports) {
      const resolved = imp.source.startsWith(".")
        ? resolveRelativeImport(file.path, imp.source, filePathSet) ??
          imp.source
        : imp.source;

      edges.push({
        source: file.path,
        target: resolved,
        type: "import",
      });
    }
  }

  return edges;
}

/**
 * Returns the paths of files whose name, with the last extension removed, is
 * one of the conventional entry-point names.
 *
 * @param files - Parsed files to inspect.
 */
function detectEntryPoints(files: FileNode[]): string[] {
  return files
    .filter((f) => {
      const name = basename(f.path).replace(/\.[^.]+$/, "");
      return ENTRY_POINT_NAMES.has(name);
    })
    .map((f) => f.path);
}

/**
 * Flattens the exports of all files into a single list, in file order.
 *
 * @param files - Parsed files whose exports are collected.
 */
function collectExports(files: FileNode[]): ExportNode[] {
  return files.flatMap((file) => file.exports);
}

/**
 * Walks a repository, parses the supported source files and assembles the
 * resulting code structure.
 *
 * Only the first `MAX_FILES` walked files are considered. Files without a
 * matching parser are skipped.
 *
 * @param repoPath - Path of the repository, passed to the file walker.
 * @returns The parsed files together with derived modules, entry points,
 *   exports and dependency edges; `patterns` is always empty.
 */
export async function analyzeRepository(
  repoPath: string
): Promise<CodeStructure> {
  const walkedFiles = await walkFiles(repoPath);
  const filesToAnalyze = walkedFiles.slice(0, MAX_FILES);

  // Files are read concurrently; Promise.all keeps the walker's order.
  const parseResults = await Promise.all(
    filesToAnalyze.map(async (walkedFile): Promise<FileNode | null> => {
      const parser = getParser(walkedFile.language);
      if (!parser) return null;

      try {
        const content = await readFile(walkedFile.absolutePath, "utf-8");
        return parser.parse(content, walkedFile.relativePath);
      } catch {
        // Best effort: a file that cannot be read or parsed is left out of
        // the result instead of failing the whole analysis.
        return null;
      }
    })
  );
  const parsedFiles = parseResults.filter(
    (file): file is FileNode => file !== null
  );

  const modules = buildModules(parsedFiles);
  const dependencies = buildDependencies(parsedFiles);
  const entryPoints = detectEntryPoints(parsedFiles);
  const exports = collectExports(parsedFiles);

  return {
    files: parsedFiles,
    modules,
    entryPoints,
    exports,
    dependencies,
    patterns: [],
  };
}