// packages/parser/src/file-walker.ts
import type { Stats } from "node:fs";
import { readdir, stat, readFile } from "node:fs/promises";
import { join, relative, extname, basename } from "node:path";

/**
 * Entry names that are never descended into or reported.
 *
 * The dot-prefixed names are also covered by the generic "starts with a dot"
 * rule in {@link walkDir}; they are listed here so the intent stays explicit
 * even if that rule is ever relaxed.
 */
const IGNORED_DIRS = new Set([
  "node_modules",
  ".git",
  "dist",
  "build",
  "vendor",
  "__pycache__",
  ".next",
  ".turbo",
  "coverage",
  ".venv",
  "venv",
  ".tox",
  "target",
  ".cache",
  ".idea",
  ".vscode",
]);

/** Maps a file extension (including the leading dot, case-sensitive) to a language id. */
const LANGUAGE_MAP: Readonly<Record<string, string>> = {
  ".ts": "typescript",
  ".tsx": "typescript",
  ".js": "javascript",
  ".jsx": "javascript",
  ".mjs": "javascript",
  ".cjs": "javascript",
  ".py": "python",
  ".go": "go",
  ".rs": "rust",
  ".java": "java",
  ".rb": "ruby",
  ".php": "php",
  ".cs": "csharp",
  ".cpp": "cpp",
  ".c": "c",
  ".h": "c",
  ".hpp": "cpp",
  ".swift": "swift",
  ".kt": "kotlin",
};

/** File base names (without extension) that mark a file as an entry point. */
const ENTRY_POINT_NAMES = new Set([
  "index",
  "main",
  "app",
  "server",
  "mod",
  "lib",
  "__init__",
  "manage",
]);

/** Files strictly larger than this many bytes are skipped by the walker. */
const MAX_FILE_SIZE_BYTES = 500_000;

/** A source file discovered by {@link walkFiles}. */
export interface WalkedFile {
  /** Path formed by joining the walked directory with the entry name. */
  absolutePath: string;
  /** `absolutePath` expressed relative to the walk root. */
  relativePath: string;
  /** Language id looked up from the file extension. */
  language: string;
  /** Size in bytes as reported by `stat`. */
  size: number;
  /** True when the base name without extension is a well-known entry-point name. */
  isEntryPoint: boolean;
}

/** Returns true when `err` is a filesystem error carrying the `ENOENT` code. */
function isMissingEntryError(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    "code" in err &&
    (err as { code?: unknown }).code === "ENOENT"
  );
}

/**
 * Stats `path`, returning `null` when the target does not exist (dangling
 * symlink, or a file removed after it was listed). Any other error is rethrown.
 */
async function statIfPresent(path: string): Promise<Stats | null> {
  try {
    return await stat(path);
  } catch (err: unknown) {
    if (isMissingEntryError(err)) return null;
    throw err;
  }
}

/**
 * Recursively collects recognised source files under `dir` into `results`.
 *
 * Skipped entries: names in `IGNORED_DIRS`, names starting with a dot, files
 * whose extension is not in `LANGUAGE_MAP`, files larger than
 * `MAX_FILE_SIZE_BYTES`, entries whose `stat` target is missing, and entries
 * that do not resolve to a regular file.
 *
 * @param dir - Directory to list.
 * @param rootDir - Walk root, used to compute `relativePath`.
 * @param results - Accumulator that discovered files are appended to.
 * @throws Propagates `readdir`/`stat` errors other than `ENOENT` on `stat`.
 */
async function walkDir(
  dir: string,
  rootDir: string,
  results: WalkedFile[]
): Promise<void> {
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    if (IGNORED_DIRS.has(entry.name)) continue;
    if (entry.name.startsWith(".")) continue;

    const fullPath = join(dir, entry.name);

    // Only real directories are descended into; a symlink Dirent does not
    // report itself as a directory, so linked directories are not followed.
    if (entry.isDirectory()) {
      await walkDir(fullPath, rootDir, results);
      continue;
    }

    const ext = extname(entry.name);
    const language = LANGUAGE_MAP[ext];
    if (!language) continue;

    // A dangling symlink or a file deleted mid-walk must not abort the walk.
    const fileStat = await statIfPresent(fullPath);
    if (fileStat === null) continue;

    // `stat` follows symlinks: reject links to directories, sockets, etc.
    // that merely carry a source-like extension.
    if (!fileStat.isFile()) continue;
    if (fileStat.size > MAX_FILE_SIZE_BYTES) continue;

    const nameWithoutExt = basename(entry.name, ext);
    const isEntryPoint = ENTRY_POINT_NAMES.has(nameWithoutExt);

    results.push({
      absolutePath: fullPath,
      relativePath: relative(rootDir, fullPath),
      language,
      size: fileStat.size,
      isEntryPoint,
    });
  }
}

/**
 * Walks `repoPath` and returns every recognised source file.
 *
 * Entry-point files come first; within each group files are ordered by
 * `relativePath` using `String.prototype.localeCompare`.
 *
 * @param repoPath - Root directory to walk.
 * @returns The discovered files, sorted as described above.
 */
export async function walkFiles(repoPath: string): Promise<WalkedFile[]> {
  const results: WalkedFile[] = [];
  await walkDir(repoPath, repoPath, results);

  results.sort((a, b) => {
    if (a.isEntryPoint && !b.isEntryPoint) return -1;
    if (!a.isEntryPoint && b.isEntryPoint) return 1;
    return a.relativePath.localeCompare(b.relativePath);
  });

  return results;
}

/**
 * Reads a file and decodes it as UTF-8 text.
 *
 * @param filePath - Path of the file to read.
 * @returns The file contents as a string.
 */
export async function readFileContent(filePath: string): Promise<string> {
  return readFile(filePath, "utf-8");
}

/**
 * Looks up the language id for a path from its extension.
 *
 * @param filePath - Any path; only its extension is inspected.
 * @returns The language id, or `null` when the extension is not recognised.
 */
export function detectLanguage(filePath: string): string | null {
  return LANGUAGE_MAP[extname(filePath)] ?? null;
}