/**
 * Heuristic, regex-assisted parser for Python source files
 * (packages/parser/src/languages/python.ts).
 *
 * The parser does not build a real syntax tree: definitions are located by
 * their header keywords and their extent is derived from indentation. Text
 * inside multi-line strings can therefore still be misread as code.
 */
import type {
  FileNode,
  FunctionNode,
  ClassNode,
  ImportNode,
  ExportNode,
} from "@codeboard/shared";
import type { LanguageParser } from "./base.js";

// Indentation is matched with `[ \t]*` rather than `\s*`: under the `m` flag
// `\s` also consumes newlines, which would fold preceding blank lines into the
// "indent" capture and corrupt both the indent width and the line number.
const DEF_HEADER_RE = /^([ \t]*)(?:async[ \t]+)?def[ \t]+(\w+)[ \t]*\(/gm;
// Applied (sticky) right after the closing `)` of a parameter list.
const SIGNATURE_TAIL_RE = /\s*(?:->\s*([^:]+))?\s*:/y;
const CLASS_RE = /^([ \t]*)class[ \t]+(\w+)(?:[ \t]*\(([^)]*)\))?[ \t]*:/gm;
// Only column-0 imports are recognised; names are either parenthesised
// (possibly spanning lines) or the rest of the line.
const IMPORT_RE =
  /^(?:from[ \t]+([\w.]+)[ \t]+)?import[ \t]+(?:\(([^)]*)\)|(.+))/gm;
const IMPORT_NAME_RE = /^(?:[\w.]+|\*)$/;
const ALL_RE = /^__all__\s*=\s*[\[(]([^\])]*)[\])]/m;
const DOCSTRING_RE = /^\s*(?:"""([\s\S]*?)"""|'''([\s\S]*?)''')/;
const BRANCH_RE = /^(?:if|elif|for|while|except)\b/;
const COMMENT_RE = /#.*$/gm;

/** A scanned `def` together with the indentation width of its header line. */
interface ScannedFunction {
  indent: number;
  node: FunctionNode;
}

/**
 * If `text[i]` starts a comment or a quoted string, returns the index just
 * past it (a comment ends before its newline; a string ends after its closing
 * quote or at the end of the line). Otherwise returns `i` unchanged.
 */
function skipStringOrComment(text: string, i: number): number {
  const ch = text.charAt(i);
  if (ch === "#") {
    const eol = text.indexOf("\n", i);
    return eol === -1 ? text.length : eol;
  }
  if (ch !== '"' && ch !== "'") return i;
  let j = i + 1;
  while (j < text.length && text.charAt(j) !== ch && text.charAt(j) !== "\n") {
    j += text.charAt(j) === "\\" ? 2 : 1; // step over escaped characters
  }
  return Math.min(j + 1, text.length);
}

/**
 * Returns the index of the bracket that closes the one at `openIdx`, ignoring
 * brackets inside strings and comments, or -1 when it is never closed.
 */
function findClosingBracket(text: string, openIdx: number): number {
  let depth = 0;
  let i = openIdx;
  while (i < text.length) {
    const next = skipStringOrComment(text, i);
    if (next !== i) {
      i = next;
      continue;
    }
    const ch = text.charAt(i);
    if ("([{".includes(ch)) {
      depth++;
    } else if (")]}".includes(ch)) {
      depth--;
      if (depth === 0) return i;
    }
    i++;
  }
  return -1;
}

/**
 * Splits `raw` on commas that are not nested inside brackets or strings.
 * Comment text is dropped from the returned pieces.
 */
function splitTopLevel(raw: string): string[] {
  const parts: string[] = [];
  let depth = 0;
  let current = "";
  let i = 0;
  while (i < raw.length) {
    const next = skipStringOrComment(raw, i);
    if (next !== i) {
      if (raw.charAt(i) !== "#") current += raw.slice(i, next);
      i = next;
      continue;
    }
    const ch = raw.charAt(i);
    if ("([{".includes(ch)) depth++;
    else if (")]}".includes(ch)) depth--;
    if (ch === "," && depth === 0) {
      parts.push(current);
      current = "";
    } else {
      current += ch;
    }
    i++;
  }
  parts.push(current);
  return parts;
}

/**
 * Extracts parameter names from the text between a signature's parentheses.
 *
 * Annotations and default values are stripped; `self`, `cls` and the bare
 * `*` / `/` separators are omitted. Star prefixes (`*args`, `**kwargs`) are
 * kept as part of the name.
 */
function parseParams(raw: string): string[] {
  if (!raw.trim()) return [];
  return splitTopLevel(raw)
    .map((piece) => {
      const text = piece.trim();
      const cut = text.search(/[:=]/);
      return (cut === -1 ? text : text.slice(0, cut)).trim();
    })
    .filter(
      (name) =>
        name !== "" &&
        name !== "self" &&
        name !== "cls" &&
        name !== "*" &&
        name !== "/",
    );
}

/** Returns the 1-based line number of the character at `offset`. */
function lineNumberAt(text: string, offset: number): number {
  let line = 1;
  let newline = text.indexOf("\n");
  while (newline !== -1 && newline < offset) {
    line++;
    newline = text.indexOf("\n", newline + 1);
  }
  return line;
}

/** Width of the leading whitespace of `line`, in characters. */
function leadingWhitespace(line: string): number {
  return line.length - line.trimStart().length;
}

/**
 * Returns the 1-based number of the last non-blank line belonging to the
 * block whose header ends on line `headerEndLine` and is indented by
 * `headerIndent` characters. A header without an indented body yields
 * `headerEndLine` itself.
 */
function findBlockEnd(
  lines: readonly string[],
  headerEndLine: number,
  headerIndent: number,
): number {
  let lastLine = headerEndLine;
  // `headerEndLine` is 1-based, so used as a 0-based index it addresses the
  // first line after the header.
  for (let idx = headerEndLine; idx < lines.length; idx++) {
    const line = lines[idx] ?? "";
    const trimmed = line.trim();
    if (trimmed === "") continue;
    if (leadingWhitespace(line) <= headerIndent) {
      // Comments may legally sit at any indentation; they neither end the
      // block nor extend it.
      if (trimmed.startsWith("#")) continue;
      break;
    }
    lastLine = idx + 1;
  }
  return lastLine;
}

/**
 * Indentation of the first code line between `headerEndLine` (exclusive) and
 * `blockEndLine` (inclusive), or `undefined` when the block has no body lines.
 */
function bodyIndentOf(
  lines: readonly string[],
  headerEndLine: number,
  blockEndLine: number,
): number | undefined {
  for (let idx = headerEndLine; idx < blockEndLine; idx++) {
    const line = lines[idx] ?? "";
    const trimmed = line.trim();
    if (trimmed !== "" && !trimmed.startsWith("#")) {
      return leadingWhitespace(line);
    }
  }
  return undefined;
}

/**
 * Finds every `def` / `async def` in `content`, at any nesting depth.
 * Headers whose parameter list is never closed or is not followed by `:`
 * are skipped.
 */
function scanFunctions(
  content: string,
  lines: readonly string[],
): ScannedFunction[] {
  const found: ScannedFunction[] = [];
  for (const header of content.matchAll(DEF_HEADER_RE)) {
    const start = header.index ?? 0;
    const openIdx = start + header[0].length - 1;
    const closeIdx = findClosingBracket(content, openIdx);
    if (closeIdx === -1) continue;

    SIGNATURE_TAIL_RE.lastIndex = closeIdx + 1;
    const tail = SIGNATURE_TAIL_RE.exec(content);
    if (tail === null) continue;

    const indent = (header[1] ?? "").length;
    const lineStart = lineNumberAt(content, start);
    // The signature may span several lines; the body starts after its colon.
    const headerEndLine = lineNumberAt(
      content,
      tail.index + tail[0].length - 1,
    );
    const lineEnd = findBlockEnd(lines, headerEndLine, indent);

    const body = lines.slice(headerEndLine, lineEnd).join("\n");
    const docMatch = DOCSTRING_RE.exec(body);
    const docstring = docMatch
      ? (docMatch[1] ?? docMatch[2] ?? "").trim()
      : undefined;

    found.push({
      indent,
      node: {
        name: header[2] ?? "",
        params: parseParams(content.slice(openIdx + 1, closeIdx)),
        returnType: tail[1]?.trim(),
        lineStart,
        lineEnd,
        docstring,
        calls: [],
      },
    });
  }
  return found;
}

/**
 * Finds every `class` in `content` and attaches the functions defined
 * directly in its body: those inside the class's line span whose indentation
 * equals that of the first code line of the class body.
 */
function scanClasses(
  content: string,
  lines: readonly string[],
  scanned: readonly ScannedFunction[],
): ClassNode[] {
  const classes: ClassNode[] = [];
  for (const header of content.matchAll(CLASS_RE)) {
    const start = header.index ?? 0;
    const classIndent = (header[1] ?? "").length;
    const headerLine = lineNumberAt(content, start + header[0].length - 1);
    const classEnd = findBlockEnd(lines, headerLine, classIndent);
    const bodyIndent = bodyIndentOf(lines, headerLine, classEnd);

    const methods = scanned
      .filter(
        (fn) =>
          fn.indent === bodyIndent &&
          fn.node.lineStart > headerLine &&
          fn.node.lineStart <= classEnd,
      )
      .map((fn) => fn.node);

    classes.push({ name: header[2] ?? "", methods, properties: [] });
  }
  return classes;
}

/** Splits a comma-separated list, dropping `#` comments and empty items. */
function splitList(raw: string): string[] {
  return raw
    .replace(COMMENT_RE, "")
    .split(",")
    .map((item) => item.trim())
    .filter(Boolean);
}

/**
 * Collects column-0 `import x` / `from m import a, b` statements. Aliases
 * (`as name`) are discarded; a plain `import a, b` yields one entry per module.
 */
function scanImports(content: string): ImportNode[] {
  const imports: ImportNode[] = [];
  for (const stmt of content.matchAll(IMPORT_RE)) {
    const fromModule = stmt[1];
    const names = splitList(stmt[2] ?? stmt[3] ?? "")
      .map((item) => item.replace(/\s+as\s+[\s\S]*$/, ""))
      // Discards residue such as a stray "(" or a line-continuation "\".
      .filter((name) => IMPORT_NAME_RE.test(name));

    if (fromModule) {
      imports.push({ source: fromModule, specifiers: names });
    } else {
      for (const name of names) {
        imports.push({ source: name, specifiers: [name] });
      }
    }
  }
  return imports;
}

/** Reads the names listed in a literal `__all__ = [...]` or `(...)`. */
function scanExports(content: string): ExportNode[] {
  const allMatch = ALL_RE.exec(content);
  if (!allMatch) return [];
  return splitList(allMatch[1] ?? "")
    .map((item) => item.replace(/['"]/g, "").trim())
    .filter(Boolean)
    .map((name) => ({ name, isDefault: false }));
}

/**
 * Rough branching score: +1 for each line that starts with `if`, `elif`,
 * `for`, `while` or `except`, and +1 for each line containing ` and ` or
 * ` or `. Blank lines and comment-only lines are ignored.
 */
function estimateComplexity(lines: readonly string[]): number {
  let complexity = 0;
  for (const line of lines) {
    const trimmed = line.trim();
    if (trimmed === "" || trimmed.startsWith("#")) continue;
    if (BRANCH_RE.test(trimmed)) complexity++;
    if (trimmed.includes(" and ") || trimmed.includes(" or ")) complexity++;
  }
  return complexity;
}

/** Language parser for `.py` files. */
export const pythonParser: LanguageParser = {
  extensions: [".py"],

  /**
   * Parses Python source text into a `FileNode`.
   *
   * @param content - Full text of the Python file.
   * @param filePath - Path recorded as `path` on the result.
   * @returns Top-level functions (indentation 0), all classes with their
   *   direct methods, imports, `__all__` exports and a complexity estimate.
   *   Line numbers are 1-based; `lineEnd` is the last non-blank line of the
   *   definition's body.
   */
  parse(content: string, filePath: string): FileNode {
    const lines = content.split("\n");
    const scanned = scanFunctions(content, lines);

    return {
      path: filePath,
      language: "python",
      size: content.length,
      functions: scanned.filter((fn) => fn.indent === 0).map((fn) => fn.node),
      classes: scanClasses(content, lines, scanned),
      imports: scanImports(content),
      exports: scanExports(content),
      complexity: estimateComplexity(lines),
    };
  },
};