// packages/llm/src/chunker.ts
// Introduced by patch "feat: add code chunker utility"
// (commit bb16dc4b6b3d69cce3eb10583a59e03feac5f33b).

import type { FileNode } from "@codeboard/shared";

/** Rough heuristic used throughout this module: one token is about four characters. */
const APPROX_CHARS_PER_TOKEN = 4;

/**
 * Cuts a single line that is longer than the budget into consecutive slices of
 * at most `width` UTF-16 code units.
 *
 * A cut is moved one unit earlier when it would otherwise land between the two
 * halves of a surrogate pair, so no slice ends with a dangling high surrogate
 * (unless `width` is 1, where there is no room to back off).
 *
 * @param line - The oversized line (must not contain "\n").
 * @param width - Maximum slice length; must be an integer >= 1.
 * @returns The slices, in order; concatenating them yields `line`.
 */
function splitOversizedLine(line: string, width: number): string[] {
  const slices: string[] = [];
  let start = 0;
  while (start < line.length) {
    let end = Math.min(start + width, line.length);
    if (end < line.length && end - start > 1) {
      const lastUnit = line.charCodeAt(end - 1);
      const isHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      if (isHighSurrogate) end -= 1;
    }
    slices.push(line.slice(start, end));
    start = end;
  }
  return slices;
}

/**
 * Splits `content` into chunks that each fit an approximate token budget.
 *
 * The budget is converted to characters (`maxTokens * APPROX_CHARS_PER_TOKEN`).
 * Content that already fits is returned as a single chunk. Otherwise lines are
 * packed greedily: a chunk is closed as soon as appending the next line (plus
 * its joining newline) would exceed the budget.
 *
 * A single line that is longer than the whole budget is hard-split into
 * budget-sized slices, so no returned chunk is longer than the budget. Slices
 * of one line are never joined to each other with a newline; only the last
 * slice may be followed by subsequent lines in the same chunk.
 *
 * When the budget is smaller than one character (zero, negative or NaN
 * `maxTokens`) hard-splitting is impossible; lines are then kept whole and
 * grouped by the same greedy rule.
 *
 * @param content - Source text to split.
 * @param maxTokens - Approximate maximum number of tokens per chunk.
 * @returns The chunks, in source order.
 */
export function chunkCode(content: string, maxTokens: number): string[] {
  const maxChars = maxTokens * APPROX_CHARS_PER_TOKEN;
  if (content.length <= maxChars) return [content];

  // Slicing needs a whole, positive width; line lengths are integers, so
  // comparing against the floored budget is equivalent to comparing against
  // the budget itself.
  const sliceWidth = Math.floor(maxChars);
  const canHardSplit = Number.isFinite(sliceWidth) && sliceWidth >= 1;

  const chunks: string[] = [];
  let current: string[] = [];
  // Length of `current` once joined, counting one separator per entry, so
  // `currentLen + next.length` is the exact joined length after appending.
  let currentLen = 0;

  for (const line of content.split("\n")) {
    const pieces =
      canHardSplit && line.length > sliceWidth
        ? splitOversizedLine(line, sliceWidth)
        : [line];

    let isContinuation = false;
    for (const piece of pieces) {
      // Later slices of the same line must start a new chunk: joining them
      // with "\n" would insert a line break the source never had.
      const overflows = currentLen + piece.length > maxChars;
      if ((isContinuation || overflows) && current.length > 0) {
        chunks.push(current.join("\n"));
        current = [];
        currentLen = 0;
      }
      current.push(piece);
      currentLen += piece.length + 1;
      isContinuation = true;
    }
  }

  if (current.length > 0) {
    chunks.push(current.join("\n"));
  }

  return chunks;
}

/**
 * Renders a compact, line-oriented text summary of a parsed file.
 *
 * Output order: a `File:` header with path and language; an `Imports:` section
 * (only when there are imports); an `Exports:` section (only when there are
 * exports); one `function` line per function with parameters, optional return
 * type and the first 100 characters of its docstring; then each class followed
 * by its methods and properties.
 *
 * NOTE(review): the leading whitespace inside the indented string literals is
 * reproduced exactly as it appears in the collapsed patch text (one space);
 * confirm against the original file in case wider indentation was intended.
 *
 * @param fileNode - Parsed file description to summarise.
 * @returns The summary, with entries separated by "\n".
 */
export function extractSignatures(fileNode: FileNode): string {
  const out: string[] = [`File: ${fileNode.path} (${fileNode.language})`];

  if (fileNode.imports.length > 0) {
    out.push("Imports:");
    for (const entry of fileNode.imports) {
      const names = entry.specifiers.join(", ");
      out.push(` from "${entry.source}" import {${names}}`);
    }
  }

  if (fileNode.exports.length > 0) {
    out.push("Exports:");
    for (const entry of fileNode.exports) {
      const prefix = entry.isDefault ? "default " : "";
      out.push(` ${prefix}${entry.name}`);
    }
  }

  for (const fn of fileNode.functions) {
    const paramList = fn.params.join(", ");
    const returnSuffix = fn.returnType ? `: ${fn.returnType}` : "";
    // Docstrings are cut to their first 100 characters to keep the summary short.
    const docSuffix = fn.docstring ? ` — ${fn.docstring.slice(0, 100)}` : "";
    out.push(`function ${fn.name}(${paramList})${returnSuffix}${docSuffix}`);
  }

  for (const cls of fileNode.classes) {
    out.push(`class ${cls.name}`);
    for (const method of cls.methods) {
      out.push(` method ${method.name}(${method.params.join(", ")})`);
    }
    for (const prop of cls.properties) {
      const typeSuffix = prop.type ? `: ${prop.type}` : "";
      out.push(` property ${prop.name}${typeSuffix}`);
    }
  }

  return out.join("\n");
}

/**
 * Estimates how many tokens `text` occupies, using the same
 * characters-per-token heuristic as `chunkCode`.
 *
 * @param text - Text to measure (by `length`, i.e. UTF-16 code units).
 * @returns `text.length / APPROX_CHARS_PER_TOKEN`, rounded up.
 */
export function estimateTokens(text: string): number {
  return Math.ceil(text.length / APPROX_CHARS_PER_TOKEN);
}