feat: add code chunker utility
This commit is contained in:
72
packages/llm/src/chunker.ts
Normal file
72
packages/llm/src/chunker.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import type { FileNode } from "@codeboard/shared";
|
||||
|
||||
/** Rough average number of characters per token, used for cheap size estimates. */
const APPROX_CHARS_PER_TOKEN = 4;

/**
 * Splits `content` into chunks that each fit an approximate token budget.
 *
 * The budget is converted to characters (`maxTokens * APPROX_CHARS_PER_TOKEN`).
 * Content that already fits is returned as a single chunk. Otherwise whole
 * lines are packed greedily, joined with "\n", so that no chunk grows past the
 * budget.
 *
 * A single line that is longer than the budget (e.g. minified code) is cut
 * into budget-sized pieces so that no chunk exceeds the limit. Cuts never
 * separate a UTF-16 surrogate pair. Note that when such a cut happens,
 * joining the chunks with "\n" no longer reproduces `content` exactly.
 *
 * If the budget is smaller than one character (non-positive or NaN
 * `maxTokens`), lines cannot be cut any further and each chunk simply holds
 * whatever the line-packing rule yields.
 *
 * @param content - Text to split.
 * @param maxTokens - Approximate maximum number of tokens per chunk.
 * @returns The chunks, in their original order.
 */
export function chunkCode(content: string, maxTokens: number): string[] {
  const maxChars = maxTokens * APPROX_CHARS_PER_TOKEN;
  if (content.length <= maxChars) return [content];

  // Whole number of characters a chunk may hold; hard splitting needs at least 1.
  const sliceSize = Math.floor(maxChars);
  const canSlice = Number.isFinite(sliceSize) && sliceSize >= 1;

  const chunks: string[] = [];
  let current: string[] = [];
  // Length of `current` joined with "\n", plus one trailing separator.
  let currentLen = 0;

  const flush = (): void => {
    if (current.length === 0) return;
    chunks.push(current.join("\n"));
    current = [];
    currentLen = 0;
  };

  for (const line of content.split("\n")) {
    if (canSlice && line.length > sliceSize) {
      // The line alone is over budget: emit what we have, then cut the line.
      flush();
      let start = 0;
      while (line.length - start > sliceSize) {
        let end = start + sliceSize;
        const lastUnit = line.charCodeAt(end - 1);
        // Do not end a piece on a high surrogate; keep the pair together.
        if (sliceSize > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
        chunks.push(line.slice(start, end));
        start = end;
      }
      // The remainder fits the budget and may still be packed with later lines.
      current.push(line.slice(start));
      currentLen = line.length - start + 1;
      continue;
    }

    // `currentLen + line.length` is the joined length if this line were added.
    if (currentLen + line.length > maxChars) flush();
    current.push(line);
    currentLen += line.length + 1;
  }

  flush();
  return chunks;
}
|
||||
|
||||
/**
 * Renders a compact, line-oriented text summary of a parsed file.
 *
 * The summary starts with a `File:` header, followed by an `Imports:` section
 * and an `Exports:` section (each only when non-empty), one line per
 * function, and one `class` line per class followed by its methods and then
 * its properties. Function docstrings are cut to their first 100 characters.
 *
 * @param fileNode - Parsed file description to summarise.
 * @returns The summary lines joined with "\n".
 */
export function extractSignatures(fileNode: FileNode): string {
  const summary: string[] = [`File: ${fileNode.path} (${fileNode.language})`];
  const emit = (line: string): void => {
    summary.push(line);
  };

  const importList = fileNode.imports;
  if (importList.length > 0) {
    emit("Imports:");
    for (const entry of importList) {
      const names = entry.specifiers.join(", ");
      emit(` from "${entry.source}" import {${names}}`);
    }
  }

  const exportList = fileNode.exports;
  if (exportList.length > 0) {
    emit("Exports:");
    for (const entry of exportList) {
      const prefix = entry.isDefault ? "default " : "";
      emit(` ${prefix}${entry.name}`);
    }
  }

  for (const func of fileNode.functions) {
    let signature = `function ${func.name}(${func.params.join(", ")})`;
    if (func.returnType) {
      signature += `: ${func.returnType}`;
    }
    if (func.docstring) {
      // Only the first 100 characters of the docstring are shown.
      signature += ` — ${func.docstring.slice(0, 100)}`;
    }
    emit(signature);
  }

  for (const cls of fileNode.classes) {
    emit(`class ${cls.name}`);

    for (const method of cls.methods) {
      const args = method.params.join(", ");
      emit(` method ${method.name}(${args})`);
    }

    for (const prop of cls.properties) {
      const typeSuffix = prop.type ? `: ${prop.type}` : "";
      emit(` property ${prop.name}${typeSuffix}`);
    }
  }

  return summary.join("\n");
}
|
||||
|
||||
/**
 * Estimates how many tokens `text` occupies.
 *
 * The estimate is the text length divided by `APPROX_CHARS_PER_TOKEN`,
 * rounded up to the next whole number.
 *
 * @param text - Text to measure.
 * @returns The estimated token count.
 */
export function estimateTokens(text: string): number {
  const charCount = text.length;
  const rawEstimate = charCount / APPROX_CHARS_PER_TOKEN;
  return Math.ceil(rawEstimate);
}
|
||||
Reference in New Issue
Block a user