Files
codeboard/packages/llm/src/chunker.ts
2001-01-01 00:00:00 +00:00

73 lines
2.0 KiB
TypeScript

import type { FileNode } from "@codeboard/shared";
/** Rough characters-per-token ratio used to convert between token budgets and string lengths. */
const APPROX_CHARS_PER_TOKEN = 4;

/**
 * Splits `content` into chunks that each fit an approximate token budget.
 *
 * The budget is converted to a character count via `APPROX_CHARS_PER_TOKEN`.
 * Lines are packed greedily: a chunk is closed as soon as appending the next
 * line (plus the newline that joins it) would push it past the budget. A
 * single line that is longer than the budget on its own is hard-split into
 * budget-sized pieces so that no chunk exceeds the limit; the final piece of
 * such a line keeps packing with the lines that follow it.
 *
 * @param content - Text to split.
 * @param maxTokens - Approximate maximum number of tokens per chunk.
 * @returns The chunks in source order. Content that already fits the budget is
 *   returned unchanged as a single-element array.
 */
export function chunkCode(content: string, maxTokens: number): string[] {
  const maxChars = maxTokens * APPROX_CHARS_PER_TOKEN;
  if (content.length <= maxChars) return [content];

  // Hard-splitting needs a whole, positive slice size. For budgets below one
  // character (zero, negative, NaN) only line boundaries are used.
  const sliceSize = Math.floor(maxChars);
  const canHardSplit = sliceSize >= 1;

  const chunks: string[] = [];
  let current: string[] = [];
  let currentLen = 0;

  const flush = (): void => {
    if (current.length > 0) {
      chunks.push(current.join("\n"));
      current = [];
      currentLen = 0;
    }
  };

  for (const line of content.split("\n")) {
    if (canHardSplit && line.length > sliceSize) {
      // This line can never fit in one chunk: emit what we have, then slice it.
      flush();
      let start = 0;
      while (line.length - start > sliceSize) {
        let end = start + sliceSize;
        const lastUnit = line.charCodeAt(end - 1);
        // Do not cut between the two halves of a UTF-16 surrogate pair.
        if (end - start > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          end -= 1;
        }
        chunks.push(line.slice(start, end));
        start = end;
      }
      // The remainder is non-empty and within budget; it seeds the next chunk.
      const tail = line.slice(start);
      current.push(tail);
      currentLen = tail.length + 1;
      continue;
    }

    // currentLen already counts one joining newline per buffered line, so
    // currentLen + line.length is the exact length of the joined chunk.
    if (currentLen + line.length > maxChars) flush();
    current.push(line);
    currentLen += line.length + 1;
  }

  flush();
  return chunks;
}
/**
 * Builds a compact, line-oriented text summary of a parsed file's structure.
 *
 * Output order: a header line with the path and language; an "Imports:"
 * section and an "Exports:" section (each omitted when empty); one line per
 * function; and, for every class, a `class` line followed by its methods and
 * then its properties.
 *
 * @param fileNode - Parsed file description to summarize.
 * @returns The summary lines joined with "\n".
 */
export function extractSignatures(fileNode: FileNode): string {
  const parts: string[] = [`File: ${fileNode.path} (${fileNode.language})`];

  if (fileNode.imports.length > 0) {
    parts.push("Imports:");
    for (const imp of fileNode.imports) {
      parts.push(` from "${imp.source}" import {${imp.specifiers.join(", ")}}`);
    }
  }

  if (fileNode.exports.length > 0) {
    parts.push("Exports:");
    for (const exp of fileNode.exports) {
      parts.push(` ${exp.isDefault ? "default " : ""}${exp.name}`);
    }
  }

  for (const fn of fileNode.functions) {
    const params = fn.params.join(", ");
    const ret = fn.returnType ? `: ${fn.returnType}` : "";
    // Keep only the first 100 characters of the docstring, and set it off with
    // " // " so it does not run straight into the return type or closing paren.
    const doc = fn.docstring ? ` // ${fn.docstring.slice(0, 100)}` : "";
    parts.push(`function ${fn.name}(${params})${ret}${doc}`);
  }

  for (const cls of fileNode.classes) {
    parts.push(`class ${cls.name}`);
    for (const method of cls.methods) {
      parts.push(` method ${method.name}(${method.params.join(", ")})`);
    }
    for (const prop of cls.properties) {
      parts.push(` property ${prop.name}${prop.type ? `: ${prop.type}` : ""}`);
    }
  }

  return parts.join("\n");
}
/**
 * Approximates the number of tokens in `text` from its length.
 *
 * @param text - Text to measure.
 * @returns `text.length` divided by `APPROX_CHARS_PER_TOKEN`, rounded up to a
 *   whole number.
 */
export function estimateTokens(text: string): number {
  const charCount = text.length;
  const rawEstimate = charCount / APPROX_CHARS_PER_TOKEN;
  return Math.ceil(rawEstimate);
}