diff --git a/apps/web/src/app/api/traces/route.ts b/apps/web/src/app/api/traces/route.ts index eb9092c..5a929b1 100644 --- a/apps/web/src/app/api/traces/route.ts +++ b/apps/web/src/app/api/traces/route.ts @@ -63,6 +63,24 @@ interface BatchTracesRequest { traces: TracePayload[]; } +function topologicalSortSpans(spans: SpanPayload[]): SpanPayload[] { + const byId = new Map(spans.map((s) => [s.id, s])); + const sorted: SpanPayload[] = []; + const visited = new Set(); + + function visit(span: SpanPayload) { + if (visited.has(span.id)) return; + visited.add(span.id); + if (span.parentSpanId && byId.has(span.parentSpanId)) { + visit(byId.get(span.parentSpanId)!); + } + sorted.push(span); + } + + for (const span of spans) visit(span); + return sorted; +} + // POST /api/traces — Batch ingest traces from SDK export async function POST(request: NextRequest) { try { @@ -166,10 +184,15 @@ export async function POST(request: NextRequest) { } } - // Insert traces using transaction - const result = await prisma.$transaction( - body.traces.map((trace) => - prisma.trace.create({ + // Insert traces using interactive transaction to control insert order. + // Spans must be inserted before decision points due to the + // DecisionPoint.parentSpanId FK referencing Span.id. + const result = await prisma.$transaction(async (tx) => { + const created: string[] = []; + + for (const trace of body.traces) { + // 1. Create the trace record (no nested relations) + await tx.trace.create({ data: { id: trace.id, name: trace.name, @@ -182,23 +205,18 @@ export async function POST(request: NextRequest) { totalDuration: trace.totalDuration, startedAt: new Date(trace.startedAt), endedAt: trace.endedAt ? 
new Date(trace.endedAt) : null, - decisionPoints: { - create: trace.decisionPoints.map((dp) => ({ - id: dp.id, - type: dp.type, - reasoning: dp.reasoning, - chosen: dp.chosen as Prisma.InputJsonValue, - alternatives: dp.alternatives as Prisma.InputJsonValue[], - contextSnapshot: dp.contextSnapshot as Prisma.InputJsonValue | undefined, - durationMs: dp.durationMs, - costUsd: dp.costUsd, - parentSpanId: dp.parentSpanId, - timestamp: new Date(dp.timestamp), - })), - }, - spans: { - create: trace.spans.map((span) => ({ + }, + }); + + // 2. Create spans FIRST (decision points may reference them via parentSpanId) + if (trace.spans.length > 0) { + // Sort spans so parents come before children + const spanOrder = topologicalSortSpans(trace.spans); + for (const span of spanOrder) { + await tx.span.create({ + data: { id: span.id, + traceId: trace.id, parentSpanId: span.parentSpanId, name: span.name, type: span.type, @@ -212,22 +230,53 @@ export async function POST(request: NextRequest) { startedAt: new Date(span.startedAt), endedAt: span.endedAt ? new Date(span.endedAt) : null, metadata: span.metadata as Prisma.InputJsonValue | undefined, - })), - }, - events: { - create: trace.events.map((event) => ({ - id: event.id, - spanId: event.spanId, - type: event.type, - name: event.name, - metadata: event.metadata as Prisma.InputJsonValue | undefined, - timestamp: new Date(event.timestamp), - })), - }, - }, - }) - ) - ); + }, + }); + } + } + + // 3. 
Create decision points AFTER spans exist
+      // Span IDs actually created in step 2. Used to null-out dangling
+      // references from BOTH decision points and events, so a single bad
+      // span ref from the SDK cannot abort the whole batch transaction.
+      const validSpanIds = new Set(trace.spans.map((s) => s.id));
+
+      if (trace.decisionPoints.length > 0) {
+        await tx.decisionPoint.createMany({
+          data: trace.decisionPoints.map((dp) => ({
+            id: dp.id,
+            traceId: trace.id,
+            type: dp.type,
+            reasoning: dp.reasoning,
+            chosen: dp.chosen as Prisma.InputJsonValue,
+            alternatives: dp.alternatives as Prisma.InputJsonValue[],
+            contextSnapshot: dp.contextSnapshot as Prisma.InputJsonValue | undefined,
+            durationMs: dp.durationMs,
+            costUsd: dp.costUsd,
+            parentSpanId: dp.parentSpanId && validSpanIds.has(dp.parentSpanId) ? dp.parentSpanId : null,
+            timestamp: new Date(dp.timestamp),
+          })),
+        });
+      }
+
+      // 4. Create events — guard spanId the same way as parentSpanId above,
+      // otherwise an event referencing a span not in this batch raises an FK
+      // violation and rolls back the entire transaction.
+      // NOTE(review): assumes Event.spanId is a nullable FK like
+      // DecisionPoint.parentSpanId — confirm against the Prisma schema.
+      if (trace.events.length > 0) {
+        await tx.event.createMany({
+          data: trace.events.map((event) => ({
+            id: event.id,
+            traceId: trace.id,
+            spanId: event.spanId && validSpanIds.has(event.spanId) ? event.spanId : null,
+            type: event.type,
+            name: event.name,
+            metadata: event.metadata as Prisma.InputJsonValue | undefined,
+            timestamp: new Date(event.timestamp),
+          })),
+        });
+      }
+
+      created.push(trace.id);
+    }
+
+    return created;
+  });

     return NextResponse.json({ success: true, count: result.length }, { status: 200 });
   } catch (error) {
diff --git a/examples/moonshot_real_test.py b/examples/moonshot_real_test.py
new file mode 100644
index 0000000..10772e3
--- /dev/null
+++ b/examples/moonshot_real_test.py
@@ -0,0 +1,347 @@
+"""
+AgentLens Real LLM Test — MoonshotAI (Kimi) via OpenAI-compatible API.
+
+Tests the full pipeline: SDK → wrap_openai() → real LLM completion → AgentLens dashboard.
+Uses MoonshotAI (OpenAI-compatible) with kimi-k2-turbo-preview model.
+
+Usage:
+    pip install vectry-agentlens openai httpx
+    MOONSHOT_API_KEY=<your-key> python moonshot_real_test.py
+"""
+
+import time
+import json
+import agentlens
+from agentlens.integrations.openai import wrap_openai
+import openai
+import os
+# ── Config ──────────────────────────────────────────────────────────
+MOONSHOT_API_KEY = os.environ["MOONSHOT_API_KEY"]  # never commit real keys
+MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1"
+MOONSHOT_MODEL = "kimi-k2-turbo-preview"
+AGENTLENS_ENDPOINT = "https://agentlens.vectry.tech"
+AGENTLENS_API_KEY = "test-moonshot-key"
+
+# ── Initialize ──────────────────────────────────────────────────────
+print("=" * 60)
+print("AgentLens Real LLM Test — MoonshotAI (Kimi)")
+print("=" * 60)
+
+agentlens.init(
+    api_key=AGENTLENS_API_KEY,
+    endpoint=AGENTLENS_ENDPOINT,
+)
+print(f"[✓] AgentLens initialized → {AGENTLENS_ENDPOINT}")
+
+# Create OpenAI client pointing to MoonshotAI
+client = openai.OpenAI(
+    api_key=MOONSHOT_API_KEY,
+    base_url=MOONSHOT_BASE_URL,
+)
+wrap_openai(client)
+print(f"[✓] OpenAI client wrapped → {MOONSHOT_BASE_URL}")
+print(f"[✓] Model: {MOONSHOT_MODEL}")
+print()
+
+# ── Test 1: Basic Completion ────────────────────────────────────────
+print("─── Test 1: Basic Completion ───")
+with agentlens.trace(
+    "moonshot-basic-completion",
+    tags=["moonshot", "test", "basic"],
+    metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "basic"},
+):
+    agentlens.log_decision(
+        type="TOOL_SELECTION",
+        chosen={
+            "name": MOONSHOT_MODEL,
+            "confidence": 0.95,
+            "params": {"temperature": 0.7, "max_tokens": 200},
+        },
+        alternatives=[
+            {
+                "name": "moonshot-v1-8k",
+                "confidence": 0.6,
+                "reason_rejected": "Older model, less capable",
+            }
+        ],
+        reasoning="Using kimi-k2-turbo-preview for best quality/speed balance.",
+    )
+
+    start = time.time()
+    response = client.chat.completions.create(
+        model=MOONSHOT_MODEL,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a helpful AI assistant.
Be concise.", + }, + { + "role": "user", + "content": "What are the 3 most important principles of software engineering? Answer in one sentence each.", + }, + ], + temperature=0.7, + max_tokens=200, + ) + elapsed = time.time() - start + + content = response.choices[0].message.content + usage = response.usage + print(f" Response ({elapsed:.2f}s):") + print(f" {content[:200]}...") + print( + f" Tokens: {usage.prompt_tokens} in / {usage.completion_tokens} out / {usage.total_tokens} total" + ) + print() + +# ── Test 2: Multi-turn Conversation with Decision Logging ────────── +print("─── Test 2: Multi-turn with Decisions ───") +with agentlens.trace( + "moonshot-multi-turn-agent", + tags=["moonshot", "test", "multi-turn", "agent"], + metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "multi-turn"}, +): + # Step 1: Classify user intent + agentlens.log_decision( + type="PLANNING", + chosen={ + "name": "classify-then-respond", + "confidence": 0.9, + "params": {"strategy": "two-step"}, + }, + alternatives=[ + { + "name": "direct-response", + "confidence": 0.5, + "reason_rejected": "Classification first improves response quality", + } + ], + reasoning="Two-step approach: classify intent first, then generate targeted response.", + context_snapshot={"user_query": "Help me debug a Python TypeError"}, + ) + + with agentlens.trace("classify-intent", tags=["classification"]): + classification = client.chat.completions.create( + model=MOONSHOT_MODEL, + messages=[ + { + "role": "system", + "content": "Classify the user's programming question into one category: 'syntax', 'runtime', 'logic', 'design', 'performance'. 
Reply with just the category.", + }, + { + "role": "user", + "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code", + }, + ], + temperature=0.2, + max_tokens=20, + ) + category = classification.choices[0].message.content.strip() + print(f" Intent classified: {category}") + + # Step 2: Route to appropriate response strategy + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": f"respond-as-{category}", + "confidence": 0.85, + }, + alternatives=[ + { + "name": "generic-response", + "confidence": 0.3, + "reason_rejected": "Classified response is more helpful", + } + ], + reasoning=f"User question classified as '{category}' — routing to specialized response.", + context_snapshot={"category": category}, + ) + + # Step 3: Generate response + with agentlens.trace("generate-response", tags=["response"]): + response = client.chat.completions.create( + model=MOONSHOT_MODEL, + messages=[ + { + "role": "system", + "content": f"You are an expert Python debugger specializing in {category} errors. Give a concise, actionable fix.", + }, + { + "role": "user", + "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code", + }, + { + "role": "assistant", + "content": f"This is a {category} error. 
Let me help you fix it.", + }, + { + "role": "user", + "content": "Here's my code: total = count + name where count=5 and name='hello'", + }, + ], + temperature=0.5, + max_tokens=300, + ) + answer = response.choices[0].message.content + print(f" Response: {answer[:150]}...") + print() + +# ── Test 3: Tool/Function Calling ─────────────────────────────────── +print("─── Test 3: Function Calling ───") +with agentlens.trace( + "moonshot-function-calling", + tags=["moonshot", "test", "tools", "function-calling"], + metadata={ + "provider": "moonshot", + "model": MOONSHOT_MODEL, + "test": "function-calling", + }, +): + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City name, e.g. 'San Francisco'", + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit", + }, + }, + "required": ["location"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "search_web", + "description": "Search the web for information", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query", + }, + }, + "required": ["query"], + }, + }, + }, + ] + + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "provide-tools", + "confidence": 0.9, + "params": {"tools": ["get_weather", "search_web"]}, + }, + alternatives=[], + reasoning="User query likely requires weather data — providing weather and search tools.", + ) + + response = client.chat.completions.create( + model=MOONSHOT_MODEL, + messages=[ + { + "role": "system", + "content": "You are a helpful assistant with access to tools. 
Use them when needed.", + }, + {"role": "user", "content": "What's the weather like in Lisbon today?"}, + ], + tools=tools, + temperature=0.3, + max_tokens=200, + ) + + message = response.choices[0].message + if message.tool_calls: + print(f" Tool calls requested: {len(message.tool_calls)}") + for tc in message.tool_calls: + print(f" → {tc.function.name}({tc.function.arguments})") + + # Simulate tool response + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": tc.function.name, + "confidence": 1.0, + }, + alternatives=[], + reasoning=f"Model requested {tc.function.name} — executing tool call.", + ) + + # Send fake tool result back + tool_result = json.dumps( + { + "temperature": 18, + "unit": "celsius", + "condition": "sunny", + "location": "Lisbon", + } + ) + final_response = client.chat.completions.create( + model=MOONSHOT_MODEL, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What's the weather like in Lisbon today?"}, + message, + { + "role": "tool", + "tool_call_id": message.tool_calls[0].id, + "content": tool_result, + }, + ], + temperature=0.5, + max_tokens=200, + ) + print(f" Final answer: {final_response.choices[0].message.content[:150]}...") + else: + print(f" Direct response (no tool calls): {message.content[:150]}...") + print() + +# ── Shutdown & Verify ─────────────────────────────────────────────── +print("─── Flushing traces to AgentLens... ───") +agentlens.shutdown() +print("[✓] All traces flushed") + +# Wait a moment for async processing +time.sleep(2) + +# Verify traces arrived +print() +print("─── Verifying traces in dashboard... 
───") +import httpx + +resp = httpx.get( + f"{AGENTLENS_ENDPOINT}/api/traces", + params={"search": "moonshot", "limit": "10"}, + headers={"Authorization": f"Bearer {AGENTLENS_API_KEY}"}, +) +if resp.status_code == 200: + data = resp.json() + traces = data.get("traces", []) + print(f"[✓] Found {len(traces)} moonshot traces in dashboard:") + for t in traces: + spans = t.get("_count", {}).get("spans", "?") + decisions = t.get("_count", {}).get("decisionPoints", "?") + print( + f" • {t['name']} — status={t['status']}, spans={spans}, decisions={decisions}" + ) +else: + print(f"[✗] API returned {resp.status_code}: {resp.text[:200]}") + +print() +print("=" * 60) +print("Test complete! Visit https://agentlens.vectry.tech/dashboard") +print("=" * 60)