Spans must be inserted before decision points because the DecisionPoint.parentSpanId FK references Span.id. Switched from a nested Prisma create to an interactive transaction with topological span ordering. Also adds a real MoonshotAI LLM test script.
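For context, a minimal sketch of what an interactive transaction with topological span ordering can look like; the model delegates (`trace`, `span`, `decisionPoint`), field names, and the `topoSortSpans` helper are illustrative assumptions, not the actual AgentLens schema or ingestion code:

```ts
// Illustrative sketch only — model and field names are assumptions, not the real schema.
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

// Order spans so every parent is emitted before its children (parents-first DFS).
function topoSortSpans<T extends { id: string; parentSpanId?: string | null }>(spans: T[]): T[] {
  const byParent = new Map<string | null, T[]>();
  for (const s of spans) {
    const key = s.parentSpanId ?? null;
    byParent.set(key, [...(byParent.get(key) ?? []), s]);
  }
  const ordered: T[] = [];
  const visit = (parentId: string | null) => {
    for (const s of byParent.get(parentId) ?? []) {
      ordered.push(s);
      visit(s.id);
    }
  };
  visit(null);
  // Fallback: spans whose parent is not in this batch are appended at the end.
  for (const s of spans) if (!ordered.includes(s)) ordered.push(s);
  return ordered;
}

export async function ingestTrace(traceData: any, spans: any[], decisionPoints: any[]) {
  await prisma.$transaction(async (tx) => {
    const trace = await tx.trace.create({ data: traceData });

    // Spans first, parents before children, so Span.parentSpanId always resolves.
    for (const span of topoSortSpans(spans)) {
      await tx.span.create({ data: { ...span, traceId: trace.id } });
    }

    // DecisionPoint.parentSpanId references Span.id, so decision points go last.
    for (const dp of decisionPoints) {
      await tx.decisionPoint.create({ data: { ...dp, traceId: trace.id } });
    }
  });
}
```

The interactive transaction keeps the trace, span, and decision-point inserts atomic while still allowing an explicit insert order, which a single nested create cannot guarantee.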
"""
|
|
AgentLens Real LLM Test — MoonshotAI (Kimi) via OpenAI-compatible API.
|
|
|
|
Tests the full pipeline: SDK → wrap_openai() → real LLM completion → AgentLens dashboard.
|
|
Uses MoonshotAI (OpenAI-compatible) with kimi-k2-turbo-preview model.
|
|
|
|
Usage:
|
|
pip install vectry-agentlens openai
|
|
python moonshot_real_test.py
|
|
"""
|
|
|
|
import time
|
|
import json
|
|
import agentlens
|
|
from agentlens.integrations.openai import wrap_openai
|
|
import openai
|
|
|
|
# ── Config ──────────────────────────────────────────────────────────
MOONSHOT_API_KEY = "sk-2uhpGUeqISKtiGwd14aGuYJ4tt2p0Ad98qke9T8Ykdc4dEPp"
MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1"
MOONSHOT_MODEL = "kimi-k2-turbo-preview"
AGENTLENS_ENDPOINT = "https://agentlens.vectry.tech"
AGENTLENS_API_KEY = "test-moonshot-key"

# ── Initialize ──────────────────────────────────────────────────────
print("=" * 60)
print("AgentLens Real LLM Test — MoonshotAI (Kimi)")
print("=" * 60)

agentlens.init(
    api_key=AGENTLENS_API_KEY,
    endpoint=AGENTLENS_ENDPOINT,
)
print(f"[✓] AgentLens initialized → {AGENTLENS_ENDPOINT}")

# Create OpenAI client pointing to MoonshotAI
client = openai.OpenAI(
    api_key=MOONSHOT_API_KEY,
    base_url=MOONSHOT_BASE_URL,
)
wrap_openai(client)
print(f"[✓] OpenAI client wrapped → {MOONSHOT_BASE_URL}")
print(f"[✓] Model: {MOONSHOT_MODEL}")
print()

# ── Test 1: Basic Completion ────────────────────────────────────────
print("─── Test 1: Basic Completion ───")
with agentlens.trace(
    "moonshot-basic-completion",
    tags=["moonshot", "test", "basic"],
    metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "basic"},
):
    agentlens.log_decision(
        type="TOOL_SELECTION",
        chosen={
            "name": MOONSHOT_MODEL,
            "confidence": 0.95,
            "params": {"temperature": 0.7, "max_tokens": 200},
        },
        alternatives=[
            {
                "name": "moonshot-v1-8k",
                "confidence": 0.6,
                "reason_rejected": "Older model, less capable",
            }
        ],
        reasoning="Using kimi-k2-turbo-preview for best quality/speed balance.",
    )

    start = time.time()
    response = client.chat.completions.create(
        model=MOONSHOT_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful AI assistant. Be concise.",
            },
            {
                "role": "user",
                "content": "What are the 3 most important principles of software engineering? Answer in one sentence each.",
            },
        ],
        temperature=0.7,
        max_tokens=200,
    )
    elapsed = time.time() - start

    content = response.choices[0].message.content
    usage = response.usage
    print(f" Response ({elapsed:.2f}s):")
    print(f" {content[:200]}...")
    print(
        f" Tokens: {usage.prompt_tokens} in / {usage.completion_tokens} out / {usage.total_tokens} total"
    )
    print()

# ── Test 2: Multi-turn Conversation with Decision Logging ──────────
print("─── Test 2: Multi-turn with Decisions ───")
with agentlens.trace(
    "moonshot-multi-turn-agent",
    tags=["moonshot", "test", "multi-turn", "agent"],
    metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "multi-turn"},
):
    # Step 1: Classify user intent
    agentlens.log_decision(
        type="PLANNING",
        chosen={
            "name": "classify-then-respond",
            "confidence": 0.9,
            "params": {"strategy": "two-step"},
        },
        alternatives=[
            {
                "name": "direct-response",
                "confidence": 0.5,
                "reason_rejected": "Classification first improves response quality",
            }
        ],
        reasoning="Two-step approach: classify intent first, then generate targeted response.",
        context_snapshot={"user_query": "Help me debug a Python TypeError"},
    )

    with agentlens.trace("classify-intent", tags=["classification"]):
        classification = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "Classify the user's programming question into one category: 'syntax', 'runtime', 'logic', 'design', 'performance'. Reply with just the category.",
                },
                {
                    "role": "user",
                    "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code",
                },
            ],
            temperature=0.2,
            max_tokens=20,
        )
        category = classification.choices[0].message.content.strip()
        print(f" Intent classified: {category}")

    # Step 2: Route to appropriate response strategy
    agentlens.log_decision(
        type="ROUTING",
        chosen={
            "name": f"respond-as-{category}",
            "confidence": 0.85,
        },
        alternatives=[
            {
                "name": "generic-response",
                "confidence": 0.3,
                "reason_rejected": "Classified response is more helpful",
            }
        ],
        reasoning=f"User question classified as '{category}' — routing to specialized response.",
        context_snapshot={"category": category},
    )

    # Step 3: Generate response
    with agentlens.trace("generate-response", tags=["response"]):
        response = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": f"You are an expert Python debugger specializing in {category} errors. Give a concise, actionable fix.",
                },
                {
                    "role": "user",
                    "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code",
                },
                {
                    "role": "assistant",
                    "content": f"This is a {category} error. Let me help you fix it.",
                },
                {
                    "role": "user",
                    "content": "Here's my code: total = count + name where count=5 and name='hello'",
                },
            ],
            temperature=0.5,
            max_tokens=300,
        )
        answer = response.choices[0].message.content
        print(f" Response: {answer[:150]}...")
        print()

# ── Test 3: Tool/Function Calling ───────────────────────────────────
print("─── Test 3: Function Calling ───")
with agentlens.trace(
    "moonshot-function-calling",
    tags=["moonshot", "test", "tools", "function-calling"],
    metadata={
        "provider": "moonshot",
        "model": MOONSHOT_MODEL,
        "test": "function-calling",
    },
):
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City name, e.g. 'San Francisco'",
                        },
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "Temperature unit",
                        },
                    },
                    "required": ["location"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for information",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
    ]

    agentlens.log_decision(
        type="TOOL_SELECTION",
        chosen={
            "name": "provide-tools",
            "confidence": 0.9,
            "params": {"tools": ["get_weather", "search_web"]},
        },
        alternatives=[],
        reasoning="User query likely requires weather data — providing weather and search tools.",
    )

    response = client.chat.completions.create(
        model=MOONSHOT_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant with access to tools. Use them when needed.",
            },
            {"role": "user", "content": "What's the weather like in Lisbon today?"},
        ],
        tools=tools,
        temperature=0.3,
        max_tokens=200,
    )

    message = response.choices[0].message
    if message.tool_calls:
        print(f" Tool calls requested: {len(message.tool_calls)}")
        for tc in message.tool_calls:
            print(f" → {tc.function.name}({tc.function.arguments})")

            # Simulate tool response
            agentlens.log_decision(
                type="TOOL_SELECTION",
                chosen={
                    "name": tc.function.name,
                    "confidence": 1.0,
                },
                alternatives=[],
                reasoning=f"Model requested {tc.function.name} — executing tool call.",
            )

        # Send fake tool result back
        tool_result = json.dumps(
            {
                "temperature": 18,
                "unit": "celsius",
                "condition": "sunny",
                "location": "Lisbon",
            }
        )
        final_response = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "What's the weather like in Lisbon today?"},
                message,
                {
                    "role": "tool",
                    "tool_call_id": message.tool_calls[0].id,
                    "content": tool_result,
                },
            ],
            temperature=0.5,
            max_tokens=200,
        )
        print(f" Final answer: {final_response.choices[0].message.content[:150]}...")
    else:
        print(f" Direct response (no tool calls): {message.content[:150]}...")
    print()

# ── Shutdown & Verify ───────────────────────────────────────────────
print("─── Flushing traces to AgentLens... ───")
agentlens.shutdown()
print("[✓] All traces flushed")

# Wait a moment for async processing
time.sleep(2)

# Verify traces arrived
print()
print("─── Verifying traces in dashboard... ───")
import httpx

resp = httpx.get(
    f"{AGENTLENS_ENDPOINT}/api/traces",
    params={"search": "moonshot", "limit": "10"},
    headers={"Authorization": f"Bearer {AGENTLENS_API_KEY}"},
)
if resp.status_code == 200:
    data = resp.json()
    traces = data.get("traces", [])
    print(f"[✓] Found {len(traces)} moonshot traces in dashboard:")
    for t in traces:
        spans = t.get("_count", {}).get("spans", "?")
        decisions = t.get("_count", {}).get("decisionPoints", "?")
        print(
            f" • {t['name']} — status={t['status']}, spans={spans}, decisions={decisions}"
        )
else:
    print(f"[✗] API returned {resp.status_code}: {resp.text[:200]}")

print()
print("=" * 60)
print("Test complete! Visit https://agentlens.vectry.tech/dashboard")
print("=" * 60)