Spans must be inserted before decision points because the DecisionPoint.parentSpanId FK references Span.id. Switched from a nested Prisma create to an interactive transaction with topological span ordering. Also adds a real MoonshotAI LLM test script.
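For context, a minimal sketch of what an interactive transaction with topological span ordering can look like; the model delegates (`trace`, `span`, `decisionPoint`), field names, and the `topoSortSpans` helper are illustrative assumptions, not the actual AgentLens schema or ingestion code:

```ts
// Illustrative sketch only — model and field names are assumptions, not the real schema.
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

// Order spans so every parent is emitted before its children (parents-first DFS).
function topoSortSpans<T extends { id: string; parentSpanId?: string | null }>(spans: T[]): T[] {
  const byParent = new Map<string | null, T[]>();
  for (const s of spans) {
    const key = s.parentSpanId ?? null;
    byParent.set(key, [...(byParent.get(key) ?? []), s]);
  }
  const ordered: T[] = [];
  const visit = (parentId: string | null) => {
    for (const s of byParent.get(parentId) ?? []) {
      ordered.push(s);
      visit(s.id);
    }
  };
  visit(null);
  // Fallback: spans whose parent is not in this batch are appended at the end.
  for (const s of spans) if (!ordered.includes(s)) ordered.push(s);
  return ordered;
}

export async function ingestTrace(traceData: any, spans: any[], decisionPoints: any[]) {
  await prisma.$transaction(async (tx) => {
    const trace = await tx.trace.create({ data: traceData });

    // Spans first, parents before children, so Span.parentSpanId always resolves.
    for (const span of topoSortSpans(spans)) {
      await tx.span.create({ data: { ...span, traceId: trace.id } });
    }

    // DecisionPoint.parentSpanId references Span.id, so decision points go last.
    for (const dp of decisionPoints) {
      await tx.decisionPoint.create({ data: { ...dp, traceId: trace.id } });
    }
  });
}
```

The interactive transaction keeps the trace, span, and decision-point inserts atomic while still allowing an explicit insert order, which a single nested create cannot guarantee.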
"""
|
|
AgentLens Real LLM Test — MoonshotAI (Kimi) via OpenAI-compatible API.
|
|
|
|
Tests the full pipeline: SDK → wrap_openai() → real LLM completion → AgentLens dashboard.
|
|
Uses MoonshotAI (OpenAI-compatible) with kimi-k2-turbo-preview model.
|
|
|
|
Usage:
|
|
pip install vectry-agentlens openai
|
|
python moonshot_real_test.py
|
|
"""
|
|
|
|
import time
|
|
import json
|
|
import agentlens
|
|
from agentlens.integrations.openai import wrap_openai
|
|
import openai
|
|
|
|
# ── Config ──────────────────────────────────────────────────────────
MOONSHOT_API_KEY = "sk-2uhpGUeqISKtiGwd14aGuYJ4tt2p0Ad98qke9T8Ykdc4dEPp"
MOONSHOT_BASE_URL = "https://api.moonshot.ai/v1"
MOONSHOT_MODEL = "kimi-k2-turbo-preview"
AGENTLENS_ENDPOINT = "https://agentlens.vectry.tech"
AGENTLENS_API_KEY = "test-moonshot-key"

# ── Initialize ──────────────────────────────────────────────────────
print("=" * 60)
print("AgentLens Real LLM Test — MoonshotAI (Kimi)")
print("=" * 60)

agentlens.init(
    api_key=AGENTLENS_API_KEY,
    endpoint=AGENTLENS_ENDPOINT,
)
print(f"[✓] AgentLens initialized → {AGENTLENS_ENDPOINT}")

# Create OpenAI client pointing to MoonshotAI
client = openai.OpenAI(
    api_key=MOONSHOT_API_KEY,
    base_url=MOONSHOT_BASE_URL,
)
wrap_openai(client)
print(f"[✓] OpenAI client wrapped → {MOONSHOT_BASE_URL}")
print(f"[✓] Model: {MOONSHOT_MODEL}")
print()

# ── Test 1: Basic Completion ────────────────────────────────────────
print("─── Test 1: Basic Completion ───")
with agentlens.trace(
    "moonshot-basic-completion",
    tags=["moonshot", "test", "basic"],
    metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "basic"},
):
    agentlens.log_decision(
        type="TOOL_SELECTION",
        chosen={
            "name": MOONSHOT_MODEL,
            "confidence": 0.95,
            "params": {"temperature": 0.7, "max_tokens": 200},
        },
        alternatives=[
            {
                "name": "moonshot-v1-8k",
                "confidence": 0.6,
                "reason_rejected": "Older model, less capable",
            }
        ],
        reasoning="Using kimi-k2-turbo-preview for best quality/speed balance.",
    )

    start = time.time()
    response = client.chat.completions.create(
        model=MOONSHOT_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful AI assistant. Be concise.",
            },
            {
                "role": "user",
                "content": "What are the 3 most important principles of software engineering? Answer in one sentence each.",
            },
        ],
        temperature=0.7,
        max_tokens=200,
    )
    elapsed = time.time() - start

    content = response.choices[0].message.content
    usage = response.usage
    print(f" Response ({elapsed:.2f}s):")
    print(f" {content[:200]}...")
    print(
        f" Tokens: {usage.prompt_tokens} in / {usage.completion_tokens} out / {usage.total_tokens} total"
    )
    print()

# ── Test 2: Multi-turn Conversation with Decision Logging ──────────
print("─── Test 2: Multi-turn with Decisions ───")
with agentlens.trace(
    "moonshot-multi-turn-agent",
    tags=["moonshot", "test", "multi-turn", "agent"],
    metadata={"provider": "moonshot", "model": MOONSHOT_MODEL, "test": "multi-turn"},
):
    # Step 1: Classify user intent
    agentlens.log_decision(
        type="PLANNING",
        chosen={
            "name": "classify-then-respond",
            "confidence": 0.9,
            "params": {"strategy": "two-step"},
        },
        alternatives=[
            {
                "name": "direct-response",
                "confidence": 0.5,
                "reason_rejected": "Classification first improves response quality",
            }
        ],
        reasoning="Two-step approach: classify intent first, then generate targeted response.",
        context_snapshot={"user_query": "Help me debug a Python TypeError"},
    )

    with agentlens.trace("classify-intent", tags=["classification"]):
        classification = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "Classify the user's programming question into one category: 'syntax', 'runtime', 'logic', 'design', 'performance'. Reply with just the category.",
                },
                {
                    "role": "user",
                    "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code",
                },
            ],
            temperature=0.2,
            max_tokens=20,
        )
        category = classification.choices[0].message.content.strip()
        print(f" Intent classified: {category}")

    # Step 2: Route to appropriate response strategy
    agentlens.log_decision(
        type="ROUTING",
        chosen={
            "name": f"respond-as-{category}",
            "confidence": 0.85,
        },
        alternatives=[
            {
                "name": "generic-response",
                "confidence": 0.3,
                "reason_rejected": "Classified response is more helpful",
            }
        ],
        reasoning=f"User question classified as '{category}' — routing to specialized response.",
        context_snapshot={"category": category},
    )

    # Step 3: Generate response
    with agentlens.trace("generate-response", tags=["response"]):
        response = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": f"You are an expert Python debugger specializing in {category} errors. Give a concise, actionable fix.",
                },
                {
                    "role": "user",
                    "content": "I'm getting a TypeError: unsupported operand type(s) for +: 'int' and 'str' in my Python code",
                },
                {
                    "role": "assistant",
                    "content": f"This is a {category} error. Let me help you fix it.",
                },
                {
                    "role": "user",
                    "content": "Here's my code: total = count + name where count=5 and name='hello'",
                },
            ],
            temperature=0.5,
            max_tokens=300,
        )
        answer = response.choices[0].message.content
        print(f" Response: {answer[:150]}...")
        print()

# ── Test 3: Tool/Function Calling ───────────────────────────────────
print("─── Test 3: Function Calling ───")
with agentlens.trace(
    "moonshot-function-calling",
    tags=["moonshot", "test", "tools", "function-calling"],
    metadata={
        "provider": "moonshot",
        "model": MOONSHOT_MODEL,
        "test": "function-calling",
    },
):
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City name, e.g. 'San Francisco'",
                        },
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "Temperature unit",
                        },
                    },
                    "required": ["location"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "search_web",
                "description": "Search the web for information",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                    },
                    "required": ["query"],
                },
            },
        },
    ]

    agentlens.log_decision(
        type="TOOL_SELECTION",
        chosen={
            "name": "provide-tools",
            "confidence": 0.9,
            "params": {"tools": ["get_weather", "search_web"]},
        },
        alternatives=[],
        reasoning="User query likely requires weather data — providing weather and search tools.",
    )

    response = client.chat.completions.create(
        model=MOONSHOT_MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant with access to tools. Use them when needed.",
            },
            {"role": "user", "content": "What's the weather like in Lisbon today?"},
        ],
        tools=tools,
        temperature=0.3,
        max_tokens=200,
    )

    message = response.choices[0].message
    if message.tool_calls:
        print(f" Tool calls requested: {len(message.tool_calls)}")
        for tc in message.tool_calls:
            print(f" → {tc.function.name}({tc.function.arguments})")

            # Simulate tool response
            agentlens.log_decision(
                type="TOOL_SELECTION",
                chosen={
                    "name": tc.function.name,
                    "confidence": 1.0,
                },
                alternatives=[],
                reasoning=f"Model requested {tc.function.name} — executing tool call.",
            )

        # Send fake tool result back
        tool_result = json.dumps(
            {
                "temperature": 18,
                "unit": "celsius",
                "condition": "sunny",
                "location": "Lisbon",
            }
        )
        final_response = client.chat.completions.create(
            model=MOONSHOT_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "What's the weather like in Lisbon today?"},
                message,
                {
                    "role": "tool",
                    "tool_call_id": message.tool_calls[0].id,
                    "content": tool_result,
                },
            ],
            temperature=0.5,
            max_tokens=200,
        )
        print(f" Final answer: {final_response.choices[0].message.content[:150]}...")
    else:
        print(f" Direct response (no tool calls): {message.content[:150]}...")
    print()

# ── Shutdown & Verify ───────────────────────────────────────────────
print("─── Flushing traces to AgentLens... ───")
agentlens.shutdown()
print("[✓] All traces flushed")

# Wait a moment for async processing
time.sleep(2)

# Verify traces arrived
print()
print("─── Verifying traces in dashboard... ───")
import httpx

resp = httpx.get(
    f"{AGENTLENS_ENDPOINT}/api/traces",
    params={"search": "moonshot", "limit": "10"},
    headers={"Authorization": f"Bearer {AGENTLENS_API_KEY}"},
)
if resp.status_code == 200:
    data = resp.json()
    traces = data.get("traces", [])
    print(f"[✓] Found {len(traces)} moonshot traces in dashboard:")
    for t in traces:
        spans = t.get("_count", {}).get("spans", "?")
        decisions = t.get("_count", {}).get("decisionPoints", "?")
        print(
            f" • {t['name']} — status={t['status']}, spans={spans}, decisions={decisions}"
        )
else:
    print(f"[✗] API returned {resp.status_code}: {resp.text[:200]}")

print()
print("=" * 60)
print("Test complete! Visit https://agentlens.vectry.tech/dashboard")
print("=" * 60)