diff --git a/README.md b/README.md
index af064d3..edadad5 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,180 @@
-# agentlens
+# AgentLens
+
+Agent observability that traces decisions, not just API calls.
+
+See why your AI agents chose what they chose.
+
-AgentLens - Agent observability that traces decisions, not just API calls. See WHY your AI agents chose what they chose.
\ No newline at end of file
+PyPI · License · Demo
+ +--- + +## The Problem + +Existing observability tools show you _what_ LLM calls were made. AgentLens shows you _why_ your agent made each decision along the way -- which tool it picked, what alternatives it rejected, and the reasoning behind every choice. + +## Quick Start + +```bash +pip install vectry-agentlens +``` + +```python +import agentlens + +agentlens.init(api_key="your-key", endpoint="https://agentlens.vectry.tech") + +with agentlens.trace("my-agent-task", tags=["production"]): + # Your agent logic here... + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={"name": "search_web", "confidence": 0.92}, + alternatives=[{"name": "search_docs", "reason_rejected": "query too broad"}], + reasoning="User query requires real-time data not in local docs" + ) + +agentlens.shutdown() +``` + +Open `https://agentlens.vectry.tech/dashboard` to see your traces. + +## Features + +- **Decision Tracing** -- Log every decision point with reasoning, alternatives, and confidence scores +- **OpenAI Integration** -- Auto-instrument OpenAI calls with one line: `wrap_openai(client)` +- **LangChain Integration** -- Drop-in callback handler for LangChain agents +- **Nested Traces** -- Multi-agent workflows with parent-child span relationships +- **Real-time Dashboard** -- SSE-powered live trace streaming with filtering and search +- **Decision Tree Viz** -- Interactive React Flow visualization of agent decision paths +- **Analytics** -- Token usage, cost tracking, duration timelines per trace +- **Self-Hostable** -- Docker Compose deployment, bring your own Postgres + Redis + +## Architecture + +``` +SDK (Python) API (Next.js) Dashboard (React) + agentlens.trace() ------> POST /api/traces ------> Real-time SSE stream + agentlens.log_decision() Prisma + Postgres Decision tree viz + wrap_openai(client) Redis pub/sub Analytics & filters +``` + +## Integrations + +### OpenAI + +```python +import openai +from agentlens.integrations.openai import wrap_openai + +client = openai.OpenAI() +wrap_openai(client) # Auto-traces all completions + +with agentlens.trace("openai-task"): + response = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "Hello!"}] + ) +``` + +### LangChain + +```python +from agentlens.integrations.langchain import AgentLensCallbackHandler + +handler = AgentLensCallbackHandler() +agent.run("Do something", callbacks=[handler]) +``` + +### Custom Agents + +```python +with agentlens.trace("planner"): + agentlens.log_decision( + type="ROUTING", + chosen={"name": "research_agent"}, + alternatives=[{"name": "writer_agent"}], + reasoning="Task requires data gathering first" + ) + with agentlens.trace("researcher"): + # Nested trace creates child span automatically + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={"name": "web_search"}, + alternatives=[{"name": "database_query"}], + reasoning="Need real-time information" + ) +``` + +## Decision Types + +| Type | Use Case | +|------|----------| +| `TOOL_SELECTION` | Agent chose which tool/function to call | +| `ROUTING` | Agent decided which sub-agent or path to take | +| `PLANNING` | Agent formulated a multi-step plan | +| `RETRY` | Agent decided to retry a failed operation | +| `ESCALATION` | Agent escalated to human or higher-level agent | +| `MEMORY_RETRIEVAL` | Agent chose what context to retrieve | +| `CUSTOM` | Any other decision type | + +## Self-Hosting + +```bash +git clone https://gitea.repi.fun/repi/agentlens.git +cd agentlens +docker compose up -d +``` + +The dashboard will be 
available at `http://localhost:4200`. + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `DATABASE_URL` | `postgresql://agentlens:agentlens@postgres:5432/agentlens` | PostgreSQL connection string | +| `REDIS_URL` | `redis://redis:6379` | Redis connection string | +| `NODE_ENV` | `production` | Node environment | + +## Project Structure + +``` +agentlens/ + apps/web/ # Next.js 15 dashboard + API + packages/database/ # Prisma schema + client + packages/sdk-python/ # Python SDK (PyPI: vectry-agentlens) + examples/ # Example agent scripts + docker-compose.yml # Production deployment +``` + +## SDK Reference + +See the full [Python SDK documentation](packages/sdk-python/README.md). + +## Examples + +See the [examples directory](examples/) for runnable agent scripts: + +- `basic_agent.py` -- Minimal AgentLens usage with decision logging +- `openai_agent.py` -- OpenAI wrapper auto-instrumentation +- `multi_agent.py` -- Nested multi-agent workflows +- `customer_support_agent.py` -- Realistic support bot with routing and escalation + +## Contributing + +AgentLens is open source under the MIT license. Contributions welcome. + +```bash +# Development setup +npm install +npx turbo dev # Start web app in dev mode +cd packages/sdk-python +pip install -e ".[dev]" # Install SDK in dev mode +pytest # Run SDK tests +``` + +## License + +MIT \ No newline at end of file diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..1a4be25 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,48 @@ +# AgentLens Examples + +Example scripts demonstrating the AgentLens SDK for tracing and observing AI agent behavior. + +## Setup + +```bash +pip install vectry-agentlens +``` + +## Examples + +| Script | Description | +|--------|-------------| +| `basic_agent.py` | Simplest usage — init, trace, log decisions, shutdown | +| `openai_agent.py` | OpenAI integration — wrap the client for automatic LLM call tracing | +| `multi_agent.py` | Nested traces — planner delegates to researcher, writer, and editor sub-agents | +| `customer_support_agent.py` | Realistic support workflow — classification, routing, escalation, error handling | +| `seed_demo_traces.py` | Seeds the live dashboard with 11 realistic traces via direct HTTP POST (no SDK) | + +## Running + +Each SDK example follows the same pattern: + +```bash +# Set your API key and endpoint +export AGENTLENS_API_KEY="your-key" + +# Run any example +python examples/basic_agent.py +``` + +For the OpenAI example, you also need: + +```bash +pip install openai +export OPENAI_API_KEY="sk-..." +``` + +### Seed Script + +The seed script sends pre-built traces directly to the API — no SDK or OpenAI key needed: + +```bash +python examples/seed_demo_traces.py +``` + +This populates the dashboard with varied traces (COMPLETED, ERROR, RUNNING) across multiple agent types. diff --git a/examples/basic_agent.py b/examples/basic_agent.py new file mode 100644 index 0000000..3cbe75f --- /dev/null +++ b/examples/basic_agent.py @@ -0,0 +1,92 @@ +""" +AgentLens Basic Example — Simplest possible usage. + +Demonstrates: + - Initializing the SDK + - Creating a trace with tags + - Logging decision points (TOOL_SELECTION, PLANNING) + - Graceful shutdown + +Usage: + pip install vectry-agentlens + python basic_agent.py +""" + +import agentlens +import time + +# 1. Initialize AgentLens +agentlens.init( + api_key="your-api-key-here", + endpoint="http://localhost:4200", +) + +# 2. 
Run an agent task inside a trace context +with agentlens.trace("research-task", tags=["demo", "basic"]): + # Simulate: agent decides which tool to use for research + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "search_web", + "confidence": 0.85, + "params": {"query": "latest AI research papers 2025"}, + }, + alternatives=[ + { + "name": "search_docs", + "confidence": 0.6, + "reason_rejected": "Internal docs unlikely to have latest papers", + }, + { + "name": "search_arxiv", + "confidence": 0.78, + "reason_rejected": "Web search covers arXiv plus other sources", + }, + ], + reasoning="Web search gives the broadest coverage for recent AI papers.", + ) + + time.sleep(0.3) # Simulate tool execution time + + # Simulate: agent plans next steps after getting search results + agentlens.log_decision( + type="PLANNING", + chosen={ + "name": "summarize_top_3", + "confidence": 0.92, + "params": {"max_papers": 3, "format": "bullet_points"}, + }, + alternatives=[ + { + "name": "summarize_all", + "confidence": 0.5, + "reason_rejected": "Too many results, would dilute quality", + }, + ], + reasoning="Focusing on top 3 papers gives concise, high-value summary.", + ) + + time.sleep(0.2) # Simulate summarization + + # Simulate: decide whether to retry with refined query + agentlens.log_decision( + type="CUSTOM", + chosen={ + "name": "return_results", + "confidence": 0.95, + "params": {"result_count": 3}, + }, + alternatives=[ + { + "name": "refine_and_retry", + "confidence": 0.3, + "reason_rejected": "Current results are already high quality", + }, + ], + reasoning="Results are comprehensive enough; no need to retry.", + ) + +# 3. Shutdown — flush any pending data +agentlens.shutdown() + +print("Done! Check your AgentLens dashboard for the 'research-task' trace.") diff --git a/examples/customer_support_agent.py b/examples/customer_support_agent.py new file mode 100644 index 0000000..8b704aa --- /dev/null +++ b/examples/customer_support_agent.py @@ -0,0 +1,211 @@ +""" +AgentLens Customer Support Example — Realistic support ticket workflow. + +Demonstrates: + - Ticket classification with ROUTING decisions + - Specialist routing with TOOL_SELECTION + - Escalation decisions with ESCALATION type + - Error handling — traces capture exceptions automatically + - Multiple real-world decision patterns + +Usage: + pip install vectry-agentlens + python customer_support_agent.py +""" + +import agentlens +import time +import random + +# Initialize +agentlens.init( + api_key="your-api-key-here", + endpoint="http://localhost:4200", +) + +# Simulated ticket data +TICKETS = [ + { + "id": "TKT-4021", + "subject": "Cannot access billing portal after password reset", + "priority": "high", + "customer_tier": "enterprise", + "body": "After resetting my password, I get a 403 error on the billing page. " + "I need to update our payment method before end of month.", + }, + { + "id": "TKT-4022", + "subject": "Feature request: dark mode for dashboard", + "priority": "low", + "customer_tier": "free", + "body": "Would love to have a dark mode option. My eyes hurt during late-night sessions.", + }, + { + "id": "TKT-4023", + "subject": "API returning 500 errors intermittently", + "priority": "critical", + "customer_tier": "enterprise", + "body": "Our production integration is failing ~20% of requests with 500 errors. " + "Started about 2 hours ago. 
This is blocking our release.", + }, +] + + +def simulate_llm(prompt: str, delay: float = 0.15) -> str: + """Fake LLM — replace with real calls.""" + time.sleep(delay) + return f"[Response to: {prompt[:60]}]" + + +def process_ticket(ticket: dict) -> None: + """Process a single support ticket through the agent pipeline.""" + + with agentlens.trace( + "customer-support-bot", + tags=["support", ticket["priority"], ticket["customer_tier"]], + ): + # Step 1: Classify the ticket + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": "classify_ticket", + "confidence": 0.91, + "params": { + "ticket_id": ticket["id"], + "predicted_category": ( + "billing" + if "billing" in ticket["subject"].lower() + else "bug" + if "error" in ticket["body"].lower() or "500" in ticket["body"] + else "feature_request" + ), + }, + }, + alternatives=[ + { + "name": "ask_customer_for_clarification", + "confidence": 0.2, + "reason_rejected": "Ticket subject and body are clear enough", + }, + ], + reasoning=f"Ticket '{ticket['subject']}' clearly maps to a known category.", + ) + + classification = simulate_llm(f"Classify: {ticket['subject']}") + + # Step 2: Route to specialist + is_critical = ticket["priority"] in ("critical", "high") + is_enterprise = ticket["customer_tier"] == "enterprise" + + if is_critical and is_enterprise: + specialist = "senior_engineer" + elif is_critical: + specialist = "engineer" + elif "billing" in ticket["subject"].lower(): + specialist = "billing_team" + else: + specialist = "general_support" + + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": specialist, + "confidence": 0.87, + "params": { + "ticket_id": ticket["id"], + "priority": ticket["priority"], + "sla_minutes": 30 if is_enterprise else 240, + }, + }, + alternatives=[ + { + "name": "general_support", + "confidence": 0.4, + "reason_rejected": "Ticket requires specialized handling" + if specialist != "general_support" + else "This is general support already", + }, + ], + reasoning=f"Priority={ticket['priority']}, Tier={ticket['customer_tier']} -> route to {specialist}.", + ) + + # Step 3: Specialist handles ticket (nested trace) + with agentlens.trace(f"specialist-{specialist}", tags=[specialist]): + # Tool selection for the specialist + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "search_knowledge_base", + "confidence": 0.82, + "params": {"query": ticket["subject"], "limit": 5}, + }, + alternatives=[ + { + "name": "search_past_tickets", + "confidence": 0.7, + "reason_rejected": "KB is more authoritative for known issues", + }, + { + "name": "check_status_page", + "confidence": 0.6, + "reason_rejected": "Already checked — no ongoing incidents posted", + }, + ], + reasoning="Knowledge base has resolution guides for common issues.", + ) + + kb_result = simulate_llm(f"Search KB for: {ticket['subject']}") + + # Step 4: Escalation decision for critical tickets + if ticket["priority"] == "critical": + agentlens.log_decision( + type="ESCALATION", + chosen={ + "name": "escalate_to_engineering", + "confidence": 0.94, + "params": { + "severity": "P1", + "team": "platform-reliability", + "ticket_id": ticket["id"], + }, + }, + alternatives=[ + { + "name": "resolve_at_support_level", + "confidence": 0.15, + "reason_rejected": "500 errors suggest infrastructure issue beyond support scope", + }, + ], + reasoning="Intermittent 500s on enterprise account = immediate P1 escalation.", + ) + + # Simulate escalation failure for the critical ticket (shows error handling) + if random.random() < 0.3: + raise 
RuntimeError( + f"Escalation service unavailable for {ticket['id']}" + ) + + # Generate response + response = simulate_llm( + f"Draft response for {ticket['id']}: {ticket['subject']}", + delay=0.3, + ) + + print(f" [{ticket['id']}] Processed -> routed to {specialist}") + + +# Process all tickets +print("Processing support tickets...\n") + +for ticket in TICKETS: + try: + process_ticket(ticket) + except Exception as e: + # The trace context manager captures the error automatically + print(f" [{ticket['id']}] Error during processing: {e}") + +# Shutdown +agentlens.shutdown() + +print("\nDone! Check AgentLens dashboard for 'customer-support-bot' traces.") +print("Look for the ERROR trace — it shows how failures are captured.") diff --git a/examples/multi_agent.py b/examples/multi_agent.py new file mode 100644 index 0000000..9892354 --- /dev/null +++ b/examples/multi_agent.py @@ -0,0 +1,183 @@ +""" +AgentLens Multi-Agent Example — Nested traces for orchestrated agent workflows. + +Demonstrates: + - A "planner" agent that delegates to sub-agents + - Nested trace contexts that create parent-child span relationships automatically + - Multiple decision types: ROUTING, PLANNING, TOOL_SELECTION + - How the dashboard shows the full agent call tree + +Usage: + pip install vectry-agentlens + python multi_agent.py +""" + +import agentlens +import time + +# Initialize +agentlens.init( + api_key="your-api-key-here", + endpoint="http://localhost:4200", +) + + +def simulate_llm_call(prompt: str, delay: float = 0.2) -> str: + """Fake LLM call — replace with real model calls in production.""" + time.sleep(delay) + return f"[LLM response to: {prompt[:50]}...]" + + +# Top-level planner agent trace +with agentlens.trace("planner-agent", tags=["multi-agent", "blog-pipeline"]): + # Planner decides the workflow + agentlens.log_decision( + type="PLANNING", + chosen={ + "name": "research_then_write", + "confidence": 0.93, + "params": { + "steps": ["research", "outline", "draft", "review"], + "topic": "AI agents in production", + }, + }, + alternatives=[ + { + "name": "write_directly", + "confidence": 0.4, + "reason_rejected": "Topic requires research for factual accuracy", + }, + ], + reasoning="Complex topic — research phase needed before writing.", + ) + + # Planner routes to researcher agent + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": "researcher-agent", + "confidence": 0.95, + "params": {"query": "AI agents in production best practices 2025"}, + }, + alternatives=[ + { + "name": "writer-agent", + "confidence": 0.3, + "reason_rejected": "Need facts before drafting", + }, + ], + reasoning="Researcher goes first to gather source material.", + ) + + # --- Nested: Researcher Agent --- + with agentlens.trace("researcher-agent", tags=["research"]): + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "web_search", + "confidence": 0.88, + "params": { + "query": "AI agents production deployment 2025", + "limit": 10, + }, + }, + alternatives=[ + { + "name": "arxiv_search", + "confidence": 0.72, + "reason_rejected": "Need industry examples, not just papers", + }, + ], + reasoning="Web search covers blog posts, case studies, and papers.", + ) + + research_results = simulate_llm_call( + "Summarize findings about AI agents in production" + ) + + agentlens.log_decision( + type="MEMORY_RETRIEVAL", + chosen={ + "name": "store_research_context", + "confidence": 0.9, + "params": {"key": "research_findings", "chunks": 5}, + }, + alternatives=[], + reasoning="Store condensed findings for the 
writer agent to consume.", + ) + + # Planner routes to writer agent + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": "writer-agent", + "confidence": 0.97, + "params": {"style": "technical-blog", "word_count": 1500}, + }, + alternatives=[ + { + "name": "researcher-agent", + "confidence": 0.15, + "reason_rejected": "Research phase complete, enough material gathered", + }, + ], + reasoning="Research complete — hand off to writer with gathered material.", + ) + + # --- Nested: Writer Agent --- + with agentlens.trace("writer-agent", tags=["writing"]): + agentlens.log_decision( + type="PLANNING", + chosen={ + "name": "structured_outline_first", + "confidence": 0.91, + "params": { + "sections": ["intro", "challenges", "solutions", "conclusion"] + }, + }, + alternatives=[ + { + "name": "stream_of_consciousness", + "confidence": 0.3, + "reason_rejected": "Technical blog needs clear structure", + }, + ], + reasoning="Outline-first approach produces better organized blog posts.", + ) + + outline = simulate_llm_call("Create blog outline for AI agents in production") + draft = simulate_llm_call("Write full blog draft from outline", delay=0.5) + + # --- Nested deeper: Editor sub-agent within writer --- + with agentlens.trace("editor-agent", tags=["editing"]): + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "grammar_check", + "confidence": 0.85, + "params": {"text_length": 1500, "style_guide": "technical"}, + }, + alternatives=[ + { + "name": "skip_editing", + "confidence": 0.1, + "reason_rejected": "Always edit before publishing", + }, + ], + reasoning="Run grammar and style check on the draft.", + ) + + edited = simulate_llm_call("Edit and polish the blog draft", delay=0.3) + + print("Blog pipeline complete!") + print(f"Research: {research_results}") + print(f"Final draft: {edited}") + +# Shutdown +agentlens.shutdown() + +print("\nDone! Check AgentLens dashboard — you'll see nested spans:") +print(" planner-agent") +print(" -> researcher-agent") +print(" -> writer-agent") +print(" -> editor-agent") diff --git a/examples/openai_agent.py b/examples/openai_agent.py new file mode 100644 index 0000000..75f04ed --- /dev/null +++ b/examples/openai_agent.py @@ -0,0 +1,113 @@ +""" +AgentLens OpenAI Integration Example — Wrap the OpenAI client for automatic tracing. + +Demonstrates: + - Wrapping openai.OpenAI() so all LLM calls are traced as spans + - Combining automatic LLM tracing with manual decision logging + - Using trace tags and metadata + +Usage: + pip install vectry-agentlens openai + export OPENAI_API_KEY="sk-..." + python openai_agent.py +""" + +import agentlens +from agentlens.integrations.openai import wrap_openai +import openai # pip install openai + +# 1. Initialize AgentLens +agentlens.init( + api_key="your-api-key-here", + endpoint="http://localhost:4200", +) + +# 2. Create and wrap the OpenAI client — all completions are now auto-traced +client = openai.OpenAI() +wrap_openai(client) + +# 3. 
Use the wrapped client inside a trace +with agentlens.trace("email-drafting-agent", tags=["openai", "email", "demo"]): + # Decision: which model to use for this task + agentlens.log_decision( + type="TOOL_SELECTION", + chosen={ + "name": "gpt-4o", + "confidence": 0.9, + "params": {"temperature": 0.7, "max_tokens": 512}, + }, + alternatives=[ + { + "name": "gpt-4o-mini", + "confidence": 0.7, + "reason_rejected": "Task needs higher quality reasoning for tone", + }, + ], + reasoning="Email drafting requires nuanced tone — use the larger model.", + ) + + # This call is automatically captured as an LLM_CALL span + classification = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "Classify the intent of this email request."}, + { + "role": "user", + "content": "Write a professional follow-up email to a client " + "who hasn't responded to our proposal in 2 weeks.", + }, + ], + temperature=0.3, + max_tokens=100, + ) + + intent = classification.choices[0].message.content + print(f"Classified intent: {intent}") + + # Decision: choose email style based on classification + agentlens.log_decision( + type="ROUTING", + chosen={ + "name": "polite_follow_up", + "confidence": 0.88, + "params": {"tone": "professional-warm", "urgency": "medium"}, + }, + alternatives=[ + { + "name": "formal_reminder", + "confidence": 0.65, + "reason_rejected": "Too stiff for a 2-week follow-up", + }, + { + "name": "casual_check_in", + "confidence": 0.4, + "reason_rejected": "Client relationship is still formal", + }, + ], + reasoning="Professional-warm tone balances urgency with courtesy.", + ) + + # Second LLM call — also auto-captured + draft = client.chat.completions.create( + model="gpt-4o", + messages=[ + { + "role": "system", + "content": "You draft professional emails. Tone: warm but professional.", + }, + { + "role": "user", + "content": f"Draft a polite follow-up email. Context: {intent}", + }, + ], + temperature=0.7, + max_tokens=512, + ) + + email_body = draft.choices[0].message.content + print(f"\nDrafted email:\n{email_body}") + +# 4. Shutdown +agentlens.shutdown() + +print("\nDone! Check AgentLens dashboard for the 'email-drafting-agent' trace.") diff --git a/examples/seed_demo_traces.py b/examples/seed_demo_traces.py new file mode 100644 index 0000000..5643b9c --- /dev/null +++ b/examples/seed_demo_traces.py @@ -0,0 +1,1363 @@ +""" +AgentLens Demo Seed Script — Sends realistic traces directly to the live API. + +This script does NOT use the SDK. It sends raw HTTP POST requests with +fabricated but realistic trace data so the AgentLens dashboard has +compelling demo content. 
+ +Usage: + python seed_demo_traces.py +""" + +import uuid +import random +import httpx +from datetime import datetime, timezone, timedelta + +API_URL = "https://agentlens.vectry.tech/api/traces" +API_KEY = "demo-seed-key" + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def uid() -> str: + return str(uuid.uuid4()) + + +def iso(dt: datetime) -> str: + return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z" + + +def now_minus(hours: float = 0, minutes: float = 0) -> datetime: + return datetime.now(timezone.utc) - timedelta(hours=hours, minutes=minutes) + + +def make_span( + name: str, + span_type: str, + parent_id: str | None, + start: datetime, + duration_ms: int, + tokens: int = 0, + cost: float = 0.0, + status: str = "COMPLETED", + input_data: dict | None = None, + output_data: dict | None = None, +) -> dict: + end = start + timedelta(milliseconds=duration_ms) + return { + "id": uid(), + "name": name, + "type": span_type, + "status": status, + "parentSpanId": parent_id, + "startedAt": iso(start), + "endedAt": iso(end), + "durationMs": duration_ms, + "tokenCount": tokens, + "costUsd": cost, + "input": input_data or {}, + "output": output_data or {}, + } + + +def make_decision( + d_type: str, + chosen: dict, + alternatives: list, + reasoning: str, + timestamp: datetime, +) -> dict: + return { + "id": uid(), + "type": d_type, + "chosen": chosen, + "alternatives": alternatives, + "reasoning": reasoning, + "timestamp": iso(timestamp), + } + + +# --------------------------------------------------------------------------- +# Trace Definitions (10 varied traces) +# --------------------------------------------------------------------------- + + +def trace_code_review_agent() -> dict: + """CodeReviewAgent — reviews a pull request.""" + t0 = now_minus(hours=22) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "CodeReviewAgent", + "AGENT", + None, + t0, + 4500, + 0, + 0.0, + input_data={"pr_url": "https://github.com/acme/api/pull/187"}, + ), + make_span( + "fetch_pr_diff", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=50), + 320, + 0, + 0.0, + input_data={"pr_number": 187}, + output_data={"files_changed": 7, "additions": 142, "deletions": 38}, + ), + make_span( + "analyze_code_quality", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=400), + 2800, + 3200, + 0.048, + input_data={"model": "gpt-4o", "diff_lines": 180}, + output_data={"issues_found": 3, "severity": "medium"}, + ), + make_span( + "generate_review_comment", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=3300), + 1100, + 1800, + 0.027, + input_data={"model": "gpt-4o", "issues": 3}, + output_data={"comment_length": 450}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "TOOL_SELECTION", + { + "name": "static_analysis", + "confidence": 0.82, + "params": {"language": "python"}, + }, + [ + { + "name": "lint_only", + "confidence": 0.5, + "reason_rejected": "Need deeper analysis", + } + ], + "Full static analysis catches more than linting alone.", + t0 + timedelta(milliseconds=380), + ), + make_decision( + "PLANNING", + { + "name": "review_by_file", + "confidence": 0.88, + "params": {"strategy": "file-by-file"}, + }, + [ + { + "name": "holistic_review", + "confidence": 0.6, + "reason_rejected": "7 files too many for one pass", + } + ], + "File-by-file review is more thorough for multi-file PRs.", + t0 + timedelta(milliseconds=100), + ), + ] 
+ return { + "id": trace_id, + "name": "CodeReviewAgent", + "status": "COMPLETED", + "tags": ["code-review", "github", "python"], + "metadata": {"pr_number": 187, "repo": "acme/api"}, + "totalCost": 0.075, + "totalTokens": 5000, + "totalDuration": 4500, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=4500)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_data_analysis_agent() -> dict: + """DataAnalysisAgent — analyzes a CSV dataset.""" + t0 = now_minus(hours=18, minutes=30) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "DataAnalysisAgent", + "AGENT", + None, + t0, + 8200, + 0, + 0.0, + input_data={"dataset": "sales_q4_2025.csv", "rows": 45000}, + ), + make_span( + "load_and_profile", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 1200, + 0, + 0.0, + input_data={"file": "sales_q4_2025.csv"}, + output_data={"columns": 12, "null_pct": 2.3}, + ), + make_span( + "generate_sql_queries", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=1400), + 2100, + 2400, + 0.036, + input_data={"model": "gpt-4o-mini", "schema_columns": 12}, + output_data={"queries_generated": 5}, + ), + make_span( + "execute_queries", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=3600), + 2800, + 0, + 0.0, + input_data={"query_count": 5}, + output_data={"results": "aggregated"}, + ), + make_span( + "summarize_insights", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=6500), + 1600, + 3800, + 0.057, + input_data={"model": "gpt-4o", "data_points": 15}, + output_data={"insights": 4, "charts_suggested": 2}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "TOOL_SELECTION", + { + "name": "pandas_profiling", + "confidence": 0.9, + "params": {"dataset_size": "45K rows"}, + }, + [ + { + "name": "manual_stats", + "confidence": 0.4, + "reason_rejected": "Too slow for 45K rows", + } + ], + "Automated profiling is fastest for datasets this size.", + t0 + timedelta(milliseconds=80), + ), + make_decision( + "PLANNING", + { + "name": "top_down_analysis", + "confidence": 0.85, + "params": {"focus": "revenue_trends"}, + }, + [ + { + "name": "exploratory", + "confidence": 0.6, + "reason_rejected": "User asked specifically about revenue", + } + ], + "User's question is revenue-focused, so start there.", + t0 + timedelta(milliseconds=1350), + ), + ] + return { + "id": trace_id, + "name": "DataAnalysisAgent", + "status": "COMPLETED", + "tags": ["data-analysis", "csv", "sql"], + "metadata": {"dataset": "sales_q4_2025.csv"}, + "totalCost": 0.093, + "totalTokens": 6200, + "totalDuration": 8200, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=8200)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_customer_support_bot() -> dict: + """CustomerSupportBot — handles a billing ticket.""" + t0 = now_minus(hours=14, minutes=15) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "CustomerSupportBot", + "AGENT", + None, + t0, + 5800, + 0, + 0.0, + input_data={ + "ticket_id": "TKT-9102", + "subject": "Double charged on invoice", + }, + ), + make_span( + "classify_ticket", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=80), + 800, + 600, + 0.004, + input_data={"model": "gpt-4o-mini"}, + output_data={"category": "billing", "confidence": 0.96}, + ), + make_span( + "lookup_billing_history", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=950), + 450, + 0, + 0.0, + input_data={"customer_id": "cust_8827"}, + output_data={"invoices": 3, 
"duplicate_found": True}, + ), + make_span( + "route_to_billing_specialist", + "CHAIN", + root_id, + t0 + timedelta(milliseconds=1500), + 200, + 0, + 0.0, + ), + make_span( + "draft_response", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=1800), + 1800, + 2200, + 0.033, + input_data={"model": "gpt-4o", "tone": "empathetic"}, + output_data={"word_count": 180, "includes_refund_link": True}, + ), + make_span( + "send_email", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=3700), + 350, + 0, + 0.0, + input_data={"channel": "email"}, + output_data={"sent": True, "message_id": "msg_abc123"}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "ROUTING", + { + "name": "billing_specialist", + "confidence": 0.96, + "params": {"queue": "billing-l2"}, + }, + [ + { + "name": "general_support", + "confidence": 0.3, + "reason_rejected": "Billing issues need specialized handling", + } + ], + "Double-charge = billing category with high confidence.", + t0 + timedelta(milliseconds=900), + ), + make_decision( + "ESCALATION", + { + "name": "auto_resolve", + "confidence": 0.88, + "params": {"action": "issue_refund"}, + }, + [ + { + "name": "escalate_to_manager", + "confidence": 0.45, + "reason_rejected": "Clear duplicate charge, no need for manager", + } + ], + "Duplicate charge confirmed in billing system — auto-refund is safe.", + t0 + timedelta(milliseconds=1450), + ), + ] + return { + "id": trace_id, + "name": "CustomerSupportBot", + "status": "COMPLETED", + "tags": ["support", "billing", "enterprise"], + "metadata": {"ticket_id": "TKT-9102", "customer_tier": "enterprise"}, + "totalCost": 0.037, + "totalTokens": 2800, + "totalDuration": 5800, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=5800)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_research_assistant() -> dict: + """ResearchAssistant — deep research on a technical topic.""" + t0 = now_minus(hours=10, minutes=45) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "ResearchAssistant", + "AGENT", + None, + t0, + 15200, + 0, + 0.0, + input_data={ + "query": "Compare vector databases for RAG: Pinecone vs Weaviate vs Qdrant" + }, + ), + make_span( + "web_search_pinecone", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 1800, + 0, + 0.0, + output_data={"results": 8}, + ), + make_span( + "web_search_weaviate", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=2000), + 1600, + 0, + 0.0, + output_data={"results": 6}, + ), + make_span( + "web_search_qdrant", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=3700), + 1400, + 0, + 0.0, + output_data={"results": 7}, + ), + make_span( + "synthesize_comparison", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=5200), + 6500, + 8500, + 0.127, + input_data={"model": "gpt-4o", "sources": 21}, + output_data={"sections": 5, "word_count": 1200}, + ), + make_span( + "generate_recommendation", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=11800), + 3200, + 4200, + 0.063, + input_data={ + "model": "gpt-4o", + "criteria": ["cost", "performance", "ease_of_use"], + }, + output_data={"recommendation": "Qdrant", "confidence": 0.78}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "TOOL_SELECTION", + { + "name": "parallel_web_search", + "confidence": 0.92, + "params": {"queries": 3}, + }, + [ + { + "name": "sequential_search", + "confidence": 0.5, + "reason_rejected": "Parallel is faster for independent queries", + } + ], + "Three independent searches — 
run in parallel.", + t0 + timedelta(milliseconds=60), + ), + make_decision( + "PLANNING", + { + "name": "comparative_matrix", + "confidence": 0.87, + "params": {"dimensions": 6}, + }, + [ + { + "name": "prose_comparison", + "confidence": 0.6, + "reason_rejected": "Matrix format is clearer for 3-way comparison", + } + ], + "Structured comparison matrix gives the clearest output.", + t0 + timedelta(milliseconds=5100), + ), + make_decision( + "MEMORY_RETRIEVAL", + {"name": "cache_research", "confidence": 0.8, "params": {"ttl_hours": 48}}, + [ + { + "name": "no_cache", + "confidence": 0.3, + "reason_rejected": "Research is reusable for similar queries", + } + ], + "Cache this research for 48h to avoid redundant searches.", + t0 + timedelta(milliseconds=15000), + ), + ] + return { + "id": trace_id, + "name": "ResearchAssistant", + "status": "COMPLETED", + "tags": ["research", "vector-db", "comparison"], + "metadata": {"topic": "vector databases for RAG"}, + "totalCost": 0.19, + "totalTokens": 12700, + "totalDuration": 15200, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=15200)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_content_generator() -> dict: + """ContentGenerator — writes a blog post.""" + t0 = now_minus(hours=8, minutes=20) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "ContentGenerator", + "AGENT", + None, + t0, + 12000, + 0, + 0.0, + input_data={"topic": "Building reliable AI agents", "format": "blog"}, + ), + make_span( + "outline_generation", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 2200, + 1800, + 0.027, + input_data={"model": "gpt-4o"}, + output_data={"sections": 5}, + ), + make_span( + "draft_introduction", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=2400), + 2500, + 2200, + 0.033, + ), + make_span( + "draft_body", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=5000), + 4000, + 5500, + 0.082, + input_data={"model": "gpt-4o", "sections": 3}, + ), + make_span( + "draft_conclusion", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=9100), + 1500, + 1200, + 0.018, + ), + make_span( + "seo_optimization", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=10700), + 1100, + 0, + 0.0, + output_data={"keywords_added": 8, "readability_score": 72}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "PLANNING", + {"name": "outline_first", "confidence": 0.94, "params": {"sections": 5}}, + [ + { + "name": "freeform_write", + "confidence": 0.3, + "reason_rejected": "Blog needs structure", + } + ], + "Outline-first produces more coherent long-form content.", + t0 + timedelta(milliseconds=50), + ), + make_decision( + "TOOL_SELECTION", + { + "name": "seo_optimizer", + "confidence": 0.78, + "params": {"target_keywords": ["AI agents", "reliability"]}, + }, + [ + { + "name": "skip_seo", + "confidence": 0.4, + "reason_rejected": "Blog is for marketing — SEO matters", + } + ], + "Marketing blog needs SEO optimization before publishing.", + t0 + timedelta(milliseconds=10600), + ), + ] + return { + "id": trace_id, + "name": "ContentGenerator", + "status": "COMPLETED", + "tags": ["content", "blog", "marketing"], + "metadata": {"word_count": 1800, "topic": "AI agents"}, + "totalCost": 0.16, + "totalTokens": 10700, + "totalDuration": 12000, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=12000)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_sql_query_agent() -> dict: + """SQLQueryAgent — natural 
language to SQL.""" + t0 = now_minus(hours=6, minutes=10) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "SQLQueryAgent", + "AGENT", + None, + t0, + 3200, + 0, + 0.0, + input_data={ + "question": "What were top 10 products by revenue last quarter?" + }, + ), + make_span( + "schema_lookup", + "MEMORY_OP", + root_id, + t0 + timedelta(milliseconds=50), + 180, + 0, + 0.0, + output_data={ + "tables": ["products", "orders", "order_items"], + "cached": True, + }, + ), + make_span( + "generate_sql", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=250), + 1200, + 1100, + 0.008, + input_data={"model": "gpt-4o-mini"}, + output_data={"sql": "SELECT p.name, SUM(oi.revenue)..."}, + ), + make_span( + "validate_sql", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=1500), + 300, + 0, + 0.0, + output_data={"valid": True, "estimated_rows": 10}, + ), + make_span( + "execute_query", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=1850), + 800, + 0, + 0.0, + output_data={"rows_returned": 10, "execution_time_ms": 340}, + ), + make_span( + "format_response", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=2700), + 450, + 800, + 0.006, + input_data={"model": "gpt-4o-mini"}, + output_data={"format": "markdown_table"}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "MEMORY_RETRIEVAL", + { + "name": "cached_schema", + "confidence": 0.95, + "params": {"db": "analytics_prod"}, + }, + [ + { + "name": "introspect_live", + "confidence": 0.7, + "reason_rejected": "Schema cached 2 hours ago, still valid", + } + ], + "Schema was cached recently and hasn't changed.", + t0 + timedelta(milliseconds=30), + ), + make_decision( + "TOOL_SELECTION", + { + "name": "sql_validator", + "confidence": 0.9, + "params": {"dialect": "postgresql"}, + }, + [ + { + "name": "execute_directly", + "confidence": 0.4, + "reason_rejected": "Always validate before executing", + } + ], + "Validate SQL to prevent syntax errors or dangerous queries.", + t0 + timedelta(milliseconds=1480), + ), + ] + return { + "id": trace_id, + "name": "SQLQueryAgent", + "status": "COMPLETED", + "tags": ["sql", "analytics", "natural-language"], + "metadata": {"database": "analytics_prod", "dialect": "postgresql"}, + "totalCost": 0.014, + "totalTokens": 1900, + "totalDuration": 3200, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=3200)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_debug_agent_error() -> dict: + """DebugAgent — fails while debugging a production issue (ERROR trace).""" + t0 = now_minus(hours=4, minutes=50) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "DebugAgent", + "AGENT", + None, + t0, + 7500, + 0, + 0.0, + status="ERROR", + input_data={ + "error": "OOMKilled in pod analytics-worker-7b9f", + "cluster": "prod-us-east", + }, + ), + make_span( + "fetch_pod_logs", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 2200, + 0, + 0.0, + output_data={"log_lines": 450, "oom_events": 3}, + ), + make_span( + "analyze_memory_usage", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=2400), + 2800, + 3500, + 0.052, + input_data={"model": "gpt-4o", "log_lines": 450}, + output_data={"root_cause": "memory leak in batch processor"}, + ), + make_span( + "fetch_metrics_grafana", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=5300), + 2200, + 0, + 0.0, + status="ERROR", + input_data={"dashboard": "pod-resources", "timerange": "6h"}, + output_data={"error": "Grafana API timeout after 2000ms"}, + ), + 
] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "TOOL_SELECTION", + {"name": "fetch_pod_logs", "confidence": 0.92, "params": {"tail": 500}}, + [ + { + "name": "describe_pod", + "confidence": 0.6, + "reason_rejected": "Logs give more detail than describe", + } + ], + "Pod logs are the best starting point for OOM investigation.", + t0 + timedelta(milliseconds=70), + ), + make_decision( + "RETRY", + { + "name": "retry_grafana_with_longer_timeout", + "confidence": 0.7, + "params": {"timeout_ms": 5000}, + }, + [ + { + "name": "skip_metrics", + "confidence": 0.5, + "reason_rejected": "Metrics are critical for memory analysis", + } + ], + "Grafana timed out — retry with extended timeout.", + t0 + timedelta(milliseconds=7400), + ), + ] + events = [ + { + "id": uid(), + "type": "ERROR", + "name": "Grafana API timeout", + "timestamp": iso(t0 + timedelta(milliseconds=7500)), + "metadata": {"service": "grafana", "timeout_ms": 2000}, + }, + { + "id": uid(), + "type": "RETRY", + "name": "Retrying Grafana fetch", + "timestamp": iso(t0 + timedelta(milliseconds=7500)), + }, + ] + return { + "id": trace_id, + "name": "DebugAgent", + "status": "ERROR", + "tags": ["debugging", "kubernetes", "production"], + "metadata": {"cluster": "prod-us-east", "error_type": "OOMKilled"}, + "totalCost": 0.052, + "totalTokens": 3500, + "totalDuration": 7500, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=7500)), + "spans": spans, + "decisionPoints": decisions, + "events": events, + } + + +def trace_translation_agent() -> dict: + """TranslationAgent — translates documentation to multiple languages.""" + t0 = now_minus(hours=3, minutes=30) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "TranslationAgent", + "AGENT", + None, + t0, + 18500, + 0, + 0.0, + input_data={ + "source_lang": "en", + "target_langs": ["es", "ja", "de"], + "doc": "API guide", + }, + ), + make_span( + "extract_translatable_strings", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 800, + 0, + 0.0, + output_data={"strings": 47, "words": 3200}, + ), + make_span( + "translate_to_spanish", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=1000), + 4500, + 4800, + 0.072, + input_data={"model": "gpt-4o", "target": "es"}, + output_data={"translated_strings": 47}, + ), + make_span( + "translate_to_japanese", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=5600), + 5200, + 5100, + 0.076, + input_data={"model": "gpt-4o", "target": "ja"}, + output_data={"translated_strings": 47}, + ), + make_span( + "translate_to_german", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=10900), + 4800, + 4600, + 0.069, + input_data={"model": "gpt-4o", "target": "de"}, + output_data={"translated_strings": 47}, + ), + make_span( + "quality_check", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=15800), + 2500, + 2000, + 0.03, + input_data={"model": "gpt-4o", "check_type": "consistency"}, + output_data={"issues_found": 2, "auto_fixed": 2}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "PLANNING", + { + "name": "sequential_translation", + "confidence": 0.82, + "params": {"order": ["es", "ja", "de"]}, + }, + [ + { + "name": "parallel_translation", + "confidence": 0.7, + "reason_rejected": "Sequential allows cross-referencing for consistency", + } + ], + "Translate sequentially so each builds on context from prior.", + t0 + timedelta(milliseconds=50), + ), + make_decision( + "TOOL_SELECTION", + { + "name": "gpt-4o", + "confidence": 0.91, + "params": {"reason": "best 
quality for ja/de"}, + }, + [ + { + "name": "gpt-4o-mini", + "confidence": 0.6, + "reason_rejected": "Japanese and German need higher quality model", + } + ], + "GPT-4o produces significantly better CJK translations.", + t0 + timedelta(milliseconds=950), + ), + ] + return { + "id": trace_id, + "name": "TranslationAgent", + "status": "COMPLETED", + "tags": ["translation", "i18n", "documentation"], + "metadata": {"languages": ["es", "ja", "de"], "document": "API guide"}, + "totalCost": 0.247, + "totalTokens": 16500, + "totalDuration": 18500, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=18500)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_debug_agent_running() -> dict: + """DebugAgent (RUNNING) — currently investigating an issue.""" + t0 = now_minus(minutes=12) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "DebugAgent", + "AGENT", + None, + t0, + 0, + 0, + 0.0, + status="RUNNING", + input_data={ + "issue": "Latency spike in /api/search endpoint", + "p99_ms": 4500, + }, + ), + make_span( + "fetch_recent_deploys", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=100), + 600, + 0, + 0.0, + output_data={"deploys_24h": 3, "latest": "v2.14.1"}, + ), + make_span( + "analyze_latency_traces", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=800), + 3500, + 2800, + 0.042, + input_data={"model": "gpt-4o", "trace_count": 50}, + output_data={"bottleneck": "elasticsearch query"}, + ), + ] + spans[0]["id"] = root_id + # No endedAt for RUNNING trace + decisions = [ + make_decision( + "TOOL_SELECTION", + { + "name": "query_datadog", + "confidence": 0.88, + "params": {"metric": "p99_latency", "window": "1h"}, + }, + [ + { + "name": "check_logs", + "confidence": 0.65, + "reason_rejected": "Metrics give faster overview than logs", + } + ], + "Start with metrics to identify the time window of regression.", + t0 + timedelta(milliseconds=50), + ), + ] + return { + "id": trace_id, + "name": "DebugAgent", + "status": "RUNNING", + "tags": ["debugging", "latency", "production"], + "metadata": {"endpoint": "/api/search", "p99_ms": 4500}, + "totalCost": 0.042, + "totalTokens": 2800, + "totalDuration": 4200, + "startedAt": iso(t0), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +def trace_customer_support_error() -> dict: + """CustomerSupportBot — fails on a malformed ticket (ERROR).""" + t0 = now_minus(hours=1, minutes=40) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "CustomerSupportBot", + "AGENT", + None, + t0, + 1200, + 0, + 0.0, + status="ERROR", + input_data={"ticket_id": "TKT-9201", "subject": ""}, + ), + make_span( + "classify_ticket", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=80), + 900, + 400, + 0.003, + status="ERROR", + input_data={"model": "gpt-4o-mini", "subject": ""}, + output_data={"error": "Cannot classify empty subject"}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "ROUTING", + { + "name": "reject_ticket", + "confidence": 0.7, + "params": {"reason": "empty_subject"}, + }, + [ + { + "name": "classify_anyway", + "confidence": 0.4, + "reason_rejected": "No content to classify", + } + ], + "Ticket has empty subject — cannot classify.", + t0 + timedelta(milliseconds=1000), + ), + ] + events = [ + { + "id": uid(), + "type": "ERROR", + "name": "Empty ticket subject", + "timestamp": iso(t0 + timedelta(milliseconds=1100)), + "metadata": {"ticket_id": "TKT-9201"}, + }, + ] + return { + "id": trace_id, + "name": "CustomerSupportBot", + 
"status": "ERROR", + "tags": ["support", "error", "validation"], + "metadata": {"ticket_id": "TKT-9201", "error": "empty_subject"}, + "totalCost": 0.003, + "totalTokens": 400, + "totalDuration": 1200, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=1200)), + "spans": spans, + "decisionPoints": decisions, + "events": events, + } + + +def trace_code_review_fast() -> dict: + """CodeReviewAgent — fast review of a small PR.""" + t0 = now_minus(minutes=35) + trace_id = uid() + root_id = uid() + spans = [ + make_span( + "CodeReviewAgent", + "AGENT", + None, + t0, + 1800, + 0, + 0.0, + input_data={"pr_url": "https://github.com/acme/web/pull/42", "files": 2}, + ), + make_span( + "fetch_pr_diff", + "TOOL_CALL", + root_id, + t0 + timedelta(milliseconds=50), + 250, + 0, + 0.0, + output_data={"additions": 18, "deletions": 5}, + ), + make_span( + "quick_review", + "LLM_CALL", + root_id, + t0 + timedelta(milliseconds=350), + 1300, + 900, + 0.007, + input_data={"model": "gpt-4o-mini"}, + output_data={"verdict": "LGTM", "issues": 0}, + ), + ] + spans[0]["id"] = root_id + decisions = [ + make_decision( + "PLANNING", + { + "name": "quick_review", + "confidence": 0.95, + "params": {"depth": "shallow"}, + }, + [ + { + "name": "deep_review", + "confidence": 0.3, + "reason_rejected": "Only 2 files, 23 lines changed", + } + ], + "Small PR — quick review is sufficient.", + t0 + timedelta(milliseconds=300), + ), + ] + return { + "id": trace_id, + "name": "CodeReviewAgent", + "status": "COMPLETED", + "tags": ["code-review", "github", "quick"], + "metadata": {"pr_number": 42, "repo": "acme/web", "verdict": "LGTM"}, + "totalCost": 0.007, + "totalTokens": 900, + "totalDuration": 1800, + "startedAt": iso(t0), + "endedAt": iso(t0 + timedelta(milliseconds=1800)), + "spans": spans, + "decisionPoints": decisions, + "events": [], + } + + +# --------------------------------------------------------------------------- +# Main: send all traces +# --------------------------------------------------------------------------- + + +def main(): + print("AgentLens Demo Seed Script") + print(f"Target: {API_URL}") + print("=" * 50) + + # Build all traces + builders = [ + ("CodeReviewAgent (detailed)", trace_code_review_agent), + ("DataAnalysisAgent", trace_data_analysis_agent), + ("CustomerSupportBot (billing)", trace_customer_support_bot), + ("ResearchAssistant", trace_research_assistant), + ("ContentGenerator", trace_content_generator), + ("SQLQueryAgent", trace_sql_query_agent), + ("DebugAgent (ERROR)", trace_debug_agent_error), + ("TranslationAgent", trace_translation_agent), + ("DebugAgent (RUNNING)", trace_debug_agent_running), + ("CustomerSupportBot (ERROR)", trace_customer_support_error), + ("CodeReviewAgent (fast)", trace_code_review_fast), + ] + + traces = [] + for label, builder in builders: + trace = builder() + traces.append(trace) + status_emoji = {"COMPLETED": "+", "ERROR": "!", "RUNNING": "~"} + s = trace["status"] + marker = status_emoji.get(s, "?") + print( + f" [{marker}] {label:40s} status={s:10s} cost=${trace['totalCost']:.3f} tokens={trace['totalTokens']:>6} duration={trace['totalDuration']:>6}ms" + ) + + print(f"\nTotal traces: {len(traces)}") + print(f"Sending to {API_URL} ...") + + # Send via httpx + headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json", + } + + with httpx.Client(timeout=30.0) as client: + response = client.post( + API_URL, + json={"traces": traces}, + headers=headers, + ) + + print(f"\nResponse: {response.status_code}") + if 
200 <= response.status_code < 300: + print("SUCCESS — traces sent!") + try: + body = response.json() + print(f"Response body: {body}") + except Exception: + print(f"Response text: {response.text[:500]}") + else: + print(f"FAILED — {response.status_code}") + print(f"Response: {response.text[:1000]}") + + +if __name__ == "__main__": + main()
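
One sharp edge in the seed script above: each trace builder hardcodes `totalCost`, `totalTokens`, and `totalDuration` next to the per-span values, so the summary fields can drift as traces are edited. A small helper can derive the totals from the spans instead. This is a minimal sketch against the dict shapes produced by `make_span()` in this file; the field names (`costUsd`, `tokenCount`, `durationMs`, `parentSpanId`) are the ones shown above, and nothing beyond them is assumed.

```python
def derive_totals(trace: dict) -> dict:
    """Recompute trace-level totals from the spans built by make_span()."""
    spans = trace["spans"]
    span_ids = {s["id"] for s in spans}
    # Sanity check: every non-null parentSpanId must point at a span in this trace.
    dangling = [s["name"] for s in spans
                if s["parentSpanId"] and s["parentSpanId"] not in span_ids]
    if dangling:
        raise ValueError(f"spans with unknown parents: {dangling}")
    return {
        "totalCost": round(sum(s["costUsd"] for s in spans), 3),
        "totalTokens": sum(s["tokenCount"] for s in spans),
        # The root span (parentSpanId is None) carries the end-to-end duration.
        "totalDuration": max(s["durationMs"] for s in spans
                             if s["parentSpanId"] is None),
    }
```

Calling `trace.update(derive_totals(trace))` at the end of each builder would keep the summaries honest, though the RUNNING trace would need special handling since its root span has no duration yet.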
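Both examples/README.md and customer_support_agent.py rely on the claim that the trace context manager captures exceptions automatically. The SDK internals are not part of this diff, but the mechanism is worth making concrete: a context manager that marks the span ERROR and re-raises. The sketch below is illustrative only; `_begin_span` and `_finish_span` are invented placeholder names, not the real `agentlens` API.

```python
from contextlib import contextmanager

def _begin_span(name: str, tags: list[str] | None) -> dict:
    # Placeholder: a real SDK would register the span with its backend here.
    return {"name": name, "tags": tags or [], "status": "RUNNING"}

def _finish_span(span: dict, status: str, error: str | None = None) -> None:
    # Placeholder: a real SDK would flush the finished span to the API.
    span["status"] = status
    span["error"] = error

@contextmanager
def trace_sketch(name: str, tags: list[str] | None = None):
    """On success the span completes; on exception it is marked ERROR and re-raised."""
    span = _begin_span(name, tags)
    try:
        yield span
        _finish_span(span, "COMPLETED")
    except Exception as exc:
        _finish_span(span, "ERROR", error=str(exc))
        raise  # propagate so the caller's own error handling still runs
```

This matches the behavior customer_support_agent.py depends on: the failed escalation still raises into the caller's `try/except`, but the trace records ERROR first.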
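`main()` sends the whole batch in a single POST with no retry, so one transient network failure loses all eleven traces. A small retry loop is a cheap hardening step; the sketch uses only `httpx` calls already present in the script, and the attempt count and backoff values are arbitrary choices rather than project conventions.

```python
import time

import httpx

def post_with_retry(url: str, payload: dict, headers: dict,
                    attempts: int = 3) -> httpx.Response:
    """POST with simple exponential backoff on connection errors and 5xx responses."""
    last_response = None
    for attempt in range(1, attempts + 1):
        try:
            with httpx.Client(timeout=30.0) as client:
                last_response = client.post(url, json=payload, headers=headers)
            if last_response.status_code < 500:
                return last_response  # success, or a client error worth surfacing as-is
            print(f"Attempt {attempt}: server returned {last_response.status_code}")
        except httpx.HTTPError as exc:
            if attempt == attempts:
                raise
            print(f"Attempt {attempt} failed: {exc}")
        if attempt < attempts:
            time.sleep(2 ** attempt)  # 2s, then 4s between attempts
    return last_response
```

Swapping the bare `client.post(...)` in `main()` for `post_with_retry(API_URL, {"traces": traces}, headers)` keeps the rest of the script unchanged.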