1364 lines
39 KiB
Python
1364 lines
39 KiB
Python
"""
|
|
AgentLens Demo Seed Script — Sends realistic traces directly to the live API.
|
|
|
|
This script does NOT use the SDK. It sends raw HTTP POST requests with
|
|
fabricated but realistic trace data so the AgentLens dashboard has
|
|
compelling demo content.
|
|
|
|
Usage:
|
|
python seed_demo_traces.py
|
|
"""
|
|
|
|
import uuid
|
|
import random
|
|
import httpx
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
# Live AgentLens ingest endpoint that receives the fabricated demo traces.
API_URL = "https://agentlens.vectry.tech/api/traces"

# Sent as a Bearer token below; presumably a server-recognized seed key — TODO confirm.
API_KEY = "demo-seed-key"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def uid() -> str:
    """Return a fresh random UUID4 in its canonical 36-character string form."""
    return f"{uuid.uuid4()}"
|
|
|
|
|
|
def iso(dt: datetime) -> str:
    """Format *dt* as an ISO-8601 timestamp with millisecond precision and a 'Z' suffix.

    Microseconds are truncated (not rounded) to milliseconds.
    """
    millis = dt.microsecond // 1000
    return dt.strftime("%Y-%m-%dT%H:%M:%S") + f".{millis:03d}Z"
|
|
|
|
|
|
def now_minus(hours: float = 0, minutes: float = 0) -> datetime:
    """Return the current UTC time (timezone-aware) shifted back by the given offset."""
    offset = timedelta(hours=hours, minutes=minutes)
    return datetime.now(timezone.utc) - offset
|
|
|
|
|
|
def make_span(
    name: str,
    span_type: str,
    parent_id: str | None,
    start: datetime,
    duration_ms: int,
    tokens: int = 0,
    cost: float = 0.0,
    status: str = "COMPLETED",
    input_data: dict | None = None,
    output_data: dict | None = None,
) -> dict:
    """Build one span payload dict for the traces API.

    The end timestamp is derived from *start* plus *duration_ms*; a fresh
    random id is minted for every span. Missing input/output default to {}.
    """
    finish = start + timedelta(milliseconds=duration_ms)
    return dict(
        id=uid(),
        name=name,
        type=span_type,
        status=status,
        parentSpanId=parent_id,
        startedAt=iso(start),
        endedAt=iso(finish),
        durationMs=duration_ms,
        tokenCount=tokens,
        costUsd=cost,
        input=input_data or {},
        output=output_data or {},
    )
|
|
|
|
|
|
def make_decision(
    d_type: str,
    chosen: dict,
    alternatives: list,
    reasoning: str,
    timestamp: datetime,
) -> dict:
    """Build one decision-point payload dict for the traces API."""
    decision = {"id": uid(), "type": d_type}
    decision["chosen"] = chosen
    decision["alternatives"] = alternatives
    decision["reasoning"] = reasoning
    decision["timestamp"] = iso(timestamp)
    return decision
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Trace Definitions (10 varied traces)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def trace_code_review_agent() -> dict:
    """CodeReviewAgent — reviews a pull request (detailed, COMPLETED trace)."""
    t0 = now_minus(hours=22)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "CodeReviewAgent",
            "AGENT",
            None,
            t0,
            4500,
            0,
            0.0,
            input_data={"pr_url": "https://github.com/acme/api/pull/187"},
        ),
        make_span(
            "fetch_pr_diff",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=50),
            320,
            0,
            0.0,
            input_data={"pr_number": 187},
            output_data={"files_changed": 7, "additions": 142, "deletions": 38},
        ),
        make_span(
            "analyze_code_quality",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=400),
            2800,
            3200,
            0.048,
            input_data={"model": "gpt-4o", "diff_lines": 180},
            output_data={"issues_found": 3, "severity": "medium"},
        ),
        make_span(
            "generate_review_comment",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=3300),
            1100,
            1800,
            0.027,
            input_data={"model": "gpt-4o", "issues": 3},
            output_data={"comment_length": 450},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "static_analysis",
                "confidence": 0.82,
                "params": {"language": "python"},
            },
            [
                {
                    "name": "lint_only",
                    "confidence": 0.5,
                    "reason_rejected": "Need deeper analysis",
                }
            ],
            "Full static analysis catches more than linting alone.",
            t0 + timedelta(milliseconds=380),
        ),
        make_decision(
            "PLANNING",
            {
                "name": "review_by_file",
                "confidence": 0.88,
                "params": {"strategy": "file-by-file"},
            },
            [
                {
                    "name": "holistic_review",
                    "confidence": 0.6,
                    "reason_rejected": "7 files too many for one pass",
                }
            ],
            "File-by-file review is more thorough for multi-file PRs.",
            t0 + timedelta(milliseconds=100),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "CodeReviewAgent",
        "status": "COMPLETED",
        "tags": ["code-review", "github", "python"],
        "metadata": {"pr_number": 187, "repo": "acme/api"},
        "totalCost": 0.075,
        "totalTokens": 5000,
        "totalDuration": 4500,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=4500)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_data_analysis_agent() -> dict:
    """DataAnalysisAgent — analyzes a CSV dataset (COMPLETED trace)."""
    t0 = now_minus(hours=18, minutes=30)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "DataAnalysisAgent",
            "AGENT",
            None,
            t0,
            8200,
            0,
            0.0,
            input_data={"dataset": "sales_q4_2025.csv", "rows": 45000},
        ),
        make_span(
            "load_and_profile",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            1200,
            0,
            0.0,
            input_data={"file": "sales_q4_2025.csv"},
            output_data={"columns": 12, "null_pct": 2.3},
        ),
        make_span(
            "generate_sql_queries",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=1400),
            2100,
            2400,
            0.036,
            input_data={"model": "gpt-4o-mini", "schema_columns": 12},
            output_data={"queries_generated": 5},
        ),
        make_span(
            "execute_queries",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=3600),
            2800,
            0,
            0.0,
            input_data={"query_count": 5},
            output_data={"results": "aggregated"},
        ),
        make_span(
            "summarize_insights",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=6500),
            1600,
            3800,
            0.057,
            input_data={"model": "gpt-4o", "data_points": 15},
            output_data={"insights": 4, "charts_suggested": 2},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "pandas_profiling",
                "confidence": 0.9,
                "params": {"dataset_size": "45K rows"},
            },
            [
                {
                    "name": "manual_stats",
                    "confidence": 0.4,
                    "reason_rejected": "Too slow for 45K rows",
                }
            ],
            "Automated profiling is fastest for datasets this size.",
            t0 + timedelta(milliseconds=80),
        ),
        make_decision(
            "PLANNING",
            {
                "name": "top_down_analysis",
                "confidence": 0.85,
                "params": {"focus": "revenue_trends"},
            },
            [
                {
                    "name": "exploratory",
                    "confidence": 0.6,
                    "reason_rejected": "User asked specifically about revenue",
                }
            ],
            "User's question is revenue-focused, so start there.",
            t0 + timedelta(milliseconds=1350),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "DataAnalysisAgent",
        "status": "COMPLETED",
        "tags": ["data-analysis", "csv", "sql"],
        "metadata": {"dataset": "sales_q4_2025.csv"},
        "totalCost": 0.093,
        "totalTokens": 6200,
        "totalDuration": 8200,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=8200)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_customer_support_bot() -> dict:
    """CustomerSupportBot — handles a billing ticket (COMPLETED trace)."""
    t0 = now_minus(hours=14, minutes=15)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "CustomerSupportBot",
            "AGENT",
            None,
            t0,
            5800,
            0,
            0.0,
            input_data={
                "ticket_id": "TKT-9102",
                "subject": "Double charged on invoice",
            },
        ),
        make_span(
            "classify_ticket",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=80),
            800,
            600,
            0.004,
            input_data={"model": "gpt-4o-mini"},
            output_data={"category": "billing", "confidence": 0.96},
        ),
        make_span(
            "lookup_billing_history",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=950),
            450,
            0,
            0.0,
            input_data={"customer_id": "cust_8827"},
            output_data={"invoices": 3, "duplicate_found": True},
        ),
        make_span(
            "route_to_billing_specialist",
            "CHAIN",
            root_id,
            t0 + timedelta(milliseconds=1500),
            200,
            0,
            0.0,
        ),
        make_span(
            "draft_response",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=1800),
            1800,
            2200,
            0.033,
            input_data={"model": "gpt-4o", "tone": "empathetic"},
            output_data={"word_count": 180, "includes_refund_link": True},
        ),
        make_span(
            "send_email",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=3700),
            350,
            0,
            0.0,
            input_data={"channel": "email"},
            output_data={"sent": True, "message_id": "msg_abc123"},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "ROUTING",
            {
                "name": "billing_specialist",
                "confidence": 0.96,
                "params": {"queue": "billing-l2"},
            },
            [
                {
                    "name": "general_support",
                    "confidence": 0.3,
                    "reason_rejected": "Billing issues need specialized handling",
                }
            ],
            "Double-charge = billing category with high confidence.",
            t0 + timedelta(milliseconds=900),
        ),
        make_decision(
            "ESCALATION",
            {
                "name": "auto_resolve",
                "confidence": 0.88,
                "params": {"action": "issue_refund"},
            },
            [
                {
                    "name": "escalate_to_manager",
                    "confidence": 0.45,
                    "reason_rejected": "Clear duplicate charge, no need for manager",
                }
            ],
            "Duplicate charge confirmed in billing system — auto-refund is safe.",
            t0 + timedelta(milliseconds=1450),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "CustomerSupportBot",
        "status": "COMPLETED",
        "tags": ["support", "billing", "enterprise"],
        "metadata": {"ticket_id": "TKT-9102", "customer_tier": "enterprise"},
        "totalCost": 0.037,
        "totalTokens": 2800,
        "totalDuration": 5800,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=5800)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_research_assistant() -> dict:
    """ResearchAssistant — deep research on a technical topic (COMPLETED trace)."""
    t0 = now_minus(hours=10, minutes=45)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "ResearchAssistant",
            "AGENT",
            None,
            t0,
            15200,
            0,
            0.0,
            input_data={
                "query": "Compare vector databases for RAG: Pinecone vs Weaviate vs Qdrant"
            },
        ),
        make_span(
            "web_search_pinecone",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            1800,
            0,
            0.0,
            output_data={"results": 8},
        ),
        make_span(
            "web_search_weaviate",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=2000),
            1600,
            0,
            0.0,
            output_data={"results": 6},
        ),
        make_span(
            "web_search_qdrant",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=3700),
            1400,
            0,
            0.0,
            output_data={"results": 7},
        ),
        make_span(
            "synthesize_comparison",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=5200),
            6500,
            8500,
            0.127,
            input_data={"model": "gpt-4o", "sources": 21},
            output_data={"sections": 5, "word_count": 1200},
        ),
        make_span(
            "generate_recommendation",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=11800),
            3200,
            4200,
            0.063,
            input_data={
                "model": "gpt-4o",
                "criteria": ["cost", "performance", "ease_of_use"],
            },
            output_data={"recommendation": "Qdrant", "confidence": 0.78},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "parallel_web_search",
                "confidence": 0.92,
                "params": {"queries": 3},
            },
            [
                {
                    "name": "sequential_search",
                    "confidence": 0.5,
                    "reason_rejected": "Parallel is faster for independent queries",
                }
            ],
            "Three independent searches — run in parallel.",
            t0 + timedelta(milliseconds=60),
        ),
        make_decision(
            "PLANNING",
            {
                "name": "comparative_matrix",
                "confidence": 0.87,
                "params": {"dimensions": 6},
            },
            [
                {
                    "name": "prose_comparison",
                    "confidence": 0.6,
                    "reason_rejected": "Matrix format is clearer for 3-way comparison",
                }
            ],
            "Structured comparison matrix gives the clearest output.",
            t0 + timedelta(milliseconds=5100),
        ),
        make_decision(
            "MEMORY_RETRIEVAL",
            {"name": "cache_research", "confidence": 0.8, "params": {"ttl_hours": 48}},
            [
                {
                    "name": "no_cache",
                    "confidence": 0.3,
                    "reason_rejected": "Research is reusable for similar queries",
                }
            ],
            "Cache this research for 48h to avoid redundant searches.",
            t0 + timedelta(milliseconds=15000),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "ResearchAssistant",
        "status": "COMPLETED",
        "tags": ["research", "vector-db", "comparison"],
        "metadata": {"topic": "vector databases for RAG"},
        "totalCost": 0.19,
        "totalTokens": 12700,
        "totalDuration": 15200,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=15200)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_content_generator() -> dict:
    """ContentGenerator — writes a blog post (COMPLETED trace)."""
    t0 = now_minus(hours=8, minutes=20)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "ContentGenerator",
            "AGENT",
            None,
            t0,
            12000,
            0,
            0.0,
            input_data={"topic": "Building reliable AI agents", "format": "blog"},
        ),
        make_span(
            "outline_generation",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            2200,
            1800,
            0.027,
            input_data={"model": "gpt-4o"},
            output_data={"sections": 5},
        ),
        make_span(
            "draft_introduction",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=2400),
            2500,
            2200,
            0.033,
        ),
        make_span(
            "draft_body",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=5000),
            4000,
            5500,
            0.082,
            input_data={"model": "gpt-4o", "sections": 3},
        ),
        make_span(
            "draft_conclusion",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=9100),
            1500,
            1200,
            0.018,
        ),
        make_span(
            "seo_optimization",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=10700),
            1100,
            0,
            0.0,
            output_data={"keywords_added": 8, "readability_score": 72},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "PLANNING",
            {"name": "outline_first", "confidence": 0.94, "params": {"sections": 5}},
            [
                {
                    "name": "freeform_write",
                    "confidence": 0.3,
                    "reason_rejected": "Blog needs structure",
                }
            ],
            "Outline-first produces more coherent long-form content.",
            t0 + timedelta(milliseconds=50),
        ),
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "seo_optimizer",
                "confidence": 0.78,
                "params": {"target_keywords": ["AI agents", "reliability"]},
            },
            [
                {
                    "name": "skip_seo",
                    "confidence": 0.4,
                    "reason_rejected": "Blog is for marketing — SEO matters",
                }
            ],
            "Marketing blog needs SEO optimization before publishing.",
            t0 + timedelta(milliseconds=10600),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "ContentGenerator",
        "status": "COMPLETED",
        "tags": ["content", "blog", "marketing"],
        "metadata": {"word_count": 1800, "topic": "AI agents"},
        "totalCost": 0.16,
        "totalTokens": 10700,
        "totalDuration": 12000,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=12000)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_sql_query_agent() -> dict:
    """SQLQueryAgent — natural language to SQL (COMPLETED trace)."""
    t0 = now_minus(hours=6, minutes=10)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "SQLQueryAgent",
            "AGENT",
            None,
            t0,
            3200,
            0,
            0.0,
            input_data={
                "question": "What were top 10 products by revenue last quarter?"
            },
        ),
        make_span(
            "schema_lookup",
            "MEMORY_OP",
            root_id,
            t0 + timedelta(milliseconds=50),
            180,
            0,
            0.0,
            output_data={
                "tables": ["products", "orders", "order_items"],
                "cached": True,
            },
        ),
        make_span(
            "generate_sql",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=250),
            1200,
            1100,
            0.008,
            input_data={"model": "gpt-4o-mini"},
            output_data={"sql": "SELECT p.name, SUM(oi.revenue)..."},
        ),
        make_span(
            "validate_sql",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=1500),
            300,
            0,
            0.0,
            output_data={"valid": True, "estimated_rows": 10},
        ),
        make_span(
            "execute_query",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=1850),
            800,
            0,
            0.0,
            output_data={"rows_returned": 10, "execution_time_ms": 340},
        ),
        make_span(
            "format_response",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=2700),
            450,
            800,
            0.006,
            input_data={"model": "gpt-4o-mini"},
            output_data={"format": "markdown_table"},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "MEMORY_RETRIEVAL",
            {
                "name": "cached_schema",
                "confidence": 0.95,
                "params": {"db": "analytics_prod"},
            },
            [
                {
                    "name": "introspect_live",
                    "confidence": 0.7,
                    "reason_rejected": "Schema cached 2 hours ago, still valid",
                }
            ],
            "Schema was cached recently and hasn't changed.",
            t0 + timedelta(milliseconds=30),
        ),
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "sql_validator",
                "confidence": 0.9,
                "params": {"dialect": "postgresql"},
            },
            [
                {
                    "name": "execute_directly",
                    "confidence": 0.4,
                    "reason_rejected": "Always validate before executing",
                }
            ],
            "Validate SQL to prevent syntax errors or dangerous queries.",
            t0 + timedelta(milliseconds=1480),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "SQLQueryAgent",
        "status": "COMPLETED",
        "tags": ["sql", "analytics", "natural-language"],
        "metadata": {"database": "analytics_prod", "dialect": "postgresql"},
        "totalCost": 0.014,
        "totalTokens": 1900,
        "totalDuration": 3200,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=3200)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_debug_agent_error() -> dict:
    """DebugAgent — fails while debugging a production issue (ERROR trace).

    Demonstrates an ERROR root span, a failing child TOOL_CALL, and
    ERROR/RETRY events attached to the trace.
    """
    t0 = now_minus(hours=4, minutes=50)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "DebugAgent",
            "AGENT",
            None,
            t0,
            7500,
            0,
            0.0,
            status="ERROR",
            input_data={
                "error": "OOMKilled in pod analytics-worker-7b9f",
                "cluster": "prod-us-east",
            },
        ),
        make_span(
            "fetch_pod_logs",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            2200,
            0,
            0.0,
            output_data={"log_lines": 450, "oom_events": 3},
        ),
        make_span(
            "analyze_memory_usage",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=2400),
            2800,
            3500,
            0.052,
            input_data={"model": "gpt-4o", "log_lines": 450},
            output_data={"root_cause": "memory leak in batch processor"},
        ),
        # The failing span: Grafana call errors out, which fails the trace.
        make_span(
            "fetch_metrics_grafana",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=5300),
            2200,
            0,
            0.0,
            status="ERROR",
            input_data={"dashboard": "pod-resources", "timerange": "6h"},
            output_data={"error": "Grafana API timeout after 2000ms"},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "TOOL_SELECTION",
            {"name": "fetch_pod_logs", "confidence": 0.92, "params": {"tail": 500}},
            [
                {
                    "name": "describe_pod",
                    "confidence": 0.6,
                    "reason_rejected": "Logs give more detail than describe",
                }
            ],
            "Pod logs are the best starting point for OOM investigation.",
            t0 + timedelta(milliseconds=70),
        ),
        make_decision(
            "RETRY",
            {
                "name": "retry_grafana_with_longer_timeout",
                "confidence": 0.7,
                "params": {"timeout_ms": 5000},
            },
            [
                {
                    "name": "skip_metrics",
                    "confidence": 0.5,
                    "reason_rejected": "Metrics are critical for memory analysis",
                }
            ],
            "Grafana timed out — retry with extended timeout.",
            t0 + timedelta(milliseconds=7400),
        ),
    ]
    # Events are raw dicts (not built via a helper): error + retry markers.
    events = [
        {
            "id": uid(),
            "type": "ERROR",
            "name": "Grafana API timeout",
            "timestamp": iso(t0 + timedelta(milliseconds=7500)),
            "metadata": {"service": "grafana", "timeout_ms": 2000},
        },
        {
            "id": uid(),
            "type": "RETRY",
            "name": "Retrying Grafana fetch",
            "timestamp": iso(t0 + timedelta(milliseconds=7500)),
        },
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "DebugAgent",
        "status": "ERROR",
        "tags": ["debugging", "kubernetes", "production"],
        "metadata": {"cluster": "prod-us-east", "error_type": "OOMKilled"},
        "totalCost": 0.052,
        "totalTokens": 3500,
        "totalDuration": 7500,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=7500)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": events,
    }
|
|
|
|
|
|
def trace_translation_agent() -> dict:
    """TranslationAgent — translates documentation to multiple languages (COMPLETED)."""
    t0 = now_minus(hours=3, minutes=30)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "TranslationAgent",
            "AGENT",
            None,
            t0,
            18500,
            0,
            0.0,
            input_data={
                "source_lang": "en",
                "target_langs": ["es", "ja", "de"],
                "doc": "API guide",
            },
        ),
        make_span(
            "extract_translatable_strings",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            800,
            0,
            0.0,
            output_data={"strings": 47, "words": 3200},
        ),
        make_span(
            "translate_to_spanish",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=1000),
            4500,
            4800,
            0.072,
            input_data={"model": "gpt-4o", "target": "es"},
            output_data={"translated_strings": 47},
        ),
        make_span(
            "translate_to_japanese",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=5600),
            5200,
            5100,
            0.076,
            input_data={"model": "gpt-4o", "target": "ja"},
            output_data={"translated_strings": 47},
        ),
        make_span(
            "translate_to_german",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=10900),
            4800,
            4600,
            0.069,
            input_data={"model": "gpt-4o", "target": "de"},
            output_data={"translated_strings": 47},
        ),
        make_span(
            "quality_check",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=15800),
            2500,
            2000,
            0.03,
            input_data={"model": "gpt-4o", "check_type": "consistency"},
            output_data={"issues_found": 2, "auto_fixed": 2},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "PLANNING",
            {
                "name": "sequential_translation",
                "confidence": 0.82,
                "params": {"order": ["es", "ja", "de"]},
            },
            [
                {
                    "name": "parallel_translation",
                    "confidence": 0.7,
                    "reason_rejected": "Sequential allows cross-referencing for consistency",
                }
            ],
            "Translate sequentially so each builds on context from prior.",
            t0 + timedelta(milliseconds=50),
        ),
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "gpt-4o",
                "confidence": 0.91,
                "params": {"reason": "best quality for ja/de"},
            },
            [
                {
                    "name": "gpt-4o-mini",
                    "confidence": 0.6,
                    "reason_rejected": "Japanese and German need higher quality model",
                }
            ],
            "GPT-4o produces significantly better CJK translations.",
            t0 + timedelta(milliseconds=950),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "TranslationAgent",
        "status": "COMPLETED",
        "tags": ["translation", "i18n", "documentation"],
        "metadata": {"languages": ["es", "ja", "de"], "document": "API guide"},
        "totalCost": 0.247,
        "totalTokens": 16500,
        "totalDuration": 18500,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=18500)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_debug_agent_running() -> dict:
    """DebugAgent (RUNNING) — currently investigating an issue.

    The trace dict deliberately omits "endedAt" because it is in progress.
    """
    t0 = now_minus(minutes=12)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        # NOTE(review): make_span still stamps an "endedAt" (== startedAt, since
        # duration is 0) on this RUNNING root span — confirm the API tolerates that.
        make_span(
            "DebugAgent",
            "AGENT",
            None,
            t0,
            0,
            0,
            0.0,
            status="RUNNING",
            input_data={
                "issue": "Latency spike in /api/search endpoint",
                "p99_ms": 4500,
            },
        ),
        make_span(
            "fetch_recent_deploys",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=100),
            600,
            0,
            0.0,
            output_data={"deploys_24h": 3, "latest": "v2.14.1"},
        ),
        make_span(
            "analyze_latency_traces",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=800),
            3500,
            2800,
            0.042,
            input_data={"model": "gpt-4o", "trace_count": 50},
            output_data={"bottleneck": "elasticsearch query"},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    # No endedAt for RUNNING trace
    decisions = [
        make_decision(
            "TOOL_SELECTION",
            {
                "name": "query_datadog",
                "confidence": 0.88,
                "params": {"metric": "p99_latency", "window": "1h"},
            },
            [
                {
                    "name": "check_logs",
                    "confidence": 0.65,
                    "reason_rejected": "Metrics give faster overview than logs",
                }
            ],
            "Start with metrics to identify the time window of regression.",
            t0 + timedelta(milliseconds=50),
        ),
    ]
    # Trace envelope — note there is no "endedAt" key here.
    return {
        "id": trace_id,
        "name": "DebugAgent",
        "status": "RUNNING",
        "tags": ["debugging", "latency", "production"],
        "metadata": {"endpoint": "/api/search", "p99_ms": 4500},
        "totalCost": 0.042,
        "totalTokens": 2800,
        "totalDuration": 4200,
        "startedAt": iso(t0),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
def trace_customer_support_error() -> dict:
    """CustomerSupportBot — fails on a malformed ticket (ERROR trace)."""
    t0 = now_minus(hours=1, minutes=40)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; the child span references root_id as its parent.
    spans = [
        make_span(
            "CustomerSupportBot",
            "AGENT",
            None,
            t0,
            1200,
            0,
            0.0,
            status="ERROR",
            input_data={"ticket_id": "TKT-9201", "subject": ""},
        ),
        make_span(
            "classify_ticket",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=80),
            900,
            400,
            0.003,
            status="ERROR",
            input_data={"model": "gpt-4o-mini", "subject": ""},
            output_data={"error": "Cannot classify empty subject"},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the child's parentSpanId reference resolves.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "ROUTING",
            {
                "name": "reject_ticket",
                "confidence": 0.7,
                "params": {"reason": "empty_subject"},
            },
            [
                {
                    "name": "classify_anyway",
                    "confidence": 0.4,
                    "reason_rejected": "No content to classify",
                }
            ],
            "Ticket has empty subject — cannot classify.",
            t0 + timedelta(milliseconds=1000),
        ),
    ]
    # Single ERROR event marking the validation failure.
    events = [
        {
            "id": uid(),
            "type": "ERROR",
            "name": "Empty ticket subject",
            "timestamp": iso(t0 + timedelta(milliseconds=1100)),
            "metadata": {"ticket_id": "TKT-9201"},
        },
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "CustomerSupportBot",
        "status": "ERROR",
        "tags": ["support", "error", "validation"],
        "metadata": {"ticket_id": "TKT-9201", "error": "empty_subject"},
        "totalCost": 0.003,
        "totalTokens": 400,
        "totalDuration": 1200,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=1200)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": events,
    }
|
|
|
|
|
|
def trace_code_review_fast() -> dict:
    """CodeReviewAgent — fast review of a small PR (short COMPLETED trace)."""
    t0 = now_minus(minutes=35)
    trace_id = uid()
    root_id = uid()
    # Root agent span first; all child spans reference root_id as their parent.
    spans = [
        make_span(
            "CodeReviewAgent",
            "AGENT",
            None,
            t0,
            1800,
            0,
            0.0,
            input_data={"pr_url": "https://github.com/acme/web/pull/42", "files": 2},
        ),
        make_span(
            "fetch_pr_diff",
            "TOOL_CALL",
            root_id,
            t0 + timedelta(milliseconds=50),
            250,
            0,
            0.0,
            output_data={"additions": 18, "deletions": 5},
        ),
        make_span(
            "quick_review",
            "LLM_CALL",
            root_id,
            t0 + timedelta(milliseconds=350),
            1300,
            900,
            0.007,
            input_data={"model": "gpt-4o-mini"},
            output_data={"verdict": "LGTM", "issues": 0},
        ),
    ]
    # make_span minted a random id for the root span; overwrite it with
    # root_id so the children's parentSpanId references resolve.
    spans[0]["id"] = root_id
    decisions = [
        make_decision(
            "PLANNING",
            {
                "name": "quick_review",
                "confidence": 0.95,
                "params": {"depth": "shallow"},
            },
            [
                {
                    "name": "deep_review",
                    "confidence": 0.3,
                    "reason_rejected": "Only 2 files, 23 lines changed",
                }
            ],
            "Small PR — quick review is sufficient.",
            t0 + timedelta(milliseconds=300),
        ),
    ]
    # Trace envelope; totals are hand-summed from the spans above.
    return {
        "id": trace_id,
        "name": "CodeReviewAgent",
        "status": "COMPLETED",
        "tags": ["code-review", "github", "quick"],
        "metadata": {"pr_number": 42, "repo": "acme/web", "verdict": "LGTM"},
        "totalCost": 0.007,
        "totalTokens": 900,
        "totalDuration": 1800,
        "startedAt": iso(t0),
        "endedAt": iso(t0 + timedelta(milliseconds=1800)),
        "spans": spans,
        "decisionPoints": decisions,
        "events": [],
    }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main: send all traces
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def main():
    """Build all demo traces, print a summary table, and POST them in one batch.

    Sends a single request with {"traces": [...]} to API_URL, authenticated
    with API_KEY as a Bearer token, then prints the server's response.
    """
    print("AgentLens Demo Seed Script")
    print(f"Target: {API_URL}")
    print("=" * 50)

    # Build all traces (the label is only used for the console summary).
    builders = [
        ("CodeReviewAgent (detailed)", trace_code_review_agent),
        ("DataAnalysisAgent", trace_data_analysis_agent),
        ("CustomerSupportBot (billing)", trace_customer_support_bot),
        ("ResearchAssistant", trace_research_assistant),
        ("ContentGenerator", trace_content_generator),
        ("SQLQueryAgent", trace_sql_query_agent),
        ("DebugAgent (ERROR)", trace_debug_agent_error),
        ("TranslationAgent", trace_translation_agent),
        ("DebugAgent (RUNNING)", trace_debug_agent_running),
        ("CustomerSupportBot (ERROR)", trace_customer_support_error),
        ("CodeReviewAgent (fast)", trace_code_review_fast),
    ]

    # Hoisted out of the loop: this mapping is loop-invariant, so there is no
    # reason to rebuild the dict literal on every iteration.
    status_emoji = {"COMPLETED": "+", "ERROR": "!", "RUNNING": "~"}

    traces = []
    for label, builder in builders:
        trace = builder()
        traces.append(trace)
        s = trace["status"]
        marker = status_emoji.get(s, "?")
        print(
            f" [{marker}] {label:40s} status={s:10s} cost=${trace['totalCost']:.3f} tokens={trace['totalTokens']:>6} duration={trace['totalDuration']:>6}ms"
        )

    print(f"\nTotal traces: {len(traces)}")
    print(f"Sending to {API_URL} ...")

    # Send via httpx in one batched request.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    with httpx.Client(timeout=30.0) as client:
        response = client.post(
            API_URL,
            json={"traces": traces},
            headers=headers,
        )

    print(f"\nResponse: {response.status_code}")
    # Treat any non-error, non-redirect status (< 300) as success.
    if response.status_code < 300:
        print("SUCCESS — traces sent!")
        try:
            body = response.json()
            print(f"Response body: {body}")
        except Exception:
            # Body was not valid JSON; fall back to raw (truncated) text.
            print(f"Response text: {response.text[:500]}")
    else:
        print(f"FAILED — {response.status_code}")
        print(f"Response: {response.text[:1000]}")
|
|
|
|
|
|
# Script entry point: build the demo traces and POST them to the live API.
if __name__ == "__main__":
    main()
|