{"slug":"iris-eval/mcp-server","name":"Iris","description":"MCP-native agent evaluation and observability server with trace logging, output quality evaluation, cost tracking, 12 built-in eval rules, real-time dashboard, and PII detection","category":"development","tags":[],"official":true,"stars":6,"transport":"stdio","install":[{"cmd":"npx @iris-eval/mcp-server","imports":[]}],"tools":[{"name":"log_trace","description":"Log an agent execution with spans, tool calls, token usage, and cost"},{"name":"evaluate_output","description":"Score output quality against completeness, relevance, safety, and cost rules (heuristic, deterministic, free)"},{"name":"get_traces","description":"Query stored traces with filtering, pagination, and time-range support"},{"name":"list_rules","description":"Enumerate deployed custom eval rules (read-only)"},{"name":"deploy_rule","description":"Register a new custom eval rule so it fires on every evaluate_output of that category"},{"name":"delete_rule","description":"Remove a deployed custom rule (destructive, idempotent)"},{"name":"delete_trace","description":"Remove a single stored trace by ID (destructive, tenant-scoped)"},{"name":"evaluate_with_llm_judge","description":"Semantic eval via LLM (Anthropic or OpenAI). Five templates: accuracy, helpfulness, safety, correctness, faithfulness. Cost-capped, per-eval pricing disclosed. Bring your own API key."},{"name":"verify_citations","description":"Extract citations from output, fetch sources behind an SSRF-guarded + domain-allowlisted resolver, and use an LLM judge to check whether each source actually supports the cited claim. Opt-in outbound HTTP. Same BYOK requirement as evaluate_with_llm_judge."}],"env_vars":["IRIS_ANTHROPIC_API_KEY","IRIS_OPENAI_API_KEY","IRIS_PORT","IRIS_HOST","IRIS_DASHBOARD_PORT","IRIS_API_KEY"],"auth_type":"none","github":"https://github.com/iris-eval/mcp-server","homepage":"","server_url":"","status":"active","source":"mcpservers.org","updated_at":"Mon May 25"}