Core API
Scenario
from agentgate import Scenario
s = Scenario(
name="Book a flight",
input="Book SFO→NRT",
timeout_seconds=30,
max_steps=10,
max_llm_calls=5,
)
Methods
| Method | Description |
|---|---|
| `expect_tool_call(name, **kwargs)` | Assert tool is called |
| `expect_no_tool_call(name)` | Assert tool is NOT called |
| `expect_tool_order([...])` | Assert tool subsequence |
| `expect_state(key, value=...)` | Assert state reached |
| `expect_no_error()` | Assert no errors |
| `expect_output(contains=..., matches=...)` | Assert output content |
| `expect_max_steps(n)` | Assert step limit |
| `expect_max_duration(ms)` | Assert time limit |
| `expect_max_tokens(n)` | Assert token budget |
| `expect_no_side_effects(allowed, mutating)` | Assert no side effects |
| `expect_no_repetition(max_rate)` | Assert no loops |
| `expect_milestone(name, tool=..., weight=...)` | Partial credit |
| `expect_llm_judge(criteria, judge_fn=...)` | LLM evaluation |
| `expect_policy(name, forbidden_tools=...)` | Policy bundle |
| `on_tool_failure(name, expect=...)` | Recovery behavior |
| `check(trace)` | Run expectations → ScenarioResult |
TestSuite
from agentgate import TestSuite
suite = TestSuite("my-tests")
suite.add(scenario)
result = suite.run(
adapter,
runs=5,
min_pass_rate=0.8,
timeout_seconds=300,
scenario_timeout=60,
)
AgentTrace
from agentgate import AgentTrace, AgentStep, StepKind
trace = AgentTrace(input="query")
trace.steps.append(AgentStep(
kind=StepKind.TOOL_CALL,
name="search",
input={"q": "flights"},
output={"results": [...]},
))
# Properties and accessor methods
trace.tool_calls # list[AgentStep] — TOOL_CALL steps only
trace.tool_names # list[str] — tool names in order
trace.errors # list[AgentStep] — ERROR steps
trace.state_changes # list[AgentStep] — STATE_CHANGE steps
trace.get_state(key) # last value of a state key
StepKind
| Kind | Description |
|---|---|
| `TOOL_CALL` | Tool/function call |
| `LLM_CALL` | LLM inference |
| `STATE_CHANGE` | Environment state change |
| `HUMAN_HANDOFF` | Escalation to human |
| `ERROR` | Error occurred |
MockAgent
from agentgate import MockAgent, AgentTrace, AgentStep, StepKind
mock = MockAgent()
mock.add_trace("query pattern", AgentTrace(input="...", steps=[...]))
# From recorded traces
mock = MockAgent.from_traces("traces/")