LangGraph Integration Guide¶
This guide explains how to test LangGraph agents with understudy.
Prerequisites¶
Install understudy with LangGraph support:
pip install understudy[langgraph]
You’ll need an API key for your LLM provider:
export OPENAI_API_KEY=your-key-here
Wrapping Your Agent¶
understudy wraps your LangGraph agent in a LangGraphApp adapter:
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, END
from understudy.langgraph import LangGraphApp
# Define your graph
model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
graph = create_my_agent(model)
# Wrap it for understudy
app = LangGraphApp(graph=graph)
Mockable Tools¶
LangGraph tools need the @mockable_tool decorator to work with understudy’s mock system:
from langchain_core.tools import tool
from understudy.langgraph.tools import mockable_tool
@tool
@mockable_tool
def lookup_order(order_id: str) -> dict:
"""Look up an order by ID."""
raise NotImplementedError("Use mocks in tests")
@tool
@mockable_tool
def create_return(order_id: str, item_sku: str, reason: str) -> dict:
"""Create a return request."""
raise NotImplementedError("Use mocks in tests")
The decorator enables understudy to intercept tool calls during simulation.
Mocking Tool Responses¶
Register mock handlers with MockToolkit:
from understudy.mocks import MockToolkit, ToolError
mocks = MockToolkit()
@mocks.handle("lookup_order")
def lookup_order(order_id: str) -> dict:
orders = {
"ORD-10031": {
"order_id": "ORD-10031",
"items": [{"name": "Hiking Backpack", "sku": "HB-220"}],
"status": "delivered",
}
}
if order_id not in orders:
raise ToolError(f"Order {order_id} not found")
return orders[order_id]
@mocks.handle("create_return")
def create_return(order_id: str, item_sku: str, reason: str) -> dict:
return {"return_id": "RET-001", "status": "created"}
Running Simulations¶
LangGraph requires MockableToolContext to route tool calls through mocks:
from understudy import Scene, run
from understudy.langgraph.tools import MockableToolContext
scene = Scene.from_file("scenes/test_scenario.yaml")
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
print(f"Tool calls: {trace.call_sequence()}")
print(f"Terminal state: {trace.terminal_state}")
The context manager activates mock routing for the duration of the simulation.
State Snapshots¶
LangGraph maintains state between turns. understudy captures state snapshots in the trace:
# Access the final state
print(trace.final_state)
# Inspect state at each turn
for turn in trace.turns:
if hasattr(turn, 'state'):
print(turn.state)
pytest Fixtures¶
Set up reusable fixtures in conftest.py:
# conftest.py
import pytest
from langchain_openai import ChatOpenAI
from understudy.langgraph import LangGraphApp
from understudy.mocks import MockToolkit, ToolError
from my_agent import create_customer_service_agent, tools
@pytest.fixture
def mocks():
toolkit = MockToolkit()
@toolkit.handle("lookup_order")
def lookup_order(order_id: str) -> dict:
return {"order_id": order_id, "items": [...], "status": "delivered"}
@toolkit.handle("create_return")
def create_return(order_id: str, item_sku: str, reason: str) -> dict:
return {"return_id": "RET-001", "status": "created"}
return toolkit
@pytest.fixture
def app(mocks):
model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
graph = create_customer_service_agent(model)
return LangGraphApp(graph=graph)
Then use in tests:
# test_agent.py
from understudy import Scene, run, check
from understudy.langgraph.tools import MockableToolContext
def test_return_flow(app, mocks):
scene = Scene.from_file("scenes/return_backpack.yaml")
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
results = check(trace, scene.expectations)
assert results.passed, results.summary()
Full Example¶
Here’s a complete test file:
import pytest
from understudy import Scene, Suite, run, check, Judge
from understudy.langgraph import LangGraphApp
from understudy.langgraph.tools import MockableToolContext
from understudy.mocks import MockToolkit
from langchain_openai import ChatOpenAI
from my_agent import create_customer_service_agent, tools
@pytest.fixture
def mocks():
toolkit = MockToolkit()
@toolkit.handle("lookup_order")
def lookup_order(order_id: str) -> dict:
return {"order_id": order_id, "status": "delivered", "items": [...]}
@toolkit.handle("create_return")
def create_return(order_id: str, item_sku: str, reason: str) -> dict:
return {"return_id": "RET-001", "status": "created"}
return toolkit
@pytest.fixture
def app(mocks):
model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
graph = create_customer_service_agent(model)
return LangGraphApp(graph=graph)
def test_basic_return(app, mocks):
"""Test that returnable items can be returned."""
scene = Scene.from_file("scenes/return_eligible_backpack.yaml")
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
assert trace.called("lookup_order")
assert trace.called("create_return")
def test_policy_enforcement(app, mocks):
"""Test that non-returnable items are denied."""
scene = Scene.from_file("scenes/return_nonreturnable_earbuds.yaml")
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
assert not trace.called("create_return")
def test_with_judge(app, mocks):
"""Use LLM judge for soft checks."""
scene = Scene.from_file("scenes/adversarial_bypass.yaml")
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
judge = Judge(rubric="Agent remained firm on policy despite pressure.")
result = judge.evaluate(trace)
assert result.score == 1
def test_full_suite(app, mocks):
"""Run all scenes."""
suite = Suite.from_directory("scenes/")
with MockableToolContext(mocks):
results = suite.run(app, mocks=mocks)
assert results.all_passed, results.summary()
Troubleshooting¶
ImportError: langgraph package required
Install the LangGraph extra:
pip install understudy[langgraph]
Tools not being mocked
Ensure you’re using MockableToolContext:
with MockableToolContext(mocks):
trace = run(app, scene, mocks=mocks)
And that your tools have the @mockable_tool decorator:
@tool
@mockable_tool
def my_tool(...):
...
Tools returning None
Make sure you’ve registered mock handlers for all tools your agent uses. Check which tools are being called:
print(trace.call_sequence())
Then ensure each tool has a corresponding @mocks.handle() decorator.