Reliability Evaluation with Graph

The ReliabilityEvaluator accepts a Graph instance (after execution) and extracts tool calls from all executed TaskNode objects.

Single-Node Graph

import asyncio
from upsonic import Agent, Task, Graph
from upsonic.eval import ReliabilityEvaluator
from upsonic.tools import tool

# Tool registered on the agent below (tools=[calculate_sum]); the evaluator
# later looks for a call to it under the name "calculate_sum".
# NOTE(review): the docstring presumably doubles as the tool description the
# model sees -- confirm against the upsonic @tool docs before rewording it.
@tool
def calculate_sum(a: int, b: int) -> int:
    """Add two numbers together."""
    return a + b

# Agent with calculate_sum as its only registered tool.
agent = Agent(
    model="anthropic/claude-sonnet-4-5",
    name="GraphAgent",
    tools=[calculate_sum],
)

# Graph configured with `agent` as its default agent and progress display off.
graph = Graph(
    default_agent=agent,
    show_progress=False,
)

# One task is added to the graph; its prompt names the tool explicitly so the
# agent is expected to call calculate_sum.
graph_task = Task(description="Calculate 12 + 15 using calculate_sum")
graph.add(graph_task)

# Run the graph before evaluating: the evaluator works on a Graph instance
# after execution.
asyncio.run(graph.run_async(verbose=False))

# Declare the tool call that must appear in the executed graph.
evaluator = ReliabilityEvaluator(
    expected_tool_calls=["calculate_sum"],
)

# Evaluate the executed graph and print the report.
# NOTE(review): assert_passed() presumably raises when the evaluation failed --
# confirm against the upsonic API.
result = evaluator.run(graph, print_results=True)
result.assert_passed()

Multi-Node Chain

Tool calls are collected from every executed node in the graph.

import asyncio
from upsonic import Agent, Task, Graph
from upsonic.eval import ReliabilityEvaluator
from upsonic.tools import tool

# First of the two tools registered on ChainAgent; task t1's prompt asks for it
# by name.
# NOTE(review): the docstring presumably doubles as the tool description the
# model sees -- confirm against the upsonic @tool docs before rewording it.
@tool
def calculate_sum(a: int, b: int) -> int:
    """Add two numbers together."""
    return a + b

# Second tool registered on ChainAgent; task t2's prompt asks for it by name.
# NOTE(review): the docstring presumably doubles as the tool description the
# model sees -- confirm against the upsonic @tool docs before rewording it.
@tool
def calculate_product(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b

# Agent with both arithmetic tools registered.
agent = Agent(
    model="anthropic/claude-sonnet-4-5",
    name="ChainAgent",
    tools=[calculate_sum, calculate_product],
)

# Graph configured with `agent` as its default agent and progress display off.
graph = Graph(
    default_agent=agent,
    show_progress=False,
)

# Two tasks, each naming the tool it should use. t2's prompt refers to "the
# result" of t1, so the tasks are chained with `>>` before being added.
# NOTE(review): `t1 >> t2` presumably makes t2 run after t1 with t1's output
# available -- confirm against the upsonic Graph docs.
t1 = Task(description="Calculate 3 + 4 using calculate_sum")
t2 = Task(description="Now multiply the result by 5 using calculate_product")
graph.add(t1 >> t2)

# Run the graph before evaluating: the evaluator works on an executed graph.
asyncio.run(graph.run_async(verbose=False))

# Both tools are expected; tool calls are collected from every executed node.
evaluator = ReliabilityEvaluator(
    expected_tool_calls=["calculate_sum", "calculate_product"],
)

# Evaluate the executed graph and print the report.
# NOTE(review): assert_passed() presumably raises when the evaluation failed --
# confirm against the upsonic API.
result = evaluator.run(graph, print_results=True)
result.assert_passed()

# Print, for each check in the result, the tool name and how many times it was
# called.
for check in result.checks:
    print(f"  {check.tool_name}: called {check.times_called}x")

Exact Match with Graph

Ensure no unexpected tools were invoked across the entire graph execution.

import asyncio
from upsonic import Agent, Task, Graph
from upsonic.eval import ReliabilityEvaluator
from upsonic.tools import tool

# The only tool listed in expected_tool_calls for this example; the task prompt
# asks for it by name.
# NOTE(review): the docstring presumably doubles as the tool description the
# model sees -- confirm against the upsonic @tool docs before rewording it.
@tool
def calculate_sum(a: int, b: int) -> int:
    """Add two numbers together."""
    return a + b

# Registered on the agent but deliberately absent from expected_tool_calls, so
# under exact_match=True a call to it would count as an unexpected tool call.
# Returns a fixed, canned string for any city; no real weather lookup happens.
# NOTE(review): the docstring presumably doubles as the tool description the
# model sees -- confirm against the upsonic @tool docs before rewording it.
@tool
def get_weather(city: str) -> str:
    """Get weather for a city."""
    return f"Weather in {city}: Sunny, 72°F"

# Agent with two tools available, only one of which the task should need.
agent = Agent(
    model="anthropic/claude-sonnet-4-5",
    tools=[calculate_sum, get_weather],
)

# Graph configured with `agent` as its default agent and progress display off.
graph = Graph(
    default_agent=agent,
    show_progress=False,
)

# One task whose prompt asks only for calculate_sum.
graph_task = Task(description="Use calculate_sum to add 100 + 200")
graph.add(graph_task)

# Run the graph before evaluating: the evaluator works on an executed graph.
asyncio.run(graph.run_async(verbose=False))

# exact_match=True is used here to ensure no tools other than the expected
# ones were invoked across the whole graph execution.
evaluator = ReliabilityEvaluator(
    expected_tool_calls=["calculate_sum"],
    exact_match=True,
)

# Evaluate the executed graph and print the report.
result = evaluator.run(graph, print_results=True)

# Instead of asserting, this example reports which unexpected tools were
# called when the evaluation did not pass.
if not result.passed:
    print(f"Unexpected tools: {result.unexpected_tool_calls}")

GET STARTED

CONCEPTS

STARTING AN AGENT PROJECT

READY TO USE SNIPPETS

DEPLOYMENT

FURTHER READINGS

Reliability Evaluation with Graph

Single-Node Graph

Multi-Node Chain

Exact Match with Graph

GET STARTED

CONCEPTS

STARTING AN AGENT PROJECT

READY TO USE SNIPPETS

DEPLOYMENT

FURTHER READINGS

Single-Node Graph

Multi-Node Chain

Exact Match with Graph

Single-Node Graph

Multi-Node Chain

Exact Match with Graph