Debate with Judge¶
PRO and CON take turns arguing a resolution. After N rounds a Judge
reads the full transcript and emits a typed Verdict — winner,
confidence, key points, reasoning — that downstream systems (tickets,
audit logs, databases) can consume directly.
This notebook covers:
Turn(side, round, text)accumulated into alist[Turn]in graph state — the transcript.output_schema=Verdicton the judge Agent, soresult.parsedis a populated Pydantic object, not a JSON string.- The judge node raises rather than fabricating a verdict if the configured model can't honor the schema.
check_structured_output_capable()short-circuits the notebook with setup guidance when running under the mock model or a model without constrained-decoding support.
Prerequisites¶
- Notebook 14 (structured output).
- Notebook 17 (basic graph).
Run¶
The default provider is OCI Generative AI. Pick a model that supports
constrained JSON decoding (canonical: openai.gpt-4.1 or
openai.gpt-5). Under LOCUS_MODEL_PROVIDER=mock the notebook exits
cleanly with setup instructions.
Source¶
#!/usr/bin/env python3
# Copyright (c) 2025, 2026 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v1.0 as shown at
# https://oss.oracle.com/licenses/upl/
"""Notebook 33: adversarial debate with a structured-output judge.
PRO and CON take turns arguing a resolution. After N rounds a Judge
reads the full transcript and emits a typed ``Verdict`` — winner,
confidence, key points, reasoning — that downstream systems (tickets,
audit logs, databases) can consume directly.
- Each turn is a ``Turn(side, round, text)`` Pydantic model. The
transcript is a ``list[Turn]`` accumulated in graph state.
- The Judge uses Locus's ``output_schema=Verdict``, so the result is a
populated Pydantic object — no JSON parsing in the caller.
- If the configured model can't honor the schema, the judge node raises
rather than fabricating a verdict from raw text.
- A ``check_structured_output_capable()`` guard at the top short-
circuits the notebook when the model can't produce JSON (mock,
Cohere R-series).
Run it:
.venv/bin/python examples/notebook_38_debate_with_judge.py
The default provider is OCI Generative AI. With ``~/.oci/config``
present the agents talk to a live OCI model that supports constrained
decoding (canonical pick: ``openai.gpt-4.1`` or ``openai.gpt-5``). Set
``LOCUS_MODEL_PROVIDER=mock`` and the notebook exits cleanly with
setup instructions.
Prerequisites:
- Notebook 14 (structured output).
- Notebook 17 (basic graph).
"""
from __future__ import annotations
import asyncio
from typing import Any
from config import get_model
from pydantic import BaseModel, Field
from locus.agent import Agent, AgentConfig
from locus.core.events import TerminateEvent
from locus.multiagent.graph import END, START, StateGraph
# ---------------------------------------------------------------------------
# Typed shapes — Turn for the transcript, Verdict for the final ruling
# ---------------------------------------------------------------------------
class Turn(BaseModel):
"""One turn of the debate."""
side: str # "pro" | "con"
round: int
text: str
class Verdict(BaseModel):
"""Judge's structured ruling."""
winner: str = Field(description="'pro', 'con', or 'tie'")
confidence: float = Field(ge=0.0, le=1.0, description="0..1 confidence in the call")
key_points: list[str] = Field(description="The 2–4 strongest arguments that drove the decision")
reasoning: str = Field(description="One-paragraph rationale")
PRO_PROMPT = (
"You are arguing the FOR side. Be specific, cite reasoning, and "
"directly rebut the OPPOSITION's most recent point if any. Three "
"sentences max."
)
CON_PROMPT = (
"You are arguing the AGAINST side. Be specific, cite reasoning, and "
"directly rebut the FOR side's most recent point if any. Three "
"sentences max."
)
JUDGE_PROMPT = (
"You are an impartial debate judge. Read the full transcript and "
"emit a Verdict object. Pick a winner only if one side clearly "
"outargued the other; otherwise return 'tie'."
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_agent(role: str, prompt: str, model: Any) -> Agent:
return Agent(
config=AgentConfig(
agent_id=f"debate-{role}",
model=model,
system_prompt=prompt,
max_iterations=2,
# Reasoning-class models (gpt-5.x, o-series) burn thinking
# tokens before any output; 2000 leaves room for both the
# thinking budget and a short debater turn.
max_tokens=2000,
)
)
async def _argue(agent: Agent, transcript: list[Turn], topic: str, side: str, rnd: int) -> str:
history = "\n".join(f"[{t.side.upper()} r{t.round}] {t.text}" for t in transcript)
prompt = (
f"Topic: {topic}\n\n"
f"Transcript so far:\n{history or '(no turns yet)'}\n\n"
f"You are arguing the {side.upper()} side, round {rnd}. Make your point now."
)
final = ""
async for event in agent.run(prompt):
if isinstance(event, TerminateEvent):
final = event.final_message or ""
return final.strip()
# ---------------------------------------------------------------------------
# Graph nodes
# ---------------------------------------------------------------------------
async def pro_turn(state: dict[str, Any]) -> dict[str, Any]:
agent = _make_agent("pro", PRO_PROMPT, state["__model__"])
rnd = state.get("round", 0)
text = await _argue(agent, state.get("transcript", []), state["topic"], "pro", rnd)
return {"transcript": state.get("transcript", []) + [Turn(side="pro", round=rnd, text=text)]}
async def con_turn(state: dict[str, Any]) -> dict[str, Any]:
agent = _make_agent("con", CON_PROMPT, state["__model__"])
rnd = state.get("round", 0)
text = await _argue(agent, state.get("transcript", []), state["topic"], "con", rnd)
return {
"transcript": state.get("transcript", []) + [Turn(side="con", round=rnd, text=text)],
"round": rnd + 1,
}
async def judge_turn(state: dict[str, Any]) -> dict[str, Any]:
"""Judge with ``output_schema=Verdict`` — the result is a typed Pydantic object.
If the configured model can't honor the JSON schema we raise rather
than fabricating a verdict from free text.
"""
import asyncio as _asyncio
agent = Agent(
config=AgentConfig(
agent_id="judge",
model=state["__model__"],
system_prompt=JUDGE_PROMPT,
output_schema=Verdict,
max_iterations=2,
# 4000 tokens covers both the reasoning-model thinking
# budget and the JSON verdict payload.
max_tokens=4000,
)
)
transcript_text = "\n".join(
f"[{t.side.upper()} r{t.round}] {t.text}" for t in state["transcript"]
)
prompt = f"Topic: {state['topic']}\n\nTranscript:\n{transcript_text}\n\nNow emit your Verdict."
# run_sync returns the parsed object. We're already inside an asyncio
# loop driving the graph, so hop the call onto a worker thread.
last_exc: BaseException | None = None
final = None
for attempt in range(3):
try:
final = await _asyncio.to_thread(agent.run_sync, prompt)
break
# Retry covers transient OCI flakiness.
except Exception as exc: # noqa: BLE001
last_exc = exc
await _asyncio.sleep(0.5 * (attempt + 1))
if final is None:
raise RuntimeError(f"Judge failed after 3 attempts. Last error: {last_exc!r}") from last_exc
if final.parsed is None:
raise RuntimeError(
"Judge returned no parsed Verdict. The configured model could not "
"honor the JSON schema. Use a stronger model (e.g. openai.gpt-4o, "
"openai.gpt-5, anthropic.claude-3-5-sonnet) for notebook 32. "
f"Raw output: {final.message!r}"
)
return {"verdict": final.parsed}
# ---------------------------------------------------------------------------
# Routing — N rounds of pro/con, then judge once
# ---------------------------------------------------------------------------
N_ROUNDS = 2
def route_after_con(state: dict[str, Any]) -> str:
if state.get("round", 0) >= N_ROUNDS:
return "judge"
return "pro"
def build_debate_graph() -> StateGraph:
graph = StateGraph(name="debate-with-judge")
graph.add_node("pro", pro_turn)
graph.add_node("con", con_turn)
graph.add_node("judge", judge_turn)
graph.add_edge(START, "pro")
graph.add_edge("pro", "con")
graph.add_conditional_edges("con", route_after_con, targets={"pro": "pro", "judge": "judge"})
graph.add_edge("judge", END)
return graph
# ---------------------------------------------------------------------------
# Driver
# ---------------------------------------------------------------------------
async def main() -> None:
from config import check_structured_output_capable
# Short-circuits the notebook when the model can't produce JSON.
check_structured_output_capable()
print("Notebook 33: adversarial debate with a structured-output judge")
print("=" * 60)
model = get_model()
graph = build_debate_graph()
initial = {
"topic": (
"Resolved: A 30-engineer SaaS team running a 250k-LOC Python "
"monolith on a single Postgres + Redis stack should split the "
"monolith into microservices over the next 12 months, given a "
"current weekly deploy cadence and ~2 outages per quarter "
"traceable to coupling between billing and provisioning code."
),
"transcript": [],
"round": 0,
"__model__": model,
}
print(f"\nTopic: {initial['topic']!r}\n")
print(f"Running {N_ROUNDS} rounds of PRO vs CON, then judge…\n")
result = await graph.execute(initial)
failed = [
(nid, nr.error) for nid, nr in result.node_results.items() if nr.status.value == "failed"
]
if failed:
for nid, err in failed:
print(f"\n ✗ node {nid} FAILED: {err}")
raise RuntimeError(f"graph had {len(failed)} failed node(s); see above")
transcript: list[Turn] = result.final_state.get("transcript", [])
verdict: Verdict = result.final_state["verdict"]
print(f"Total turns: {len(transcript)}")
print()
for t in transcript:
print(f" [{t.side.upper()} r{t.round}] {t.text}")
print()
print("Verdict:")
print("-" * 60)
print(f" Winner: {verdict.winner}")
print(f" Confidence: {verdict.confidence:.2f}")
print(" Key points:")
for p in verdict.key_points:
print(f" - {p}")
print(f" Reasoning: {verdict.reasoning}")
if __name__ == "__main__":
asyncio.run(main())