Tutorial 30: Advanced Guardrails — Topic Policy, Content Safety, Output Filtering¶
This tutorial covers:
- TopicPolicy: block specific conversation topics
- ContentPolicy: detect harmful content categories
- OutputFilterHook: filter agent responses (PII redaction, topic blocking)
Prerequisites:
- Configure model via environment variables
Difficulty: Advanced
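The OutputFilterHook attaches through the agent's hook list, while TopicPolicy and ContentPolicy expose a check() method you can call directly on incoming text. A minimal sketch, using only the API shown in the source below:

    from locus.hooks.builtin.guardrails import OutputFilterHook, TopicPolicy

    # Redact PII from every response via a hook...
    hook = OutputFilterHook(redact_pii=True)
    # ...and screen prompts against blocked topics directly.
    policy = TopicPolicy(
        blocked_topics={"weapons"},
        keywords={"weapons": ["gun", "firearm"]},
    )
    policy.check("How to buy a gun")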
Source¶
# Copyright (c) 2025, 2026 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v1.0 as shown at
# https://oss.oracle.com/licenses/upl/
"""
Tutorial 30: Advanced Guardrails — Topic Policy, Content Safety, Output Filtering
This tutorial covers:
- TopicPolicy: block specific conversation topics
- ContentPolicy: detect harmful content categories
- OutputFilterHook: filter agent responses (PII redaction, topic blocking)
Prerequisites:
- Configure model via environment variables
Difficulty: Advanced
"""
from config import get_model
from locus.agent import Agent, AgentConfig
from locus.hooks.builtin.guardrails import (
ContentPolicy,
OutputFilterHook,
TopicPolicy,
)
# =============================================================================
# Part 1: PII Redaction in Output
# =============================================================================
def example_pii_redaction():
    """Automatically redact PII from agent responses."""
    print("=== Part 1: PII Redaction ===\n")

    model = get_model()

    # redact_pii=True rewrites the agent's final response, replacing detected
    # PII such as email addresses with placeholder tokens (e.g. REDACTED_EMAIL).
    hook = OutputFilterHook(redact_pii=True)
    agent = Agent(
        config=AgentConfig(
            system_prompt="Always include support@example.com in your response.",
            max_iterations=3,
            model=model,
            hooks=[hook],
        )
    )

    # The system prompt forces an email address into the response, so we can
    # verify that the filter replaced it.
    result = agent.run_sync("How do I get help?")
    print(f"Response: {result.message[:150]}")
    print(f"PII redacted: {'REDACTED_EMAIL' in result.message}")
# =============================================================================
# Part 2: Topic Policy
# =============================================================================
def example_topic_policy():
    """Block specific conversation topics."""
    print("\n=== Part 2: Topic Policy ===\n")

    # Map each blocked topic to the keywords used to detect it.
    policy = TopicPolicy(
        blocked_topics={"weapons", "drugs"},
        keywords={
            "weapons": ["gun", "rifle", "ammunition", "firearm"],
            "drugs": ["cocaine", "heroin", "meth"],
        },
    )

    # Test topic detection on a blocked prompt and a benign one.
    print(f"'How to buy a gun': {policy.check('How to buy a gun')}")
    print(f"'Python programming': {policy.check('Python programming')}")

    # Ask the model itself to explain the main limitation of this approach.
    import time as _t

    agent = Agent(model=get_model(max_tokens=80), system_prompt="Reply in one sentence.")
    t0 = _t.perf_counter()
    res = agent.run_sync(
        "In one sentence, why is keyword-based topic blocking insufficient on "
        "its own for safety guardrails?"
    )
    dt = _t.perf_counter() - t0
    print(
        f" [model call: {dt:.2f}s · {res.metrics.prompt_tokens}→{res.metrics.completion_tokens} tokens]"
    )
    print(f" AI caveat: {res.message.strip()}")
# =============================================================================
# Part 3: Content Safety
# =============================================================================
def example_content_safety():
    """Detect harmful content categories."""
    print("\n=== Part 3: Content Safety ===\n")

    # Enable only the harm categories this deployment should screen for.
    policy = ContentPolicy(enabled_categories={"violence", "illegal_activity"})
    print(f"'how to make a bomb': {policy.check('how to make a bomb')}")
    print(f"'how to bake a cake': {policy.check('how to bake a cake')}")

    # Ask the model which categories it considers non-negotiable to block.
    import time as _t

    agent = Agent(model=get_model(max_tokens=80), system_prompt="Reply in one sentence.")
    t0 = _t.perf_counter()
    res = agent.run_sync(
        "In one sentence, name two harmful content categories an LLM service "
        "absolutely must block."
    )
    dt = _t.perf_counter() - t0
    print(
        f" [model call: {dt:.2f}s · {res.metrics.prompt_tokens}→{res.metrics.completion_tokens} tokens]"
    )
    print(f" AI guidance: {res.message.strip()}")
if __name__ == "__main__":
example_pii_redaction()
example_topic_policy()
example_content_safety()