Tutorial 28: Agent Server — Deploy Agents as HTTP APIs¶
This tutorial covers:
- AgentServer: wrap any agent as a FastAPI app
- POST /invoke: synchronous invocation
- POST /stream: SSE streaming (uses the same SSE primitives as tutorial 21)
- GET /threads/{tid}: load a persisted thread
- DELETE /threads/{tid}: drop a persisted thread
- GET /health: health check
Threads are scoped to the bearer-principal hash so two API keys sharing one server can't read each other's conversations.
When to use AgentServer vs A2AServer (tutorial 34):
- AgentServer: first-party HTTP API. Persisted threads, principal scoping, bearer auth. Use when locus is the system of record and clients are yours.
- A2AServer: cross-framework interop with the A2A message spec. Use when another framework (Strands, ADK) needs to call your locus agent or vice versa.
Prerequisites:
- pip install fastapi uvicorn
- Configure model via environment variables
Difficulty: Intermediate
Source¶
# Copyright (c) 2025, 2026 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v1.0 as shown at
# https://oss.oracle.com/licenses/upl/
"""
Tutorial 28: Agent Server — Deploy Agents as HTTP APIs
This tutorial covers:
- AgentServer: wrap any agent as a FastAPI app
- POST /invoke: synchronous invocation
- POST /stream: SSE streaming (uses the same SSE primitives as tutorial 21)
- GET /threads/{tid}: load a persisted thread
- DELETE /threads/{tid}: drop a persisted thread
- GET /health: health check
Threads are scoped to the bearer-principal hash so two API keys sharing
one server can't read each other's conversations.
When to use AgentServer vs A2AServer (tutorial 34):
- AgentServer: first-party HTTP API. Persisted threads, principal scoping,
bearer auth. Use when locus is the system of record and clients are yours.
- A2AServer: cross-framework interop with the A2A message spec. Use when
another framework (Strands, ADK) needs to call your locus agent or vice
versa.
Prerequisites:
- pip install fastapi uvicorn
- Configure model via environment variables
Difficulty: Intermediate
"""
import os
from config import get_model
from locus.agent import Agent, AgentConfig
from locus.memory.backends.memory import MemoryCheckpointer
from locus.server import AgentServer
# =============================================================================
# Part 1: Create and configure the server
# =============================================================================
def example_server():
    """Stand up an AgentServer and walk every endpoint via TestClient.

    Exercises /health, /invoke, /stream, and the /threads CRUD routes
    in-process, then prints instructions for running a live server.
    Returns the constructed AgentServer.
    """
    print("=== Agent Server ===\n")

    model = get_model()
    assistant = Agent(
        config=AgentConfig(
            system_prompt="You are a helpful assistant. Answer concisely.",
            max_iterations=5,
            model=model,
            # In-memory checkpointer so /threads/{id} has something to read.
            checkpointer=MemoryCheckpointer(),
        )
    )
    server = AgentServer(
        agent=assistant,
        title="My Agent API",
        description="A helpful AI assistant exposed as HTTP API",
    )

    # TestClient drives the FastAPI app in-process — no socket, no uvicorn.
    from fastapi.testclient import TestClient

    tc = TestClient(server.app)

    # Health check
    resp = tc.get("/health")
    print(f"GET /health: {resp.json()}")

    # Invoke with an explicit thread_id so we can read it back.
    resp = tc.post(
        "/invoke",
        json={"prompt": "What is 2+2?", "thread_id": "demo-thread"},
    )
    payload = resp.json()
    print(f"POST /invoke: {payload['message']} (success={payload['success']})")

    # Stream
    resp = tc.post("/stream", json={"prompt": "Name 3 colors."})
    print(f"POST /stream: status={resp.status_code}")

    # GET the persisted thread we just populated.
    resp = tc.get("/threads/demo-thread")
    if resp.status_code != 200:
        print(f"GET /threads/demo-thread: status={resp.status_code}")
    else:
        state = resp.json()
        print(
            f"GET /threads/demo-thread: iteration={state['iteration']}, "
            f"messages={len(state['messages'])}"
        )

    # 404 on a thread that doesn't exist.
    resp = tc.get("/threads/never-existed")
    print(f"GET /threads/never-existed: status={resp.status_code}")

    # DELETE the thread (idempotent — second delete returns deleted=False).
    resp = tc.delete("/threads/demo-thread")
    print(f"DELETE /threads/demo-thread: {resp.json()}")

    print("\nTo run as a real server, set LOCUS_TUTORIAL_BOOT=1 and run this")
    print("file directly. Example session:")
    print("  LOCUS_TUTORIAL_BOOT=1 LOCUS_MODEL_PROVIDER=oci \\")
    print("    python examples/tutorial_28_agent_server.py")
    print("  curl -s -X POST http://127.0.0.1:8000/invoke \\")
    print("    -H 'Content-Type: application/json' \\")
    print('    -d \'{"prompt":"What is 2+2?"}\'')
    print("\nWith api_key= set, every /threads call is principal-scoped:")
    print("  AgentServer(agent=agent, api_key='secret')")
    print("  # Two clients with different bearer tokens see different threads")
    print("  # for the same client-supplied thread_id.")
    return server
def boot_live_server() -> None:
    """Construct the agent server and bind a live uvicorn instance.

    Gated behind ``LOCUS_TUTORIAL_BOOT=1`` so the integration runner that
    imports / executes every tutorial doesn't hang here.
    """
    served_agent = Agent(
        config=AgentConfig(
            system_prompt="You are a helpful assistant. Answer concisely.",
            max_iterations=5,
            model=get_model(),
            checkpointer=MemoryCheckpointer(),
        )
    )
    api = AgentServer(
        agent=served_agent,
        title="My Agent API",
        description="A helpful AI assistant exposed as HTTP API",
    )
    print("Booting AgentServer on http://127.0.0.1:8000 — Ctrl-C to stop.")
    print("Try:  curl -X POST http://127.0.0.1:8000/invoke \\")
    print("      -H 'Content-Type: application/json' \\")
    print('      -d \'{"prompt":"What is 2+2?"}\'')
    # Blocks until interrupted; serves the wrapped agent over real HTTP.
    api.run(host="127.0.0.1", port=8000)
if __name__ == "__main__":
    # Default to the in-process TestClient demo; the live uvicorn boot is
    # opt-in via LOCUS_TUTORIAL_BOOT=1 so batch tutorial runners never block.
    wants_live = os.getenv("LOCUS_TUTORIAL_BOOT") == "1"
    (boot_live_server if wants_live else example_server)()