
Tutorial 22: RAG Basics - Retrieval Augmented Generation

This tutorial introduces RAG (Retrieval Augmented Generation), which enables your agents to access and use knowledge from your documents.

What you'll learn:

  • What RAG is and why it's useful
  • How embeddings work
  • Using vector stores to store and search documents
  • Building a complete RAG pipeline

Prerequisites:

  • Set OPENAI_API_KEY environment variable, or
  • Have OCI config with DEFAULT profile

Run: python examples/tutorial_22_rag_basics.py

Source

# Copyright (c) 2025, 2026 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v1.0 as shown at
# https://oss.oracle.com/licenses/upl/
"""
Tutorial 22: RAG Basics - Retrieval Augmented Generation

This tutorial introduces RAG (Retrieval Augmented Generation), which enables
your agents to access and use knowledge from your documents.

What you'll learn:
- What RAG is and why it's useful
- How embeddings work
- Using vector stores to store and search documents
- Building a complete RAG pipeline

Prerequisites:
- Set OPENAI_API_KEY environment variable, or
- Have OCI config with DEFAULT profile

Run:
    python examples/tutorial_22_rag_basics.py
"""

import asyncio
import os


# =============================================================================
# What is RAG?
# =============================================================================

"""
RAG (Retrieval Augmented Generation) allows LLMs to access external knowledge.

The flow is:
1. EMBED: Convert documents into vectors (embeddings)
2. STORE: Save vectors in a vector database
3. SEARCH: Find relevant documents using semantic similarity
4. GENERATE: Use retrieved context in LLM prompts

Why RAG?
- LLMs have knowledge cutoffs (they don't know recent events)
- LLMs can't access your private/proprietary data
- RAG grounds responses in your actual documents
- Reduces hallucinations by providing source material
"""


# =============================================================================
# Step 1: Understanding Embeddings
# =============================================================================


async def understand_embeddings():
    """
    Demonstrate how embeddings convert text into numerical vectors.

    Similar texts map to similar vectors (high cosine similarity);
    unrelated texts map to dissimilar vectors (low cosine similarity).
    """
    import math

    print("=" * 60)
    print("Tutorial 22: Understanding Embeddings")
    print("=" * 60)

    # Choose embedder based on available credentials (OpenAI or OCI).
    embedder = get_embedder()
    print(f"Using embedder: {embedder.__class__.__name__}")
    print(f"Embedding dimension: {embedder.config.dimension}")

    # Embed some texts: two related sentences and one unrelated one.
    texts = [
        "Python is a programming language",
        "Python is used for machine learning",
        "Cats are fluffy animals",
    ]

    print("\nEmbedding texts...")
    results = await embedder.embed_batch(texts)

    # Show first few dimensions of each embedding
    for i, result in enumerate(results):
        preview = result.embedding[:5]
        print(f"\n'{texts[i]}'")
        print(f"  First 5 dims: {[round(x, 4) for x in preview]}")
        print(f"  Total dims: {len(result.embedding)}")

    def cosine_similarity(a, b):
        """Return the cosine similarity of two equal-length vectors.

        strict=True raises ValueError on a length mismatch rather than
        silently truncating (strict=False would mask a dimension bug).
        """
        dot = sum(x * y for x, y in zip(a, b, strict=True))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(x * x for x in b))
        return dot / (norm_a * norm_b)

    sim_01 = cosine_similarity(results[0].embedding, results[1].embedding)
    sim_02 = cosine_similarity(results[0].embedding, results[2].embedding)

    print("\n" + "-" * 40)
    print("Similarity Analysis:")
    print(f"  'Python programming' vs 'Python ML': {sim_01:.4f}")
    print(f"  'Python programming' vs 'Cats': {sim_02:.4f}")
    print("\nNote: Higher similarity = more semantically related")


# =============================================================================
# Step 2: Using Vector Stores
# =============================================================================


async def using_vector_stores():
    """
    Demonstrate storing embeddings and running similarity search.

    A vector store persists embeddings and answers nearest-neighbour
    queries. Locus ships several backends:
    - InMemoryVectorStore: Great for prototyping
    - QdrantVectorStore: Production-ready, cloud or local
    - OpenSearchVectorStore: Enterprise search with vectors
    """
    print("\n" + "=" * 60)
    print("Tutorial 22: Using Vector Stores")
    print("=" * 60)

    from locus.rag.stores.base import Document
    from locus.rag.stores.memory import InMemoryVectorStore

    embedder = get_embedder()

    # An in-memory store needs no external service — ideal for a tutorial.
    store = InMemoryVectorStore(dimension=embedder.config.dimension)
    print(f"Created store with dimension: {store.config.dimension}")

    # A small corpus covering a few distinct topics.
    docs_text = [
        "Python is great for data science and machine learning.",
        "JavaScript is the language of the web browser.",
        "Oracle Database is an enterprise relational database.",
        "PostgreSQL is a popular open-source database.",
        "Docker containers package applications with dependencies.",
    ]

    # Embed each text and insert it as a Document with metadata.
    print("\nAdding documents...")
    for index, content in enumerate(docs_text):
        embedded = await embedder.embed(content)
        await store.add(
            Document(
                id=f"doc_{index}",
                content=content,
                embedding=embedded.embedding,
                metadata={"source": "tutorial", "index": index},
            )
        )
        print(f"  Added: {content[:40]}...")

    # Run a semantic search against the stored vectors.
    print("\n" + "-" * 40)
    print("Searching for 'database systems'...")

    query_vec = await embedder.embed("database systems")
    hits = await store.search(
        query_embedding=query_vec.embedding,
        limit=3,
    )

    print("\nTop 3 results:")
    for rank, hit in enumerate(hits, 1):
        print(f"  {rank}. Score: {hit.score:.4f}")
        print(f"     {hit.document.content}")

    # Report how many documents the store holds.
    total = await store.count()
    print(f"\nTotal documents in store: {total}")


# =============================================================================
# Step 3: The RAG Retriever
# =============================================================================


async def using_rag_retriever():
    """
    Show the high-level RAGRetriever API.

    The retriever wraps an embedder plus a vector store and takes care of:
    - embedding documents and queries automatically
    - splitting long texts into overlapping chunks
    - carrying metadata through to the results
    - convenient retrieval methods
    """
    print("\n" + "=" * 60)
    print("Tutorial 22: Using RAG Retriever")
    print("=" * 60)

    from locus.rag import RAGRetriever
    from locus.rag.stores.memory import InMemoryVectorStore

    embedder = get_embedder()
    store = InMemoryVectorStore(dimension=embedder.config.dimension)

    # chunk_size / chunk_overlap control how long documents are split.
    retriever = RAGRetriever(
        embedder=embedder,
        store=store,
        chunk_size=500,
        chunk_overlap=50,
    )

    print("Created RAGRetriever")
    print("  Chunk size: 500 chars")
    print("  Chunk overlap: 50 chars")

    # Raw documents — the retriever embeds and chunks them for us.
    knowledge_base = [
        """
        Python was created by Guido van Rossum and first released in 1991.
        It emphasizes code readability with its notable use of significant
        indentation. Python is dynamically typed and garbage-collected.
        It supports multiple programming paradigms, including structured,
        object-oriented, and functional programming.
        """,
        """
        Oracle Cloud Infrastructure (OCI) is a cloud computing service
        offered by Oracle Corporation. It provides servers, storage,
        network, applications and services through a global network of
        Oracle Corporation managed data centers. OCI offers infrastructure
        as a service (IaaS), platform as a service (PaaS), and software
        as a service (SaaS).
        """,
        """
        Machine learning is a subset of artificial intelligence (AI) that
        provides systems the ability to automatically learn and improve
        from experience without being explicitly programmed. Machine learning
        focuses on the development of computer programs that can access data
        and use it to learn for themselves.
        """,
    ]

    print("\nAdding knowledge base documents...")
    for entry in knowledge_base:
        chunk_ids = await retriever.add_document(entry.strip())
        print(f"  Added document with {len(chunk_ids)} chunks")

    # Ask a natural-language question; the retriever embeds the query
    # and returns the most similar chunks.
    print("\n" + "-" * 40)
    print("Querying: 'When was Python created?'")

    retrieval = await retriever.retrieve(
        query="When was Python created?",
        limit=2,
    )

    print(f"\nFound {len(retrieval.documents)} relevant chunks:")
    for rank, hit in enumerate(retrieval.documents, 1):
        print(f"\n  Result {rank} (score: {hit.score:.4f}):")
        snippet = hit.document.content[:200]
        print(f"  {snippet}...")

    # retrieve_text() returns the matching chunks already formatted.
    print("\n" + "-" * 40)
    print("Using retrieve_text() for clean output:")

    formatted = await retriever.retrieve_text(
        query="What is Oracle Cloud?",
        limit=2,
    )
    print(f"\n{formatted[:300]}...")


# =============================================================================
# Step 4: RAG with Metadata Filtering
# =============================================================================


async def rag_with_metadata():
    """
    Attach metadata to documents so results can be filtered beyond similarity.

    Typical filters:
    - document type (pdf, html, code)
    - date range
    - author or department
    - category or tags
    """
    print("\n" + "=" * 60)
    print("Tutorial 22: RAG with Metadata")
    print("=" * 60)

    from locus.rag import RAGRetriever
    from locus.rag.stores.memory import InMemoryVectorStore

    embedder = get_embedder()
    retriever = RAGRetriever(
        embedder=embedder,
        store=InMemoryVectorStore(dimension=embedder.config.dimension),
    )

    # (content, metadata) pairs spanning two categories.
    documents = [
        (
            "Python supports async/await syntax for concurrency.",
            {"category": "programming", "language": "python"},
        ),
        ("Use pip to install Python packages.", {"category": "programming", "language": "python"}),
        (
            "JavaScript uses async/await for async operations.",
            {"category": "programming", "language": "javascript"},
        ),
        ("Set up Oracle Database with these steps.", {"category": "database", "type": "oracle"}),
        ("PostgreSQL is an open-source database.", {"category": "database", "type": "postgresql"}),
    ]

    print("Adding categorized documents...")
    for text, meta in documents:
        await retriever.add_document(text, metadata=meta)
        print(f"  Added: {text[:40]}... [{meta}]")

    # Similarity search; metadata rides along with each hit (filtering
    # support depends on the underlying store).
    print("\n" + "-" * 40)
    print("Searching for 'async programming'...")

    outcome = await retriever.retrieve("async programming", limit=3)

    print("\nAll results:")
    for hit in outcome.documents:
        print(f"  Score: {hit.score:.4f} | {hit.document.content[:50]}...")
        print(f"    Metadata: {hit.document.metadata}")


# =============================================================================
# Helper Functions
# =============================================================================


def get_embedder():
    """
    Build an embedder from whatever credentials are available.

    Preference order:
    1. OpenAI (``OPENAI_API_KEY`` environment variable set)
    2. OCI GenAI (``~/.oci/config`` exists)

    Returns:
        A configured embeddings client.

    Raises:
        RuntimeError: If neither credential source yields a working
            embedder. When an OCI attempt failed, the underlying
            exception is chained as the cause instead of being silently
            discarded, so the real failure is visible in the traceback.
    """
    # Try OpenAI first
    if os.environ.get("OPENAI_API_KEY"):
        from locus.rag.embeddings import OpenAIEmbeddings

        return OpenAIEmbeddings(model="text-embedding-3-small")

    # Try OCI GenAI. OCIEmbeddings auto-derives the endpoint from
    # LOCUS_OCI_REGION / OCI_REGION (falls back to the profile region,
    # then us-chicago-1) when service_endpoint is left empty.
    oci_error = None
    if os.path.exists(os.path.expanduser("~/.oci/config")):
        try:
            from locus.rag.embeddings import OCIEmbeddings

            return OCIEmbeddings(
                model_id="cohere.embed-english-v3.0",
                profile_name=os.getenv("LOCUS_OCI_PROFILE", os.getenv("OCI_PROFILE", "DEFAULT")),
                auth_type=os.getenv("LOCUS_OCI_AUTH_TYPE", os.getenv("OCI_AUTH_TYPE", "api_key")),
                compartment_id=os.getenv("LOCUS_OCI_COMPARTMENT", os.getenv("OCI_COMPARTMENT", "")),
                service_endpoint=os.getenv("LOCUS_OCI_ENDPOINT", os.getenv("OCI_ENDPOINT", "")),
            )
        except Exception as err:  # best-effort: fall through, but keep the cause
            oci_error = err

    raise RuntimeError(
        "No embedding credentials found. Set OPENAI_API_KEY or configure OCI."
    ) from oci_error


# =============================================================================
# Main
# =============================================================================


async def main():
    """Execute every tutorial section in order, then print a recap."""
    sections = (
        understand_embeddings,
        using_vector_stores,
        using_rag_retriever,
        rag_with_metadata,
    )
    for section in sections:
        await section()

    print("\n" + "=" * 60)
    print("Tutorial 22 Complete!")
    print("=" * 60)
    print("\nKey concepts covered:")
    print("  - Embeddings convert text to vectors")
    print("  - Similar texts have similar vectors")
    print("  - Vector stores enable fast similarity search")
    print("  - RAGRetriever simplifies the entire pipeline")
    print("\nNext: Try tutorial_23_rag_providers.py for different embedding providers")


# Entry point: run all tutorial sections inside a single event loop.
if __name__ == "__main__":
    asyncio.run(main())