Skip to main content

Overview

This page provides practical, runnable examples demonstrating how to use KnowledgeBase with Agent and Task in various scenarios.

Basic RAG Query

Query a knowledge base with a simple question:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig

# Embedding provider plus an in-memory Chroma vector store.
embedding_provider = OpenAIEmbedding(OpenAIEmbeddingConfig())
vector_store = ChromaProvider(
    ChromaConfig(
        collection_name="example_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# Loader used to ingest the PDF source.
pdf_loader = PdfLoader(PdfLoaderConfig())

# Assemble the knowledge base from a single PDF document.
knowledge_base = KnowledgeBase(
    sources=["document.pdf"],
    embedding_provider=embedding_provider,
    vectordb=vector_store,
    loaders=[pdf_loader],
)

# Ask a question grounded in the knowledge base via Task context.
rag_agent = Agent("openai/gpt-4o")
query_task = Task(
    description="What are the main topics in the document?",
    context=[knowledge_base],
)

answer = rag_agent.do(query_task)
print(answer)

Multiple Documents

Process multiple documents and query across all of them:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

# Embedding provider plus a Chroma store persisted on disk under ./kb_db.
embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())
chroma_store = ChromaProvider(
    ChromaConfig(
        collection_name="multi_doc_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./kb_db"),
    )
)

# One knowledge base spanning three documents of different formats.
multi_doc_kb = KnowledgeBase(
    sources=["doc1.pdf", "doc2.md", "doc3.docx"],
    embedding_provider=embedder,
    vectordb=chroma_store,
)

comparison_agent = Agent("openai/gpt-4o")
comparison_task = Task(
    description="Compare the information across all documents",
    context=[multi_doc_kb],
)

response = comparison_agent.do(comparison_task)
print(response)

Directory Processing

Process all files in a directory:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig

# Embeddings plus a Chroma collection persisted on disk at ./kb_db.
embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())
store = ChromaProvider(
    ChromaConfig(
        collection_name="directory_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.EMBEDDED, db_path="./kb_db"),
    )
)

# Loader applied while walking the directory.
directory_loader = PdfLoader(PdfLoaderConfig())

# Pointing `sources` at a directory processes all supported files inside it.
directory_kb = KnowledgeBase(
    sources=["data/"],
    embedding_provider=embedder,
    vectordb=store,
    loaders=[directory_loader],
)

summary_agent = Agent("openai/gpt-4o")
summary_task = Task(
    description="Summarize the key information from all files",
    context=[directory_kb],
)

summary = summary_agent.do(summary_task)
print(summary)

Custom Loaders and Splitters

Use specific loaders and splitters for better control:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig
from upsonic.text_splitter.recursive import RecursiveChunker, RecursiveChunkingConfig
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

# Explicit loader and chunker instead of the framework defaults.
pdf_loader = PdfLoader(PdfLoaderConfig())
recursive_splitter = RecursiveChunker(
    RecursiveChunkingConfig(
        chunk_size=512,
        chunk_overlap=50,
    )
)

embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())
store = ChromaProvider(
    ChromaConfig(
        collection_name="custom_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# The knowledge base chunks documents with the supplied splitter.
custom_kb = KnowledgeBase(
    sources=["document.pdf"],
    embedding_provider=embedder,
    vectordb=store,
    loaders=[pdf_loader],
    splitters=[recursive_splitter],
)

extraction_agent = Agent("openai/gpt-4o")
extraction_task = Task(
    description="Extract key information from the document",
    context=[custom_kb],
)

extracted = extraction_agent.do(extraction_task)
print(extracted)

Indexed Processing

Use different loaders and splitters for different sources:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.markdown import MarkdownLoader
from upsonic.loaders.config import PdfLoaderConfig, MarkdownLoaderConfig
from upsonic.text_splitter.recursive import RecursiveChunker, RecursiveChunkingConfig
from upsonic.text_splitter.semantic import SemanticChunker, SemanticChunkingConfig
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())

# One loader per source: PDF loader for manual.pdf, Markdown loader for guide.md.
source_loaders = [
    PdfLoader(PdfLoaderConfig()),
    MarkdownLoader(MarkdownLoaderConfig()),
]

# One splitter per source: recursive chunking for the PDF, semantic
# chunking (which itself needs an embedding provider) for the guide.
source_splitters = [
    RecursiveChunker(RecursiveChunkingConfig(chunk_size=512)),
    SemanticChunker(
        SemanticChunkingConfig(
            embedding_provider=embedder,
            chunk_size=1024,
        )
    ),
]

store = ChromaProvider(
    ChromaConfig(
        collection_name="indexed_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

indexed_kb = KnowledgeBase(
    sources=["manual.pdf", "guide.md"],
    embedding_provider=embedder,
    vectordb=store,
    loaders=source_loaders,
    splitters=source_splitters,
)

overlap_agent = Agent("openai/gpt-4o")
overlap_task = Task(
    description="What information is available in both documents?",
    context=[indexed_kb],
)

overlap = overlap_agent.do(overlap_task)
print(overlap)

Async Usage

Use async/await to run knowledge base queries inside an asyncio event loop, so they can overlap with other asynchronous work instead of blocking:
import asyncio
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig

async def main():
    """Build a PDF-backed knowledge base and query it through the async agent API."""
    embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())
    store = ChromaProvider(ChromaConfig(
        collection_name="async_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY)
    ))

    pdf_loader = PdfLoader(PdfLoaderConfig())

    knowledge_base = KnowledgeBase(
        sources=["document.pdf"],
        embedding_provider=embedder,
        vectordb=store,
        loaders=[pdf_loader]
    )

    summarizer = Agent("openai/gpt-4o")
    summary_task = Task(
        description="Summarize the document",
        context=[knowledge_base]
    )

    # do_async awaits the agent run instead of blocking the event loop.
    summary = await summarizer.do_async(summary_task)
    print(summary)

asyncio.run(main())

Streaming Response

Stream the agent’s response while querying the knowledge base:
import asyncio

from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig

async def main():
    """Stream an agent's response chunk-by-chunk while it queries a PDF knowledge base."""
    embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
    vectordb = ChromaProvider(ChromaConfig(
        collection_name="streaming_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY)
    ))

    loader = PdfLoader(PdfLoaderConfig())

    kb = KnowledgeBase(
        sources=["document.pdf"],
        embedding_provider=embedding,
        vectordb=vectordb,
        loaders=[loader]
    )

    agent = Agent("openai/gpt-4o")
    task = Task(
        description="Explain the main concepts",
        context=[kb]
    )

    # BUG FIX: `async with` / `async for` are only legal inside a coroutine;
    # the original example used them at module top level, which is a
    # SyntaxError. The streaming loop now lives in this async function.
    async with agent.stream(task) as result:
        async for text_chunk in result.stream_output():
            print(text_chunk, end='', flush=True)
        print()

    # `result` stays bound after the context exits, so the complete
    # response can still be retrieved here.
    final_output = result.get_final_output()

asyncio.run(main())

Multiple Knowledge Bases

Query multiple knowledge bases in a single task:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig

# Shared embedding provider and loader; each knowledge base gets its own store.
embedder = OpenAIEmbedding(OpenAIEmbeddingConfig())
shared_loader = PdfLoader(PdfLoaderConfig())

def _in_memory_store(collection_name):
    """Create an in-memory Chroma collection for one knowledge base."""
    return ChromaProvider(ChromaConfig(
        collection_name=collection_name,
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY)
    ))

# Separate knowledge bases for the technical docs and the user guides.
tech_kb = KnowledgeBase(
    sources=["technical_docs/"],
    embedding_provider=embedder,
    vectordb=_in_memory_store("tech_kb"),
    loaders=[shared_loader]
)

guides_kb = KnowledgeBase(
    sources=["user_guides/"],
    embedding_provider=embedder,
    vectordb=_in_memory_store("guides_kb"),
    loaders=[shared_loader]
)

# Both knowledge bases are supplied as context for a single task.
comparison_agent = Agent("openai/gpt-4o")
comparison_task = Task(
    description="Compare technical documentation with user guides",
    context=[tech_kb, guides_kb]
)

outcome = comparison_agent.do(comparison_task)
print(outcome)