Knowledge Bases
Knowledge bases can be added to tasks to provide RAG (Retrieval-Augmented Generation) capabilities, allowing the agent to access and use external knowledge sources.

Basic Knowledge Base Integration

from upsonic import KnowledgeBase, Task, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

# Embeddings are produced via OpenAI's embedding API.
embedder = OpenAIEmbedding()

# ChromaDB configuration: in-memory here, or Mode.EMBEDDED with db_path to persist.
config = ChromaConfig(
    collection_name="my_knowledge_base",
    # 1536 is the OpenAI ada-002 embedding dimension.
    vector_size=1536,
    connection=ConnectionConfig(mode=Mode.IN_MEMORY),
)

# Vector store backed by the config above.
store = ChromaProvider(config=config)

# Index the source documents into the vector store.
kb = KnowledgeBase(
    sources=["document1.pdf", "document2.txt"],
    embedding_provider=embedder,
    vectordb=store,
)

# Attaching the knowledge base as task context enables RAG retrieval.
rag_task = Task(
    description="Answer questions about the uploaded documents",
    context=[kb],
)

agent = Agent(model="openai/gpt-4o")

print(agent.do(rag_task))

Multiple Knowledge Bases

from upsonic import KnowledgeBase, Task, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

# One embedding provider and one vector store are shared by both knowledge bases.
embedder = OpenAIEmbedding()
store = ChromaProvider(
    config=ChromaConfig(
        collection_name="multi_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# Descriptive names help distinguish the knowledge bases from each other.
tech_kb = KnowledgeBase(
    sources=["technical_docs/"],
    embedding_provider=embedder,
    vectordb=store,
    name="Technical Documentation",
)

policy_kb = KnowledgeBase(
    sources=["company_policies.pdf"],
    embedding_provider=embedder,
    vectordb=store,
    name="Company Policies",
)

# A task can draw on several knowledge bases at once via its context list.
agent = Agent(model="openai/gpt-4o")
multi_task = Task(
    description="Find information about both technical procedures and company policies",
    context=[tech_kb, policy_kb],
)

print(agent.do(multi_task))

Knowledge Base with Direct Content

from upsonic import KnowledgeBase, Task, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

embedder = OpenAIEmbedding()
store = ChromaProvider(
    config=ChromaConfig(
        collection_name="content_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# A plain string in `sources` is indexed directly — no file on disk required.
kb = KnowledgeBase(
    sources=["This is important information about our product features and capabilities."],
    embedding_provider=embedder,
    vectordb=store,
)

agent = Agent(model="openai/gpt-4o")
question = Task(
    description="What are the key features mentioned in the product information?",
    context=[kb],
)

print(agent.do(question))

Knowledge Base Configuration

from upsonic import KnowledgeBase, Task, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

embedder = OpenAIEmbedding()
store = ChromaProvider(
    config=ChromaConfig(
        collection_name="advanced_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# Loading and chunking behaviour is tuned through the optional config arguments.
kb = KnowledgeBase(
    sources=["data/"],
    embedding_provider=embedder,
    vectordb=store,
    name="Custom Knowledge Base",
    use_case="rag_retrieval",
    quality_preference="balanced",
    loader_config={"skip_empty_content": True},
    splitter_config={"chunk_overlap": 200},
)

agent = Agent(model="openai/gpt-4o")
search_task = Task(
    description="Search the knowledge base for relevant information",
    context=[kb],
)
print(agent.do(search_task))

Vector Search Filters

Filter vector search results when using knowledge bases:
from upsonic import Task, KnowledgeBase, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

embedder = OpenAIEmbedding()
store = ChromaProvider(
    config=ChromaConfig(
        collection_name="filter_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

kb = KnowledgeBase(
    sources=["documents/"],
    embedding_provider=embedder,
    vectordb=store,
)

agent = Agent(model="openai/gpt-4o")

# `vector_search_filter` restricts retrieval to entries with matching metadata.
filtered_task = Task(
    description="Find information about Q4 sales",
    context=[kb],
    vector_search_filter={"department": "sales", "year": 2024},
)

print(agent.do(filtered_task))

Supported Sources

Knowledge bases support various source types:
  • File Paths: Individual files (PDF, TXT, DOCX, etc.)
  • Directories: Recursive directory scanning
  • Direct Content: String content passed directly
  • Mixed Sources: Any combination of files, directories, and direct string content in one list
from upsonic import KnowledgeBase, Task, Agent
from upsonic.embeddings import OpenAIEmbedding
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode

embedder = OpenAIEmbedding()
store = ChromaProvider(
    config=ChromaConfig(
        collection_name="mixed_kb",
        vector_size=1536,
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    )
)

# Directories, individual files, and literal strings can be mixed in one list.
kb = KnowledgeBase(
    sources=[
        "documents/",  # directory (scanned for documents)
        "important.pdf",  # single file
        "Key insight: Our product is revolutionary",  # direct string content
    ],
    embedding_provider=embedder,
    vectordb=store,
)

agent = Agent(model="openai/gpt-4o")
insight_task = Task(
    description="What insights can you find in the knowledge base?",
    context=[kb],
)
print(agent.do(insight_task))

Best Practices

  • Source Organization: Organize your sources logically for better retrieval
  • Embedding Provider: Choose appropriate embedding providers for your use case
  • Vector Database: Select vector databases that match your scale requirements
  • Chunking Strategy: Configure chunking parameters based on your content type
  • Quality vs Speed: Set quality_preference to trade retrieval quality against indexing and query speed
  • Naming: Use descriptive names for knowledge bases to avoid confusion