from upsonic import Agent, Task, KnowledgeBase
from upsonic.loaders import TextLoader, TextLoaderConfig
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.text_splitter import SemanticChunker, SemanticChunkingConfig, BreakpointThresholdType
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
# --- Semantic splitter -------------------------------------------------
# The same embedding provider instance is handed to the chunker here and
# reused by the KnowledgeBase further down.
embedding_config = OpenAIEmbeddingConfig()
embedding = OpenAIEmbedding(embedding_config)

# NOTE(review): with PERCENTILE / 95.0 the chunker presumably breaks where
# the embedding distance between neighbouring passages exceeds the 95th
# percentile -- confirm against the Upsonic SemanticChunker documentation.
splitter_config = SemanticChunkingConfig(
    chunk_size=512,
    chunk_overlap=50,
    embedding_provider=embedding,
    breakpoint_threshold_type=BreakpointThresholdType.PERCENTILE,
    breakpoint_threshold_amount=95.0,
)
splitter = SemanticChunker(splitter_config)
# --- Knowledge base ----------------------------------------------------
# Loader for the plain-text source, using the default loader configuration.
loader_config = TextLoaderConfig()
loader = TextLoader(loader_config)

# Chroma collection running in IN_MEMORY mode.
# NOTE(review): vector_size=1536 presumably matches the output dimension of
# the default OpenAI embedding model -- verify if the embedding config changes.
chroma_connection = ConnectionConfig(mode=Mode.IN_MEMORY)
chroma_config = ChromaConfig(
    collection_name="semantic_docs",
    vector_size=1536,
    connection=chroma_connection,
)
vectordb = ChromaProvider(chroma_config)

# Wire source, embedding provider, vector store, loader and splitter together.
# "document.txt" is a relative path (presumably resolved against the working
# directory -- confirm).
kb = KnowledgeBase(
    sources=["document.txt"],
    embedding_provider=embedding,
    vectordb=vectordb,
    loaders=[loader],
    splitters=[splitter],
)
# --- Query -------------------------------------------------------------
# Build an agent for the "openai/gpt-4o" model, pass the knowledge base as
# task context, run the task and print whatever the agent returns.
agent = Agent("openai/gpt-4o")

task_description = "Identify different topics"
task_context = [kb]
task = Task(task_description, context=task_context)

result = agent.do(task)
print(result)