from upsonic.loaders import PdfLoader, PdfLoaderConfig, MarkdownLoader, MarkdownLoaderConfig
from upsonic.text_splitter import RecursiveChunker, RecursiveChunkingConfig, SemanticChunker, SemanticChunkingConfig
# One loader per source. Entries are matched to `sources` by position,
# so the order of this list must follow the order of the source paths.
loaders = [
    PdfLoader(PdfLoaderConfig()),            # index 0 -> doc1.pdf
    MarkdownLoader(MarkdownLoaderConfig()),  # index 1 -> guide.md
    PdfLoader(PdfLoaderConfig()),            # index 2 -> doc2.pdf
]

# One splitter per source, matched to `sources` by position as well.
splitters = [
    # index 0 -> doc1.pdf
    RecursiveChunker(RecursiveChunkingConfig(chunk_size=512)),
    # index 1 -> guide.md (configured with the same embedding provider
    # that is handed to the knowledge base below)
    SemanticChunker(
        SemanticChunkingConfig(embedding_provider=embedding),
    ),
    # index 2 -> doc2.pdf
    RecursiveChunker(RecursiveChunkingConfig(chunk_size=1024)),
]

# NOTE(review): `KnowledgeBase`, `embedding` and `vectordb` are not defined
# in this snippet; presumably they are set up earlier -- confirm.
kb = KnowledgeBase(
    sources=["doc1.pdf", "guide.md", "doc2.pdf"],
    embedding_provider=embedding,
    vectordb=vectordb,
    loaders=loaders,      # loaders[i] is paired with sources[i]
    splitters=splitters,  # splitters[i] is paired with sources[i]
)