Documentation Index
Fetch the complete documentation index at: https://docs.upsonic.ai/llms.txt
Use this file to discover all available pages before exploring further.
Provider Selection Helper
Use the infer_provider function to create OCR instances by provider name without importing engine classes.
from upsonic.ocr import infer_provider
# Build an OCR instance from a provider name (no engine class import needed)
ocr = infer_provider(
    'easyocr',
    languages=['en'],
    rotation_fix=True,
)
text = ocr.get_text('document.pdf')

# Available provider names:
#   'easyocr', 'rapidocr', 'tesseract', 'deepseek', 'deepseek_ocr',
#   'paddleocr', 'paddle', 'ppstructurev3', 'ppchatocrv4', 'paddleocrvl'
Async Processing
All OCR methods support async execution. The framework is async-first — sync methods are convenience wrappers around the async core.
import asyncio
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine
async def process_documents():
    """Run the async OCR entry points on two sample PDFs and print the results."""
    engine = EasyOCREngine(languages=['en'], gpu=True)
    ocr = OCR(layer_1_ocr_engine=engine)

    # Async text extraction
    text = await ocr.get_text_async('document.pdf')
    print(text)

    # Async file processing with full results
    result = await ocr.process_file_async('report.pdf')
    print(f"Confidence: {result.confidence:.2%}")

asyncio.run(process_documents())
Timeout Control
Use layer_1_timeout to set a maximum processing time for the OCR engine. If the timeout is exceeded, an OCRTimeoutError is raised.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import EasyOCREngine
from upsonic.ocr.exceptions import OCRTimeoutError
engine = EasyOCREngine(languages=['en'])
ocr = OCR(layer_1_ocr_engine=engine, layer_1_timeout=30)

try:
    text = ocr.get_text('large_document.pdf')
except OCRTimeoutError:
    # Raised when processing exceeds layer_1_timeout
    print("OCR processing timed out")
Batch Processing with DeepSeek
DeepSeek OCR is optimized for multi-page PDFs: it processes all pages in a single batch for better performance.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import DeepSeekOCREngine
# Configure the DeepSeek engine
engine = DeepSeekOCREngine(
    model_name="deepseek-ai/DeepSeek-OCR",
    temperature=0.0,
    max_tokens=8192,
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# PDFs are batch-processed automatically
result = ocr.process_file('multi_page_document.pdf')
print(f"Processed {result.page_count} pages")
Advanced PaddleOCR Features
PaddleOCR providers offer specialized features for complex document understanding.
Structure Recognition with PPStructureV3Engine
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPStructureV3Engine
# Configure the structure-aware engine
engine = PPStructureV3Engine(
    use_table_recognition=True,
    use_formula_recognition=True,
    use_chart_recognition=True,
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Extract structured content through the underlying provider
result = ocr.provider.predict('research_paper.pdf')

# Combine the pages into a markdown representation
markdown_text = ocr.provider.concatenate_markdown_pages(result)
print(markdown_text)

from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import PPChatOCRv4Engine
# Field extraction with PPChatOCRv4Engine
# Configure the chat-based engine
engine = PPChatOCRv4Engine(
    use_table_recognition=True,
    use_seal_recognition=True,
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Extract visual information from the document
visual_result = ocr.provider.visual_predict('invoice.pdf')

# Build vector info from the visual result for retrieval
vector_info = ocr.provider.build_vector(
    visual_result,
    min_characters=3500,
    block_size=300,
)

# Ask the chat interface for specific fields
invoice_data = ocr.provider.chat(
    key_list=['invoice_number', 'date', 'total_amount', 'vendor_name'],
    visual_info=visual_result,
    use_vector_retrieval=True,
    vector_info=vector_info,
)

print(f"Invoice Number: {invoice_data.get('invoice_number')}")
print(f"Date: {invoice_data.get('date')}")
print(f"Total: {invoice_data.get('total_amount')}")

Image Preprocessing
Apply preprocessing to improve OCR accuracy for low-quality images.
from upsonic.ocr import OCR
from upsonic.ocr.layer_1.engines import TesseractOCREngine
# Configure the engine with every preprocessing option turned on
engine = TesseractOCREngine(
    languages=['eng'],
    rotation_fix=True,      # Fix skewed/rotated images
    enhance_contrast=True,  # Improve text clarity
    remove_noise=True,      # Remove background noise
    pdf_dpi=300,            # High quality PDF rendering
)

# Wrap the engine in the OCR orchestrator
ocr = OCR(layer_1_ocr_engine=engine)

# Run OCR on a low-quality image
text = ocr.get_text('skewed_noisy_image.jpg')