Documentation Index
Fetch the complete documentation index at: https://docs.upsonic.ai/llms.txt
Use this file to discover all available pages before exploring further.
This task example shows how to build an Upsonic LLM agent that:
- Reuses the `find_company_website` agent to find and validate the official website of a company (via Serper API)
- Chains the result into the `extract_categories` tool to scrape ecommerce sales categories from that website
Overview
The Find Sales Categories example demonstrates how to build composite agents that combine multiple tools and agents to perform complex tasks. It showcases:
- Agent Composition: Combining website discovery with content extraction
- Tool Integration: Using custom tools alongside Upsonic agents
- E-commerce Analysis: Extracting business intelligence from company websites
- Structured Output: Returning organized category information
Key Features
- Agent Orchestration: Combines multiple agents and tools in a single workflow
- E-commerce Intelligence: Extracts sales categories from company websites
- Smart Filtering: Filters out non-sales related navigation items
- Flexible Extraction: Works with various website structures and designs
- Reusable Components: Modular design for easy extension
Code Structure
@tool
def extract_categories(website_url: str) -> list[str]:
    """
    Tool: Extract ecommerce sales categories from a website.

    Fetches the page, scans navigation/menu/category containers for link
    text, and returns deduplicated category labels in document order.

    Args:
        website_url: Fully-qualified URL of the site to scrape.

    Returns:
        A list of category names; empty on a missing URL or fetch failure.
    """
    if not website_url:
        return []
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        resp = requests.get(website_url, headers=headers, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        # Best-effort tool: report the failure and return an empty result
        # rather than raising into the agent.
        print(f"Error fetching {website_url}: {e}")
        return []
    soup = BeautifulSoup(resp.text, "html.parser")
    # Prefer obvious navigation containers; fall back to the whole document.
    candidate_roots = soup.select(
        "nav, header, [class*='menu'], [id*='menu'], [class*='nav'], "
        "[id*='nav'], [class*='category'], [id*='category'], [class*='departments']"
    )
    if not candidate_roots:
        candidate_roots = [soup]
    # Common non-sales navigation labels, compared case-insensitively.
    disallowed = {
        "home","about","contact","blog","support","faq","login","signup",
        "account","search","cart","wishlist","privacy","terms","careers"
    }
    cats, seen = [], set()
    for root in candidate_roots:
        for link in root.find_all("a", href=True):
            text = link.get_text(" ", strip=True)
            if not text:
                continue
            clean = re.sub(r"\s+", " ", text).strip()
            lower = clean.lower()
            # Skip implausible labels: too short/long, known site chrome,
            # duplicates, or text with no alphabetic characters at all.
            if len(lower) < 3 or len(lower) > 40:
                continue
            if lower in disallowed or lower in seen:
                continue
            if not any(c.isalpha() for c in lower):
                continue
            seen.add(lower)
            cats.append(clean)
    return cats
Main Agent
def find_sales_categories(company_name: str) -> dict:
    """
    Resolve a company's official website and scrape its sales categories.

    Args:
        company_name: Human-readable company name (e.g. "Nike").

    Returns:
        dict with "website" and "categories"; on success also "validated"
        and "score", on failure a "reason" explaining why no site was found.
    """
    # Step 1: delegate website discovery and validation to the existing agent.
    lookup = find_company_website(company_name)
    if not lookup.website:
        # No usable website — surface the agent's reason instead of scraping.
        return {"website": "", "categories": [], "reason": lookup.reason}
    # Step 2: scrape the discovered site for its ecommerce categories.
    site = str(lookup.website)
    return {
        "website": site,
        "categories": extract_categories(site),
        "validated": lookup.validated,
        "score": lookup.score,
    }
Complete Implementation
import re
from urllib.parse import urljoin, urlparse
import requests
from bs4 import BeautifulSoup
from upsonic.tools import tool
@tool
def extract_categories(website_url: str) -> list[str]:
    """
    Tool: Extract ecommerce sales categories from a website.

    Fetches the page, scans navigation/menu/category containers for link
    text, and returns deduplicated category labels in document order.

    Args:
        website_url: Fully-qualified URL of the site to scrape.

    Returns:
        A list of category names; empty on a missing URL or fetch failure.
    """
    if not website_url:
        return []
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        resp = requests.get(website_url, headers=headers, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        # Best-effort tool: report the failure and return an empty result
        # rather than raising into the agent.
        print(f"Error fetching {website_url}: {e}")
        return []
    soup = BeautifulSoup(resp.text, "html.parser")
    # Prefer obvious navigation containers; fall back to the whole document.
    candidate_roots = soup.select(
        "nav, header, [class*='menu'], [id*='menu'], [class*='nav'], "
        "[id*='nav'], [class*='category'], [id*='category'], [class*='departments']"
    )
    if not candidate_roots:
        candidate_roots = [soup]
    # Common non-sales navigation labels, compared case-insensitively.
    disallowed = {
        "home","about","contact","blog","support","faq","login","signup",
        "account","search","cart","wishlist","privacy","terms","careers"
    }
    cats, seen = [], set()
    for root in candidate_roots:
        for link in root.find_all("a", href=True):
            text = link.get_text(" ", strip=True)
            if not text:
                continue
            clean = re.sub(r"\s+", " ", text).strip()
            lower = clean.lower()
            # Skip implausible labels: too short/long, known site chrome,
            # duplicates, or text with no alphabetic characters at all.
            if len(lower) < 3 or len(lower) > 40:
                continue
            if lower in disallowed or lower in seen:
                continue
            if not any(c.isalpha() for c in lower):
                continue
            seen.add(lower)
            cats.append(clean)
    return cats
find_sales_categories.py
import sys, os, argparse
# Make the repository root importable so the `examples` package resolves
# when this file is run directly as a script (three levels up from here).
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from upsonic import Direct, Task
from examples.find_company_website.find_company_website import find_company_website
from examples.find_sales_categories.category_extractor import extract_categories
# Define a new agent
# NOTE(review): Direct appears to be Upsonic's agent class; the name labels
# this agent's role in logs/orchestration — confirm against Upsonic docs.
sales_category_agent = Direct(name="sales_category_agent")
# Define the orchestration function
def find_sales_categories(company_name: str) -> dict:
    """
    Resolve a company's official website and scrape its sales categories.

    Args:
        company_name: Human-readable company name (e.g. "Nike").

    Returns:
        dict with "website" and "categories"; on success also "validated"
        and "score", on failure a "reason" explaining why no site was found.
    """
    # Step 1: delegate website discovery and validation to the existing agent.
    lookup = find_company_website(company_name)
    if not lookup.website:
        # No usable website — surface the agent's reason instead of scraping.
        return {"website": "", "categories": [], "reason": lookup.reason}
    # Step 2: scrape the discovered site for its ecommerce categories.
    site = str(lookup.website)
    return {
        "website": site,
        "categories": extract_categories(site),
        "validated": lookup.validated,
        "score": lookup.score,
    }
if __name__ == "__main__":
    # CLI entry point: look up one company's site and print its categories.
    arg_parser = argparse.ArgumentParser(description="Find sales categories for a company")
    arg_parser.add_argument("--company", required=True, help="Company name, e.g. 'Nike'")
    cli_args = arg_parser.parse_args()
    company = cli_args.company
    # Describe the work as a Task, handing the agent both tools it may call.
    task = Task(
        description=f"Find website and extract sales categories for {company}",
        tools=[find_company_website, extract_categories],
        agent=sales_category_agent,
    )
    # Execute the task and report whatever the agent produces.
    outcome = sales_category_agent.do(task)
    print(f"\nResult for {company}: {outcome}")
How It Works
The flow is split into two reusable components:
1. Website Finder
- Uses Serper to search for the company
- Validates candidate websites
- Returns the best match as a structured `WebsiteResponse`
2. Category Extractor
- Fetches the website HTML
- Looks for navigation and menu elements
- Extracts category names, filtering out non-sales links
3. Sales Categories Agent
- Orchestrates the two steps above
- Input: Company name
- Output: JSON containing website, validation info, and extracted categories
Usage
Setup
- Install dependencies:
- Copy the example environment file and add your Serper API key:
- Edit `.env` and replace the placeholder with your key:
SERPER_API_KEY=your_api_key_here
You can get a free API key from Serper.dev.
Run the Agent
Run the sales categories agent with any company name:
uv run examples/find_sales_categories/find_sales_categories.py --company "Nike"
Example output:
Result for Nike: The official website for Nike is [https://www.nike.com/](https://www.nike.com/).
The sales categories on Nike's website include:
- Men: Shoes, Clothing, Accessories
- Women: Bras, Leggings, Skirts & Dresses, Tops
- Kids: Big Kids, Little Kids, Baby & Toddler
- Sports: Basketball, Soccer, Running, Training, Golf
- Collections: Nike Air, Nike FlyEase, Nike React
- Sale: Discounted Shoes, Clothing, Accessories
You can replace “Nike” with any other company, e.g.:
uv run examples/find_sales_categories/find_sales_categories.py --company "Mavi"
uv run examples/find_sales_categories/find_sales_categories.py --company "Adidas"
Advanced Usage
Custom Category Filtering
def extract_categories_with_filter(website_url: str, allowed_keywords: list[str]) -> list[str]:
    """Extract categories that match specific keywords.

    Args:
        website_url: Site to scrape via `extract_categories`.
        allowed_keywords: Keywords matched case-insensitively as substrings.

    Returns:
        Only the categories whose text contains at least one keyword.
    """
    # Lowercase each keyword once up front instead of once per category
    # (the original re-lowered every keyword inside the inner loop).
    wanted = [keyword.lower() for keyword in allowed_keywords]
    return [
        category
        for category in extract_categories(website_url)
        if any(keyword in category.lower() for keyword in wanted)
    ]
Batch Company Analysis
def analyze_multiple_companies(companies: list[str]) -> dict:
    """Analyze sales categories for multiple companies.

    Args:
        companies: Company names to process sequentially.

    Returns:
        Mapping of each company name to its `find_sales_categories` result.
    """
    # One sequential lookup per company, keyed by the input name.
    return {name: find_sales_categories(name) for name in companies}
Enhanced Category Analysis
class CategoryAnalysis(BaseModel):
    """Structured result of a detailed category analysis for one company."""
    # NOTE(review): BaseModel is presumably pydantic's — confirm the import,
    # which is not shown in this snippet.
    company: str                 # Company name as supplied by the caller
    website: str                 # Resolved official website URL ("" if none)
    categories: list[str]        # All extracted category labels
    category_count: int          # len(categories)
    main_categories: list[str]   # Short labels (<= 2 words) — heuristic split
    subcategories: list[str]     # Longer labels (> 2 words)
    confidence_score: float      # Validation score from the website finder
def analyze_categories_detailed(company: str) -> CategoryAnalysis:
    """Perform detailed category analysis.

    Args:
        company: Company name to analyze.

    Returns:
        A CategoryAnalysis summarizing the extracted category structure.
    """
    data = find_sales_categories(company)
    found = data["categories"]
    # Heuristic split: labels of at most two words are treated as top-level
    # categories, longer labels as subcategories.
    top_level, nested = [], []
    for label in found:
        (top_level if len(label.split()) <= 2 else nested).append(label)
    return CategoryAnalysis(
        company=company,
        website=data["website"],
        categories=found,
        category_count=len(found),
        main_categories=top_level,
        subcategories=nested,
        confidence_score=data.get("score", 0.0),
    )
Use Cases
- Competitive Analysis: Understand competitor product categories and structure
- Market Research: Analyze how companies organize their product offerings
- E-commerce Intelligence: Gather insights about business models and focus areas
- Category Mapping: Map product categories across different companies
- Business Development: Identify potential partnership opportunities
- Content Strategy: Understand how companies structure their navigation
File Structure
examples/find_sales_categories/
├── find_sales_categories.py # Agent: orchestrates website finder + category extractor
├── category_extractor.py # Tool: scrapes ecommerce categories
└── README.md # Documentation
Notes
- This agent depends on the `find_company_website` example. Make sure you have its code and `.env` setup in place
- Modular Design: Components can be reused independently
- Smart Filtering: Automatically excludes non-sales navigation items
- Flexible Extraction: Works with various website structures
- Error Handling: Graceful handling of failed requests and invalid websites
Dependencies
This example requires:
- Serper API: For website discovery
- Find Company Website: For website validation
- BeautifulSoup: For HTML parsing
- Upsonic Framework: For agent orchestration
Repository
View the complete example: Find Sales Categories Example