Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/crewai-tools/src/crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@
from crewai_tools.tools.weaviate_tool.vector_search import WeaviateVectorSearchTool
from crewai_tools.tools.website_search.website_search_tool import WebsiteSearchTool
from crewai_tools.tools.xml_search_tool.xml_search_tool import XMLSearchTool
from crewai_tools.tools.you_contents_tool.you_contents_tool import YouContentsTool
from crewai_tools.tools.you_research_tool.you_research_tool import YouResearchTool
from crewai_tools.tools.you_search_tool.you_search_tool import YouSearchTool
from crewai_tools.tools.youtube_channel_search_tool.youtube_channel_search_tool import (
YoutubeChannelSearchTool,
)
Expand Down Expand Up @@ -285,6 +288,9 @@
"WeaviateVectorSearchTool",
"WebsiteSearchTool",
"XMLSearchTool",
"YouContentsTool",
"YouResearchTool",
"YouSearchTool",
"YoutubeChannelSearchTool",
"YoutubeVideoSearchTool",
"ZapierActionTool",
Expand Down
6 changes: 6 additions & 0 deletions lib/crewai-tools/src/crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@
from crewai_tools.tools.youtube_video_search_tool.youtube_video_search_tool import (
YoutubeVideoSearchTool,
)
from crewai_tools.tools.you_contents_tool.you_contents_tool import YouContentsTool
from crewai_tools.tools.you_research_tool.you_research_tool import YouResearchTool
from crewai_tools.tools.you_search_tool.you_search_tool import YouSearchTool
Comment thread
cursor[bot] marked this conversation as resolved.
from crewai_tools.tools.zapier_action_tool.zapier_action_tool import ZapierActionTools


Expand Down Expand Up @@ -268,6 +271,9 @@
"WeaviateVectorSearchTool",
"WebsiteSearchTool",
"XMLSearchTool",
"YouContentsTool",
"YouResearchTool",
"YouSearchTool",
"YoutubeChannelSearchTool",
"YoutubeVideoSearchTool",
"ZapierActionTools",
Expand Down
127 changes: 127 additions & 0 deletions lib/crewai-tools/src/crewai_tools/tools/you_contents_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# YouContentsTool

Extracts content from web pages using the You.com Contents API.

## Installation

1. Get your API key from https://you.com/platform/api-keys

2. Install dependencies and set your API key:

```bash
uv add requests
export YOU_API_KEY="your-api-key-here"
```

## Usage

```python
from crewai_tools import YouContentsTool

# Basic usage - single URL
contents_tool = YouContentsTool()
content = contents_tool.run(urls="https://example.com")

# Multiple URLs
content = contents_tool.run(urls=[
"https://example.com/page1",
"https://example.com/page2"
])

# With advanced options
contents_tool = YouContentsTool(
formats=["markdown", "metadata"],
crawl_timeout=45
)
content = contents_tool.run(urls="https://example.com")
```

## Parameters

- **urls** (required, passed to `run()`): Single URL string or list of URL strings (must be valid URLs)
- **formats** (constructor option): List of output formats - "markdown", "html", or "metadata" (default: ["markdown"])
- **crawl_timeout** (constructor option): Page crawling timeout in seconds; values outside 1-60 are clamped to that range (default: 10)
- **timeout** (constructor option): API request timeout in seconds (default: 60)

## Output Formats

### Markdown
Clean text extraction, best for reading and analysis.

```python
contents_tool = YouContentsTool(formats=["markdown"])
result = contents_tool.run(urls="https://example.com")
# Returns: { "url": "...", "title": "...", "markdown": "# Content..." }
```

### HTML
Preserves layout and structure, best for rendering.

```python
contents_tool = YouContentsTool(formats=["html"])
result = contents_tool.run(urls="https://example.com")
# Returns: { "url": "...", "title": "...", "html": "<html>..." }
```

### Metadata
Structured data including OpenGraph and JSON-LD information.

```python
contents_tool = YouContentsTool(formats=["metadata"])
result = contents_tool.run(urls="https://example.com")
# Returns: {
# "url": "...",
# "title": "...",
# "metadata": {
# "site_name": "Example Site",
# "favicon_url": "https://example.com/favicon.ico"
# }
# }
```

### Multiple Formats
Request multiple formats in a single API call.

```python
contents_tool = YouContentsTool(formats=["markdown", "html", "metadata"])
result = contents_tool.run(urls="https://example.com")
# Returns all requested formats in the response
```

## With CrewAI

```python
from crewai import Agent, Task
from crewai_tools import YouContentsTool

contents_tool = YouContentsTool(formats=["markdown"])

analyzer = Agent(
role="Content Analyzer",
goal="Extract and analyze web content",
backstory="Expert at content extraction and analysis",
tools=[contents_tool]
)

task = Task(
description="Extract content from https://example.com and summarize key points",
agent=analyzer,
expected_output="Summary of key points from the webpage"
)
```

## Combining with YouSearchTool

```python
from crewai_tools import YouSearchTool, YouContentsTool

search_tool = YouSearchTool(count=5)
contents_tool = YouContentsTool()

# Search for pages
results = search_tool.run(query="Python async programming")

# Extract content from top results
# (you would parse the JSON results and extract URLs)
content = contents_tool.run(urls=["url1", "url2"])
```
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import json
import os
from typing import Any, Literal

from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field
import requests


class YouContentsToolSchema(BaseModel):
    """Argument schema for YouContentsTool: the page URL(s) to fetch."""

    # Required field; accepts either one URL string or a list of URL strings.
    urls: list[str] | str = Field(
        default=...,
        description=(
            "The URL(s) to extract content from. "
            "Can be a single URL or a list of URLs."
        ),
    )
Comment thread
cursor[bot] marked this conversation as resolved.


class YouContentsTool(BaseTool):
    """Extract web page content through the You.com Contents API.

    The tool POSTs the requested URLs to ``contents_url`` and returns the
    API's JSON response re-serialized as an indented string. Failures during
    a run are reported as human-readable strings instead of being raised, so
    ``_run`` always returns ``str``.

    Configuration (set at construction time):
        contents_url: Endpoint the request is sent to.
        formats: Output formats requested from the API.
        crawl_timeout: Per-page crawl timeout in seconds. Clamped to 1-60
            before being sent; omitted from the request when ``None``.
        timeout: HTTP request timeout in seconds.
    """

    name: str = "You.com Contents Extractor"
    description: str = (
        "Extracts content from one or more web pages using the You.com Contents API. "
        "Returns structured data in markdown, HTML, or metadata format."
    )
    args_schema: type[BaseModel] = YouContentsToolSchema
    contents_url: str = "https://ydc-index.io/v1/contents"
    formats: list[Literal["markdown", "html", "metadata"]] = Field(
        default_factory=lambda: ["markdown"],
    )
    crawl_timeout: int | None = 10
    timeout: int = 60
    env_vars: list[EnvVar] = Field(
        default_factory=lambda: [
            EnvVar(
                name="YOU_API_KEY",
                description="API key for You.com contents extraction service",
                required=True,
            ),
        ],
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize the tool and verify that an API key is available.

        Args:
            *args: Positional arguments forwarded to the base class.
            **kwargs: Keyword arguments forwarded to the base class.

        Raises:
            ValueError: If ``YOU_API_KEY`` is unset, empty, or blank.
        """
        super().__init__(*args, **kwargs)
        # An empty or blank value is as unusable as a missing one, so fail
        # early here instead of on the first request.
        if not os.environ.get("YOU_API_KEY", "").strip():
            raise ValueError(
                "YOU_API_KEY environment variable is required for YouContentsTool. "
                "Get your API key at https://you.com/platform/api-keys",
            )

    @staticmethod
    def _normalize_urls(urls: list[str] | str) -> list[str]:
        """Return ``urls`` as a list of stripped, non-empty URL strings.

        Raises:
            ValueError: If no URL is given, or an entry is not a non-blank
                string.
        """
        if isinstance(urls, str):
            candidates = [urls]
        elif not urls:
            candidates = []
        elif isinstance(urls, (list, tuple)):
            candidates = list(urls)
        else:
            raise ValueError("urls must be a URL string or a list of URL strings")

        if not candidates:
            raise ValueError("At least one URL is required")

        cleaned: list[str] = []
        for url in candidates:
            if not isinstance(url, str) or not url.strip():
                raise ValueError("URLs must be non-empty strings")
            cleaned.append(url.strip())
        return cleaned

    def _run(
        self,
        urls: list[str] | str,
    ) -> str:
        """Extract content from the given URL(s).

        Args:
            urls: A single URL or a list of URLs to extract content from.

        Returns:
            The API response as an indented JSON string, or an error message
            prefixed with "Invalid parameters:", "Error extracting content:",
            or "Error parsing API response:" depending on where it failed.
        """
        # Validate input on its own so that only genuine argument problems
        # are reported as "Invalid parameters".
        try:
            url_list = self._normalize_urls(urls)
        except ValueError as e:
            return f"Invalid parameters: {e!s}"

        # The key is re-read on every call because the environment may have
        # changed since construction.
        api_key = os.environ.get("YOU_API_KEY", "").strip()
        if not api_key:
            return (
                "Error extracting content: "
                "YOU_API_KEY environment variable is not set"
            )

        payload: dict[str, Any] = {
            "urls": url_list,
            "formats": self.formats,
        }
        # Clamp the crawl timeout into the 1-60 second range before sending.
        if self.crawl_timeout is not None:
            payload["crawl_timeout"] = min(max(self.crawl_timeout, 1), 60)

        headers = {
            "X-API-Key": api_key,
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(
                self.contents_url,
                headers=headers,
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            return f"Error extracting content: {e!s}"

        # Decode separately: a malformed body is a response-parsing failure,
        # not an argument or transport error.
        try:
            results = response.json()
        except ValueError as e:
            return f"Error parsing API response: {e!s}"

        return json.dumps(results, indent=2)
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# YouResearchTool

Performs comprehensive, multi-step research using the You.com Research API.

## Installation

1. Get your API key from https://you.com/platform/api-keys

2. Install dependencies and set your API key:

```bash
uv add requests
export YOU_API_KEY="your-api-key-here"
```

## Usage

```python
from crewai_tools import YouResearchTool

# Basic usage
research_tool = YouResearchTool()
result = research_tool.run(input="What are the latest advances in quantum computing?")

# With effort level control
result = research_tool.run(
input="What measurable actions improved air quality in major cities over the past decade?",
research_effort="deep"
)

# Configure default effort level at initialization
research_tool = YouResearchTool(research_effort="exhaustive")
result = research_tool.run(input="Explain the economic impact of renewable energy adoption globally")
```

## Parameters

- **input** (required): The research question or complex query (max 40,000 characters)
- **research_effort**: Controls depth vs. speed tradeoff (default: "standard")
- `lite`: Fast answers, good for straightforward questions
- `standard`: Balanced speed and depth, fits most questions
- `deep`: More thorough cross-referencing, use when accuracy matters
- `exhaustive`: Most comprehensive, best for complex research tasks
- **timeout**: API request timeout in seconds (default: 120)

## Response Format

```json
{
"output": {
"content": "Comprehensive answer with inline citations [[1]], [[2]]...",
"content_type": "text",
"sources": [
{
"url": "https://example.com/article",
"title": "Article Title",
"snippets": ["Relevant excerpt from the source..."]
}
]
}
}
```

## With CrewAI

```python
from crewai import Agent, Task
from crewai_tools import YouResearchTool

research_tool = YouResearchTool(research_effort="deep")

researcher = Agent(
role="Research Analyst",
goal="Conduct thorough research on complex topics",
backstory="Expert at synthesizing information from multiple sources",
tools=[research_tool]
)

task = Task(
description="Research the long-term economic effects of remote work adoption",
agent=researcher,
expected_output="Comprehensive analysis with cited sources"
)
```

## Combining with Other You.com Tools

```python
from crewai_tools import YouSearchTool, YouResearchTool, YouContentsTool

# Quick search for recent results
search_tool = YouSearchTool(count=5)

# Deep research for complex questions
research_tool = YouResearchTool(research_effort="deep")

# Extract full content from specific pages
contents_tool = YouContentsTool()
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export YouResearchTool at package level so it can be imported from
# ``crewai_tools.tools.you_research_tool`` without naming the inner module.
from crewai_tools.tools.you_research_tool.you_research_tool import YouResearchTool

__all__ = ["YouResearchTool"]
Loading