Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions deploy/docker/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,17 @@ async def get_markdown(
body: MarkdownRequest,
_td: Dict = Depends(token_dep),
):
"""
Convert a web page into Markdown format.

Supports multiple extraction modes:
- fit (default): Readability-based extraction for clean content
- raw: Direct DOM to Markdown conversion
- bm25: BM25 relevance ranking with optional query
- llm: LLM-based summarization with optional query

Use this tool when you need clean, readable text from web pages.
"""
if not body.url.startswith(("http://", "https://")) and not body.url.startswith(("raw:", "raw://")):
raise HTTPException(
400, "Invalid URL format. Must start with http://, https://, or for raw HTML (raw:, raw://)")
Expand Down
52 changes: 52 additions & 0 deletions tests/unit/test_mcp_tool_docstrings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Test that MCP tools have proper docstrings for LLM tool descriptions.

This test uses AST parsing to avoid importing the server module which has
many dependencies that may not be available in a test environment.
"""

import ast
import os
from typing import Optional


def get_function_docstring(filepath: str, function_name: str) -> Optional[str]:
    """Return the docstring of the first function named *function_name*.

    The file is parsed with ``ast`` instead of being imported, so none of the
    target module's own imports need to be installed.

    Args:
        filepath: Path to the Python source file to inspect.
        function_name: Name of the ``def`` / ``async def`` to look for.

    Returns:
        The cleaned docstring as produced by ``ast.get_docstring``, or
        ``None`` when no function with that name is found or the first
        match has no docstring.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python source.
    """
    # Read raw bytes and let the parser decode them: this follows Python's
    # own source-encoding rules (UTF-8 by default, PEP 263 cookie, BOM)
    # instead of the platform locale that a text-mode open() would use.
    with open(filepath, "rb") as source_file:
        tree = ast.parse(source_file.read(), filename=filepath)

    for node in ast.walk(tree):
        if (
            isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            and node.name == function_name
        ):
            return ast.get_docstring(node)
    return None


def test_get_markdown_has_docstring():
    """Check that get_markdown in deploy/docker/server.py is documented.

    The docstring must exist, be non-empty, and mention markdown.
    """
    tests_dir = os.path.dirname(__file__)
    path_parts = (tests_dir, '..', '..', 'deploy', 'docker', 'server.py')
    found = get_function_docstring(os.path.join(*path_parts), 'get_markdown')

    assert found is not None, "get_markdown should have a docstring"
    assert found != "", "get_markdown docstring should not be empty"
    mentions_markdown = any(word in found for word in ("Markdown", "markdown"))
    assert mentions_markdown, "get_markdown docstring should mention markdown"


def test_generate_html_has_docstring():
    """Check that generate_html in deploy/docker/server.py is documented.

    The docstring must exist and be non-empty.
    """
    tests_dir = os.path.dirname(__file__)
    path_parts = (tests_dir, '..', '..', 'deploy', 'docker', 'server.py')
    found = get_function_docstring(os.path.join(*path_parts), 'generate_html')

    assert found is not None, "generate_html should have a docstring"
    assert found != "", "generate_html docstring should not be empty"


if __name__ == "__main__":
    # Allow running the checks directly, without a test runner.
    for check in (
        test_get_markdown_has_docstring,
        test_generate_html_has_docstring,
    ):
        check()
    print("All docstring tests passed!")