Complete API documentation for the Browser Controller component.
The main controller class for managing browser instances and sessions.
class BrowserController:
"""
Main Browser Controller class providing high-level browser automation interface.
Handles browser lifecycle, session management, and provides integration
with LAM (Large Action Model) systems.
"""
def __init__(
self,
config: Optional[BrowserConfig] = None,
config_file: Optional[str] = None
) -> None
Parameters:
config(BrowserConfig, optional): Browser configuration object
config_file(str, optional): Path to JSON configuration file
Examples:
# Using configuration object
config = BrowserConfig(browser_type=BrowserType.CHROME, headless=True)
controller = BrowserController(config=config)
# Using configuration file
controller = BrowserController(config_file="browser_config.json")
# Using default configuration
controller = BrowserController()
async def __aenter__(self) -> 'BrowserController'
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None
Usage:
async with BrowserController(config) as controller:
# Browser automatically launched
session = await controller.create_session()
# ... automation code ...
# Browser automatically closed when exiting
async def launch(self) -> None
Launch the browser instance.
Raises:
BrowserLaunchError: If browser fails to launch
BrowserControllerError: General browser controller errors
Example:
controller = BrowserController(config)
await controller.launch()
async def close(self) -> None
Close the browser instance and clean up all sessions.
Example:
await controller.close()
def is_launched(self) -> bool
Check if browser is currently launched.
Returns:
bool: True if browser is running, False otherwise
async def create_session(
self,
session_config: Optional[Dict[str, Any]] = None
) -> BrowserSession
Create a new browser session.
Parameters:
session_config(dict, optional): Session-specific configuration overrides
Returns:
BrowserSession: New browser session instance
Raises:
SessionError: If session creation fails
Example:
# Basic session creation
session = await controller.create_session()
# Session with custom configuration
session = await controller.create_session({
"window_size": (1920, 1080),
"user_agent": "Custom Agent"
})
async def close_session(self, session_id: str) -> bool
Close a specific browser session.
Parameters:
session_id(str): ID of session to close
Returns:
bool: True if session was closed successfully
Example:
success = await controller.close_session(session.session_id)
def get_session(self, session_id: str) -> Optional[BrowserSession]
Get session by ID.
Parameters:
session_id(str): Session identifier
Returns:
BrowserSession or None: Session instance if found, None otherwise
def get_active_sessions(self) -> List[str]
Get list of active session IDs.
Returns:
List[str]: List of active session identifiers
def get_session_count(self) -> int
Get number of active sessions.
Returns:
int: Count of active sessions
async def get_browser_info(self) -> Dict[str, Any]
Get browser information and capabilities.
Returns:
Dict[str, Any]: Browser information including version, capabilities
Example:
info = await controller.get_browser_info()
print(f"Browser: {info['name']} {info['version']}")
async def take_screenshot(
self,
file_path: Optional[str] = None
) -> Union[str, bytes]
Take screenshot of current browser state.
Parameters:
file_path(str, optional): Path to save screenshot
Returns:
str: File path if saved to file
bytes: Screenshot data if no file path provided
Individual browser session for page interaction.
class BrowserSession:
"""
Represents an individual browser session with full page interaction capabilities.
Provides methods for navigation, element interaction, form handling, and more.
"""
@property
def session_id(self) -> str
"""Get the session identifier."""
@property
def config(self) -> BrowserConfig
"""Get the session configuration."""
async def navigate_to(
self,
url: str,
wait_for_load: bool = True,
timeout: Optional[float] = None
) -> NavigationResult
Navigate to a URL.
Parameters:
url(str): Target URL
wait_for_load(bool): Whether to wait for page load completion
timeout(float, optional): Custom timeout in seconds
Returns:
NavigationResult: Navigation result with status and timing
Example:
result = await session.navigate_to("https://example.com")
print(f"Navigation took {result.load_time:.2f} seconds")
async def get_current_url(self) -> str
Get current page URL.
Returns:
str: Current URL
async def get_title(self) -> str
Get current page title.
Returns:
str: Page title
async def get_dom(self) -> str
Get the complete DOM (HTML source) of the current page.
Returns:
str: Complete HTML source code as string for DOM analyzer component
Raises:
BrowserControllerError: If DOM retrieval fails
Example:
# Get DOM for analysis
dom_html = await controller.get_dom()
print(f"DOM size: {len(dom_html):,} characters")
# Feed to DOM analyzer component
analysis_result = dom_analyzer.analyze(dom_html)
Notes:
- Automatically launches browser if not already running
- Executes asynchronously to avoid blocking
- Logs DOM size and current URL for debugging
- Perfect for integration with LAM DOM analysis components
async def go_back(self) -> None
Navigate back in browser history.
async def go_forward(self) -> None
Navigate forward in browser history.
async def refresh(self) -> None
Refresh current page.
async def find_element(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> Optional[WebElement]
Find a single element on the page.
Parameters:
locator(ElementLocator): CSS selector, XPath, or element locator
timeout(float, optional): Custom timeout for element search
Returns:
WebElement or None: Found element or None if not found
Examples:
# CSS selector
element = await session.find_element("button.submit")
# XPath
element = await session.find_element("//button[contains(@class, 'submit')]")
# With timeout
element = await session.find_element("div.loading", timeout=30)
async def find_elements(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> List[WebElement]
Find multiple elements on the page.
Parameters:
locator(ElementLocator): CSS selector, XPath, or element locator
timeout(float, optional): Custom timeout for element search
Returns:
List[WebElement]: List of found elements (empty if none found)
async def click_element(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> bool
Click an element on the page.
Parameters:
locator(ElementLocator): Element locator
timeout(float, optional): Custom timeout
Returns:
bool: True if click was successful
Example:
success = await session.click_element("button#submit")
if success:
print("Button clicked successfully")
async def type_text(
self,
locator: ElementLocator,
text: str,
clear_first: bool = True,
timeout: Optional[float] = None
) -> bool
Type text into an input element.
Parameters:
locator(ElementLocator): Element locator
text(str): Text to type
clear_first(bool): Whether to clear field before typing
timeout(float, optional): Custom timeout
Returns:
bool: True if typing was successful
Example:
success = await session.type_text("input[name='username']", "john_doe")
async def get_element_text(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> str
Get text content of an element.
Parameters:
locator(ElementLocator): Element locator
timeout(float, optional): Custom timeout
Returns:
str: Element text content
async def get_element_attribute(
self,
locator: ElementLocator,
attribute: str,
timeout: Optional[float] = None
) -> Optional[str]
Get attribute value of an element.
Parameters:
locator(ElementLocator): Element locator
attribute(str): Attribute name
timeout(float, optional): Custom timeout
Returns:
str or None: Attribute value or None if not found
Example:
value = await session.get_element_attribute("input[name='email']", "value")
async def wait_for_element(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> Optional[WebElement]
Wait for element to appear on page.
Parameters:
locator(ElementLocator): Element locator
timeout(float, optional): Maximum wait time
Returns:
WebElement or None: Element if found within timeout
async def wait_for_element_to_disappear(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> bool
Wait for element to disappear from page.
Parameters:
locator(ElementLocator): Element locator
timeout(float, optional): Maximum wait time
Returns:
bool: True if element disappeared within timeout
async def wait_for_clickable(
self,
locator: ElementLocator,
timeout: Optional[float] = None
) -> Optional[WebElement]
Wait for element to become clickable.
Parameters:
locator(ElementLocator): Element locator
timeout(float, optional): Maximum wait time
Returns:
WebElement or None: Clickable element if found
async def take_screenshot(
self,
file_path: Optional[str] = None
) -> Union[str, bytes, bool]
Take screenshot of current page.
Parameters:
file_path(str, optional): Path to save screenshot
Returns:
str: File path if saved to file
bytes: Screenshot data if no file path provided
bool: True if successful (for some implementations)
Example:
# Save to file
await session.take_screenshot("page.png")
# Get as bytes
screenshot_data = await session.take_screenshot()
async def take_element_screenshot(
self,
locator: ElementLocator,
file_path: str,
timeout: Optional[float] = None
) -> bool
Take screenshot of specific element.
Parameters:
locator(ElementLocator): Element locator
file_path(str): Path to save screenshot
timeout(float, optional): Custom timeout
Returns:
bool: True if screenshot was successful
async def execute_script(
self,
script: str,
*args
) -> Any
Execute JavaScript in browser context.
Parameters:
script(str): JavaScript code to execute
args: Arguments to pass to script
Returns:
Any: Script return value
Example:
title = await session.execute_script("return document.title;")
await session.execute_script("window.scrollTo(0, document.body.scrollHeight);")
async def add_cookie(self, cookie_dict: Dict[str, Any]) -> None
Add cookie to browser session.
Parameters:
cookie_dict(dict): Cookie information
Example:
await session.add_cookie({
"name": "session_id",
"value": "abc123",
"domain": "example.com"
})
async def get_cookies(self) -> List[Dict[str, Any]]
Get all cookies from current domain.
Returns:
List[Dict[str, Any]]: List of cookie dictionaries
async def delete_all_cookies(self) -> None
Delete all cookies from browser session.
Pydantic-based configuration class with validation.
class BrowserConfig(BaseModel):
"""
Browser configuration with Pydantic validation.
Supports environment variable loading and validation.
"""
# Browser settings
browser_type: BrowserType = BrowserType.CHROME
headless: bool = True
window_size: Tuple[int, int] = (1280, 720)
# Timeouts
page_load_timeout: float = 30.0
implicit_wait: float = 10.0
script_timeout: float = 30.0
# Browser options
browser_options: Dict[str, Any] = {}
# Proxy settings
proxy_settings: Optional[Dict[str, str]] = None
# Logging
log_level: str = "INFO"
enable_logging: bool = True
The following environment variables are automatically loaded:
# Browser settings
BROWSER_TYPE=chrome # chrome, firefox, edge
HEADLESS=true # true, false
WINDOW_WIDTH=1280 # integer
WINDOW_HEIGHT=720 # integer
# Timeouts
PAGE_LOAD_TIMEOUT=30 # float seconds
IMPLICIT_WAIT=10 # float seconds
SCRIPT_TIMEOUT=30 # float seconds
# Logging
LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR
ENABLE_LOGGING=true # true, false
# Proxy (optional)
HTTP_PROXY=http://proxy:8080
HTTPS_PROXY=https://proxy:8080
# Basic configuration
config = BrowserConfig()
# Custom configuration
config = BrowserConfig(
browser_type=BrowserType.CHROME,
headless=False,
window_size=(1920, 1080),
page_load_timeout=45,
browser_options={
"disable_images": True,
"user_agent": "Custom Agent"
}
)
# With proxy
config = BrowserConfig(
proxy_settings={
"http": "http://proxy:8080",
"https": "https://proxy:8080"
}
)
Utility class for loading configuration from various sources.
class ConfigManager:
"""Manages configuration loading from files and environment."""
@classmethod
def load_from_file(cls, file_path: str) -> BrowserConfig
Load configuration from JSON file.
@classmethod
def load_from_env(cls) -> BrowserConfig
Load configuration from environment variables.
class BrowserType(str, Enum):
CHROME = "chrome"
FIREFOX = "firefox"
EDGE = "edge"
SAFARI = "safari"
class ActionType(str, Enum):
NAVIGATE = "navigate"
CLICK = "click"
TYPE = "type"
WAIT = "wait"
SCREENSHOT = "screenshot"
class WaitStrategy(str, Enum):
PRESENCE = "presence"
VISIBLE = "visible"
CLICKABLE = "clickable"
INVISIBLE = "invisible"
@dataclass
class NavigationResult:
url: str
success: bool
load_time: float
status_code: Optional[int] = None
error: Optional[str] = None
@dataclass
class PageInfo:
url: str
title: str
source_length: int
viewport_size: Tuple[int, int]
ready_state: str
@dataclass
class SessionMetadata:
session_id: str
created_at: datetime
last_activity: datetime
page_count: int
current_url: str
ElementLocator = Union[str, Tuple[str, str]] # CSS selector or (strategy, locator)
Coordinates = Tuple[int, int] # (x, y) coordinates
WindowSize = Tuple[int, int] # (width, height)
def get_logger(name: str) -> Logger
Get a structured logger instance.
Parameters:
name(str): Logger name
Returns:
Logger: Loguru logger instance
Example:
from src.utils.logger import get_logger
logger = get_logger("MyAutomation")
logger.info("Starting automation", session_id="12345")
logger.error("Failed to find element", selector="button.submit")
def configure_logging(
level: str = "INFO",
file_rotation: str = "10 MB",
retention: str = "30 days",
format: Optional[str] = None
) -> None
Configure logging system.
Parameters:
level(str): Log level (DEBUG, INFO, WARNING, ERROR)
file_rotation(str): File rotation trigger
retention(str): Log retention period
format(str, optional): Custom log format
async def wait_for_condition(
session: BrowserSession,
condition: Callable,
timeout: float = 30,
poll_frequency: float = 0.5
) -> Any
Wait for custom condition to be met.
Parameters:
session(BrowserSession): Browser session
condition(Callable): Condition function to check
timeout(float): Maximum wait time
poll_frequency(float): Check frequency in seconds
Returns:
Any: Condition result when met
BrowserControllerError
├── BrowserLaunchError
├── BrowserCloseError
├── SessionError
│ ├── SessionCreationError
│ ├── SessionTimeoutError
│ └── SessionCleanupError
├── NavigationError
│ ├── PageLoadError
│ └── NavigationTimeoutError
└── ElementError
├── ElementNotFoundError
├── ElementNotClickableError
└── ElementInteractionError
class BrowserControllerError(Exception):
"""Base exception for browser controller errors."""
def __init__(
self,
message: str,
session_id: Optional[str] = None,
url: Optional[str] = None,
**kwargs
):
self.message = message
self.session_id = session_id
self.url = url
self.extra_data = kwargs
super().__init__(self.message)
class SessionError(BrowserControllerError):
"""Session-related errors."""
class NavigationError(BrowserControllerError):
"""Navigation-related errors."""
class ElementError(BrowserControllerError):
"""Element interaction errors."""
def __init__(
self,
message: str,
selector: Optional[str] = None,
**kwargs
):
self.selector = selector
super().__init__(message, **kwargs)
import asyncio
from src.core.browser_controller import BrowserController
from src.config.browser_config import BrowserConfig
from src.types.browser_types import BrowserType
from src.utils.logger import get_logger
async def complete_api_example():
"""Demonstrates complete API usage"""
# Configure logging
logger = get_logger("APIExample")
# Create configuration
config = BrowserConfig(
browser_type=BrowserType.CHROME,
headless=True,
window_size=(1280, 720),
page_load_timeout=30
)
# Use browser controller
async with BrowserController(config) as controller:
logger.info("Browser controller initialized")
# Create session
session = await controller.create_session()
logger.info(f"Created session: {session.session_id}")
try:
# Navigate
result = await session.navigate_to("https://httpbin.org/forms/post")
logger.info(f"Navigation completed in {result.load_time:.2f}s")
# Interact with form
await session.type_text("input[name='custname']", "API Test User")
await session.type_text("input[name='custemail']", "test@api.com")
await session.click_element("input[value='medium']")
# Take screenshot
await session.take_screenshot("form_filled.png")
logger.info("Screenshot saved")
# Submit form
await session.click_element("input[type='submit']")
# Wait for response
await session.wait_for_element("pre", timeout=10)
# Get response data
response_text = await session.get_element_text("pre")
logger.info(f"Form response received: {len(response_text)} characters")
except Exception as e:
logger.error(f"API example failed: {e}")
finally:
# Cleanup
await controller.close_session(session.session_id)
logger.info("Session closed")
logger.info("API example completed")
# Run example
asyncio.run(complete_api_example())
import asyncio
from src.core.browser_controller import BrowserController
from src.config.browser_config import BrowserConfig
from src.types.browser_types import BrowserType
async def dom_analysis_example():
"""Demonstrates DOM retrieval for analysis components"""
config = BrowserConfig(
browser_type=BrowserType.CHROME,
headless=True,
window_size=(1200, 800)
)
async with BrowserController(config) as controller:
# Navigate to target page
await controller.navigate_to("https://example.com")
# Get DOM for analysis
dom_html = await controller.get_dom()
print(f"Retrieved DOM: {len(dom_html):,} characters")
# Mock DOM analyzer integration
def analyze_dom(html_content):
"""Simulate DOM analysis component"""
import re
return {
"size": len(html_content),
"elements": {
"headings": len(re.findall(r'<h[1-6]', html_content, re.IGNORECASE)),
"paragraphs": len(re.findall(r'<p>', html_content, re.IGNORECASE)),
"links": len(re.findall(r'<a\s+[^>]*href', html_content, re.IGNORECASE)),
"forms": len(re.findall(r'<form', html_content, re.IGNORECASE))
},
"has_javascript": '<script' in html_content.lower(),
"has_css": '<style' in html_content.lower() or 'stylesheet' in html_content.lower()
}
# Analyze DOM
analysis = analyze_dom(dom_html)
print(f"DOM Analysis: {analysis}")
# Use with session context manager
async with controller.new_session() as session:
await session.navigate_to("https://httpbin.org/html")
# DOM retrieval works even with active session
dom_html = await controller.get_dom()
print(f"Session DOM size: {len(dom_html):,} characters")
# Run examples (complete_api_example is defined in the previous example)
asyncio.run(complete_api_example())
asyncio.run(dom_analysis_example())
This completes the comprehensive API reference documentation for the Browser Controller. All classes, methods, parameters, and examples are documented with proper types and usage patterns.