Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ adaptive-crawler = [
"jaro-winkler>=2.0.3",
"playwright>=1.27.0",
"scikit-learn>=1.6.0",
"apify_fingerprint_datapoints>=0.0.2",
"browserforge>=1.2.3"
"apify_fingerprint_datapoints>=0.0.3",
"browserforge>=1.2.4"
]
beautifulsoup = ["beautifulsoup4[lxml]>=4.12.0", "html5lib>=1.0"]
cli = ["cookiecutter>=2.6.0", "inquirer>=3.3.0", "rich>=13.9.0", "typer>=0.12.0"]
Expand Down
46 changes: 0 additions & 46 deletions src/crawlee/_browserforge_workaround.py

This file was deleted.

6 changes: 0 additions & 6 deletions src/crawlee/browsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
# ruff: noqa: E402, TID252

from crawlee._utils.try_import import install_import_hook as _install_import_hook
from crawlee._utils.try_import import try_import as _try_import

_install_import_hook(__name__)

# Due to patch_browserforge
from .._browserforge_workaround import patch_browserforge

patch_browserforge()

# The following imports are wrapped in try_import to handle optional dependencies,
# ensuring the module can still function even if these dependencies are missing.
Expand Down
7 changes: 0 additions & 7 deletions src/crawlee/fingerprint_suite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,3 @@
# ruff: noqa: E402, TID252

# Due to patch_browserforge
from .._browserforge_workaround import patch_browserforge

patch_browserforge()

from ._browserforge_adapter import BrowserforgeFingerprintGenerator as DefaultFingerprintGenerator
from ._fingerprint_generator import FingerprintGenerator
from ._header_generator import HeaderGenerator
Expand Down
10 changes: 3 additions & 7 deletions src/crawlee/fingerprint_suite/_browserforge_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from copy import deepcopy
from functools import reduce
from operator import or_
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal

import apify_fingerprint_datapoints
from browserforge.bayesian_network import extract_json
from browserforge.fingerprints import Fingerprint as bf_Fingerprint
from browserforge.fingerprints import FingerprintGenerator as bf_FingerprintGenerator
from browserforge.fingerprints import Screen
from browserforge.headers.generator import DATA_DIR, ListOrString
from browserforge.headers.generator import HeaderGenerator as bf_HeaderGenerator
from browserforge.headers.generator import ListOrString
from typing_extensions import override

from crawlee._utils.docs import docs_group
Expand Down Expand Up @@ -252,11 +252,7 @@ def generate(self, browser_type: SupportedBrowserType = 'chrome') -> dict[str, s

def get_available_header_network() -> dict:
"""Get header network that contains possible header values."""
if Path(DATA_DIR / 'header-network.zip').is_file():
return extract_json(DATA_DIR / 'header-network.zip')
if Path(DATA_DIR / 'header-network-definition.zip').is_file():
return extract_json(DATA_DIR / 'header-network-definition.zip')
raise FileNotFoundError('Missing header-network file.')
return extract_json(apify_fingerprint_datapoints.get_header_network())


def get_available_header_values(header_network: dict, node_name: str | set[str]) -> set[str]:
Expand Down
11 changes: 6 additions & 5 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.