Skip to content

Commit 6a728cb

Browse files
unclecode and claude committed
feat: add stealth mode and enhance undetected browser support
- Add playwright-stealth integration with enable_stealth parameter in BrowserConfig
- Merge undetected browser strategy into main async_crawler_strategy.py using adapter pattern
- Add browser adapters (BrowserAdapter, PlaywrightAdapter, UndetectedAdapter) for flexible browser switching
- Update install.py to install both playwright and patchright browsers automatically
- Add comprehensive documentation for anti-bot features (stealth mode + undetected browser)
- Create examples demonstrating stealth mode usage and comparison tests
- Update pyproject.toml and requirements.txt with patchright>=1.49.0 and other dependencies
- Remove duplicate/unused dependencies (alphashape, cssselect, pyperclip, shapely, selenium)
- Add dependency checker tool in tests/check_dependencies.py

Breaking changes: None - all existing functionality preserved

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 5c33cbc commit 6a728cb

27 files changed

+2833
-460
lines changed

crawl4ai/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,13 @@
8888
ErrorDetail
8989
)
9090

91+
# Browser Adapters
92+
from .browser_adapter import (
93+
BrowserAdapter,
94+
PlaywrightAdapter,
95+
UndetectedAdapter
96+
)
97+
9198
from .utils import (
9299
start_colab_display_server,
93100
setup_colab_environment
@@ -173,6 +180,10 @@
173180
"CompilationResult",
174181
"ValidationResult",
175182
"ErrorDetail",
183+
# Browser Adapters
184+
"BrowserAdapter",
185+
"PlaywrightAdapter",
186+
"UndetectedAdapter",
176187
]
177188

178189

crawl4ai/async_configs.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,8 @@ class BrowserConfig:
383383
light_mode (bool): Disables certain background features for performance gains. Default: False.
384384
extra_args (list): Additional command-line arguments passed to the browser.
385385
Default: [].
386+
enable_stealth (bool): If True, applies playwright-stealth to bypass basic bot detection.
387+
Cannot be used with use_undetected browser mode. Default: False.
386388
"""
387389

388390
def __init__(
@@ -423,6 +425,7 @@ def __init__(
423425
extra_args: list = None,
424426
debugging_port: int = 9222,
425427
host: str = "localhost",
428+
enable_stealth: bool = False,
426429
):
427430
self.browser_type = browser_type
428431
self.headless = headless
@@ -463,6 +466,7 @@ def __init__(
463466
self.verbose = verbose
464467
self.debugging_port = debugging_port
465468
self.host = host
469+
self.enable_stealth = enable_stealth
466470

467471
fa_user_agenr_generator = ValidUAGenerator()
468472
if self.user_agent_mode == "random":
@@ -494,6 +498,13 @@ def __init__(
494498
# If persistent context is requested, ensure managed browser is enabled
495499
if self.use_persistent_context:
496500
self.use_managed_browser = True
501+
502+
# Validate stealth configuration
503+
if self.enable_stealth and self.use_managed_browser and self.browser_mode == "builtin":
504+
raise ValueError(
505+
"enable_stealth cannot be used with browser_mode='builtin'. "
506+
"Stealth mode requires a dedicated browser instance."
507+
)
497508

498509
@staticmethod
499510
def from_kwargs(kwargs: dict) -> "BrowserConfig":
@@ -530,6 +541,7 @@ def from_kwargs(kwargs: dict) -> "BrowserConfig":
530541
extra_args=kwargs.get("extra_args", []),
531542
debugging_port=kwargs.get("debugging_port", 9222),
532543
host=kwargs.get("host", "localhost"),
544+
enable_stealth=kwargs.get("enable_stealth", False),
533545
)
534546

535547
def to_dict(self):
@@ -564,6 +576,7 @@ def to_dict(self):
564576
"verbose": self.verbose,
565577
"debugging_port": self.debugging_port,
566578
"host": self.host,
579+
"enable_stealth": self.enable_stealth,
567580
}
568581

569582

0 commit comments

Comments
 (0)