Skip to content

Commit 34c0996

Browse files
committed
fix: Add CDP endpoint verification with exponential backoff for managed browsers (unclecode#1445)
browser_manager:
- Add CDP endpoint verification with retry logic and exponential backoff
- Call verification before connecting to CDP in the `start()` method
- Handle timing issues during browser startup gracefully

test_cdp_strategy:
- Fix cookie persistence test by adding storage state management
- Fix session management test to work with managed browser architecture
- Add comprehensive CDP timing tests covering:
  - Fast startup scenarios
  - Delayed browser startup simulation
  - Exponential backoff behavior validation
  - Concurrent browser connections
  - Stress testing with multiple successive startups
  - Retry count verification

Impact:
- Eliminates browser startup failures due to CDP timing issues
- Provides robust fallback with automatic retries
- Maintains fast startup when CDP is immediately available
- Comprehensive test coverage ensures reliability

Resolves CDP connection timing issues in managed browser mode.
1 parent 3fe49a7 commit 34c0996

File tree

2 files changed

+301
-36
lines changed

2 files changed

+301
-36
lines changed

crawl4ai/browser_manager.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,11 @@ async def start(self):
658658
if self.config.cdp_url or self.config.use_managed_browser:
659659
self.config.use_managed_browser = True
660660
cdp_url = await self.managed_browser.start() if not self.config.cdp_url else self.config.cdp_url
661+
662+
# Add CDP endpoint verification before connecting
663+
if not await self._verify_cdp_ready(cdp_url):
664+
raise Exception(f"CDP endpoint at {cdp_url} is not ready after startup")
665+
661666
self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url)
662667
contexts = self.browser.contexts
663668
if contexts:
@@ -678,6 +683,24 @@ async def start(self):
678683

679684
self.default_context = self.browser
680685

686+
async def _verify_cdp_ready(self, cdp_url: str) -> bool:
    """Verify that a CDP endpoint is ready, retrying with exponential backoff.

    For HTTP(S) URLs, sends ``GET <cdp_url>/json/version`` up to five times
    (2 second timeout per request) and reports success on the first HTTP 200.
    Between attempts it waits ``0.5 * 1.4 ** attempt`` seconds; no wait
    follows the final attempt.

    WebSocket URLs (``ws://`` / ``wss://``) cannot be probed with an HTTP
    GET, so they are accepted without verification and the subsequent
    connection attempt is left to report any failure.

    Args:
        cdp_url: CDP endpoint, e.g. ``http://localhost:9222`` or
            ``ws://localhost:9222/devtools/browser/<id>``.

    Returns:
        True if the endpoint answered with HTTP 200 (or is a WebSocket URL),
        False if every attempt failed.
    """
    max_attempts = 5
    base_delay = 0.5
    backoff_factor = 1.4

    self.logger.debug(f"Starting CDP verification for {cdp_url}", tag="BROWSER")

    # A ws:// URL has no "/json/version" HTTP resource relative to itself;
    # probing it would always fail and block an otherwise valid connection.
    if cdp_url.startswith(("ws://", "wss://")):
        self.logger.debug(
            "WebSocket CDP URL provided, skipping HTTP verification", tag="BROWSER"
        )
        return True

    # Imported lazily so the WebSocket shortcut above does not need aiohttp.
    import aiohttp
    from urllib.parse import urlsplit, urlunsplit

    # Append the path component-wise so a trailing slash or a query string
    # in cdp_url does not corrupt the probe URL.
    parts = urlsplit(cdp_url)
    version_url = urlunsplit(
        parts._replace(path=parts.path.rstrip("/") + "/json/version")
    )
    request_timeout = aiohttp.ClientTimeout(total=2)

    # One session is reused for all attempts instead of one per attempt.
    async with aiohttp.ClientSession() as session:
        for attempt in range(max_attempts):
            try:
                async with session.get(version_url, timeout=request_timeout) as response:
                    if response.status == 200:
                        self.logger.debug(
                            f"CDP endpoint ready after {attempt + 1} attempts",
                            tag="BROWSER",
                        )
                        return True
                    self.logger.debug(
                        f"CDP check attempt {attempt + 1} returned HTTP {response.status}",
                        tag="BROWSER",
                    )
            except Exception as e:
                # Deliberately broad: any failure here just means "not ready yet".
                self.logger.debug(
                    f"CDP check attempt {attempt + 1} failed: {e}", tag="BROWSER"
                )

            # Back off only if another attempt will follow; sleeping after
            # the last one would merely delay the failure report.
            if attempt + 1 < max_attempts:
                delay = base_delay * (backoff_factor ** attempt)
                self.logger.debug(
                    f"Waiting {delay:.2f}s before next CDP check...", tag="BROWSER"
                )
                await asyncio.sleep(delay)

    self.logger.debug(
        f"CDP verification failed after {max_attempts} attempts", tag="BROWSER"
    )
    return False
681704

682705
def _build_browser_args(self) -> dict:
683706
"""Build browser launch arguments from config."""

0 commit comments

Comments
 (0)