Skip to content

Commit 2dc6588

Browse files
committed
fix: remove_overlay_elements functionality by calling injected JS function. ref: unclecode#1396
- Fix a critical bug where the overlay-removal JS function was injected but never called
- Change remove_overlay_elements() to properly execute the injected async function
- Wrap the JS execution in an async function to handle the async overlay-removal logic
- Add a test_remove_overlay_elements() test case to verify the functionality works
- Ensure overlay elements (cookie banners, popups, modals) are actually removed

The remove_overlay_elements feature now works as intended:
- Before: the function definition was injected but never executed (silent failure)
- After: the function is injected and called, successfully removing overlay elements
1 parent 3fe49a7 commit 2dc6588

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

crawl4ai/async_crawler_strategy.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,9 +1383,10 @@ async def remove_overlay_elements(self, page: Page) -> None:
13831383
try:
13841384
await self.adapter.evaluate(page,
13851385
f"""
1386-
(() => {{
1386+
(async () => {{
13871387
try {{
1388-
{remove_overlays_js}
1388+
const removeOverlays = {remove_overlays_js};
1389+
await removeOverlays();
13891390
return {{ success: true }};
13901391
}} catch (error) {{
13911392
return {{

tests/general/test_async_crawler_strategy.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,5 +364,19 @@ async def test_network_error_handling():
364364
async with AsyncPlaywrightCrawlerStrategy() as strategy:
365365
await strategy.crawl("https://invalid.example.com", config)
366366

367+
@pytest.mark.asyncio
368+
async def test_remove_overlay_elements(crawler_strategy):
369+
config = CrawlerRunConfig(
370+
remove_overlay_elements=True,
371+
delay_before_return_html=5,
372+
)
373+
374+
response = await crawler_strategy.crawl(
375+
"https://www2.hm.com/en_us/index.html",
376+
config
377+
)
378+
assert response.status_code == 200
379+
assert "Accept all cookies" not in response.html
380+
367381
if __name__ == "__main__":
368382
pytest.main([__file__, "-v"])

0 commit comments

Comments
 (0)