Skip to content

Commit 6181815

Browse files
committed
Update BrowserUse and move browser initialization to utils
1 parent dcb1c09 commit 6181815

File tree

3 files changed

+196
-110
lines changed

3 files changed

+196
-110
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
import logging
2+
import os
3+
from typing import List, Optional
4+
5+
from browser_use import Browser, BrowserConfig, BrowserContextConfig, Controller
6+
from browser_use.agent.views import ActionResult
7+
from browser_use.browser.context import BrowserContext
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
async def set_file_input(index: int, paths: str | List[str], browser: BrowserContext):
    """
    Point the file input found at a DOM index at one or more local files.

    Args:
        index: The DOM element index to target
        paths: A single local file path, or a list of local file paths
        browser: Browser context used to look up and locate the element

    Returns:
        ActionResult: On success, ``extracted_content`` names the paths that
        were set. ``error`` is filled in when a path does not exist, when no
        file upload element can be resolved for ``index``, or when setting
        the files raises.
    """

    def _no_upload_element():
        # Shared outcome for both "not found" checks below.
        note = f"No file upload element found at index {index}. The element may be hidden or not an input type file"
        logger.info(note)
        return ActionResult(error=note)

    # A lone path is wrapped so the rest of the function only deals with lists.
    paths = [paths] if isinstance(paths, str) else paths

    # Bail out on the first path that is not present on disk.
    absent = next((candidate for candidate in paths if not os.path.exists(candidate)), None)
    if absent is not None:
        return ActionResult(error=f"File {absent} does not exist")

    element_node = await browser.get_dom_element_by_index(index)
    upload_node = element_node.get_file_upload_element()
    if upload_node is None:
        return _no_upload_element()

    upload_handle = await browser.get_locate_element(upload_node)
    if upload_handle is None:
        return _no_upload_element()

    try:
        await upload_handle.set_input_files(paths)
        outcome = f"Successfully set file input value to {paths}"
        logger.info(outcome)
        return ActionResult(extracted_content=outcome, include_in_memory=True)
    except Exception as exc:
        outcome = f"Failed to upload file to index {index}: {str(exc)}"
        logger.info(outcome)
        return ActionResult(error=outcome)
55+
56+
57+
async def close_current_tab(browser: BrowserContext):
    """
    Close the current tab of the given browser context.

    Args:
        browser: Browser context whose current tab is closed

    Returns:
        ActionResult: Confirmation message, flagged for inclusion in memory
    """
    await browser.close_current_tab()

    confirmation = "🔄 Closed current tab"
    logger.info(confirmation)
    return ActionResult(include_in_memory=True, extracted_content=confirmation)
62+
63+
64+
class BrowserInitializer:
    """
    Initialize and cache browser, browser-context and controller instances.

    Each ``init_*`` classmethod creates its object on first use and stores it
    on the class. Every later call returns the stored object and ignores the
    arguments it was given, so only one instance of each kind exists for the
    lifetime of the process.
    """

    _browser = None
    _controller = None
    _browser_context = None

    @classmethod
    def init_browser(cls, config: Optional[BrowserConfig] = None):
        """
        Initialize and cache the Browser instance.

        Args:
            config: Browser configuration. ``None`` means "use a default
                ``BrowserConfig()``", built at call time rather than once at
                definition time. Ignored when a browser is already cached.

        Returns:
            Browser: Browser instance for web automation
        """
        if cls._browser is None:
            cls._browser = Browser(config=config if config is not None else BrowserConfig())
        return cls._browser

    @classmethod
    def init_browser_context(cls, config: Optional[BrowserConfig] = None, downloads_path: Optional[str] = None):
        """
        Initialize and cache the BrowserContext instance.

        Args:
            config: Browser configuration forwarded to ``init_browser``;
                ``None`` selects the default configuration.
            downloads_path: Directory that downloads are saved into. When
                given, the directory is created if needed and a context
                subclass that saves every download there is used; otherwise a
                plain ``BrowserContext`` is used.

        Returns:
            BrowserContext: BrowserContext instance for managing browser context

        Raises:
            OSError: If ``downloads_path`` cannot be created (including when
                it already exists but is not a directory).
        """
        if cls._browser_context is not None:
            return cls._browser_context

        if downloads_path:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(downloads_path, exist_ok=True)

        context_config = BrowserContextConfig(
            browser_window_size={"width": 1920, "height": 1080},
        )
        browser = cls.init_browser(config=config)

        # Only build the download-aware subclass when it is actually needed.
        context_cls = cls._download_context_class(downloads_path) if downloads_path else BrowserContext
        cls._browser_context = context_cls(browser=browser, config=context_config)
        return cls._browser_context

    @staticmethod
    def _download_context_class(downloads_path: str):
        """Return a BrowserContext subclass that saves downloads into ``downloads_path``."""

        class BrowserContextWithDownloadHandling(BrowserContext):
            async def handle_download(self, download):
                """Save ``download`` under a non-clashing name in the downloads directory."""
                suggested_filename = download.suggested_filename
                unique_filename = await self._get_unique_filename(downloads_path, suggested_filename)
                download_path = os.path.join(downloads_path, unique_filename)
                await download.save_as(download_path)
                logger.info(f"Downloaded file saved to {download_path}")

            async def _initialize_session(self):
                """Run the base session setup, then hook download listeners onto all pages."""

                async def _download_listener(download):
                    logger.info("[BUD] Download event triggered")
                    await self.handle_download(download)
                    return download

                def _new_page_listener(page):
                    logger.info("[BUD] Adding download event listener to page")
                    page.on("download", _download_listener)
                    return page

                # Keep the base implementation's result so it can be handed
                # back to the caller instead of being silently dropped.
                # NOTE(review): browser_use appears to use this return value
                # for lazy session creation - confirm against the installed version.
                session = await super()._initialize_session()

                # Pages opened later get the listener via the context-level "page" event.
                logger.info("[BUD] Adding page event listener to context")
                self.session.context.on("page", _new_page_listener)

                # Pages that already exist must be hooked up explicitly.
                logger.info(f"[BUD] Adding download event listener to {len(self.session.context.pages)} existing pages")
                for page in self.session.context.pages:
                    page.on("download", _download_listener)

                return session

        return BrowserContextWithDownloadHandling

    @classmethod
    def init_controller(cls):
        """
        Initialize and cache the Controller instance.

        The controller is created with two custom actions registered on it:
        ``set_file_input`` and ``close_current_tab``.

        Returns:
            Controller: Controller instance for managing browser actions
        """
        if cls._controller is not None:
            return cls._controller

        controller = Controller()

        controller.action(
            description="Set the value of a file input to the given path or list of paths",
        )(set_file_input)

        controller.action(
            description="Close the tab that is currently active",
        )(close_current_tab)

        cls._controller = controller
        return cls._controller

patchwork/steps/BrowserUse/BrowserUse.py

Lines changed: 24 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -10,111 +10,6 @@
1010

1111
logger = logging.getLogger(__name__)
1212

13-
# Global variables to cache browser initialization
14-
_browser = None
15-
_controller = None
16-
17-
18-
def init_browser():
19-
"""
20-
Initialize and cache browser and controller instances.
21-
22-
This function uses a singleton pattern to ensure we only create one browser
23-
instance throughout the application lifecycle, which saves resources.
24-
25-
Returns:
26-
tuple: (Browser, Controller) instances for web automation
27-
"""
28-
global _browser, _controller
29-
30-
# Return cached instances if already initialized
31-
if _browser is not None and _controller is not None:
32-
return _browser, _controller
33-
34-
from browser_use import Browser, BrowserConfig, BrowserContextConfig, Controller
35-
from browser_use.agent.views import ActionResult
36-
from browser_use.browser.context import BrowserContext
37-
38-
# Set up downloads directory for browser operations
39-
downloads_path = os.path.join(os.getcwd(), "downloads")
40-
if not os.path.exists(downloads_path):
41-
os.makedirs(downloads_path)
42-
43-
context_config = BrowserContextConfig(save_downloads_path=downloads_path)
44-
config = BrowserConfig(headless=True, disable_security=True, new_context_config=context_config)
45-
controller = Controller()
46-
47-
# Register custom action to upload files to web elements
48-
@controller.action(
49-
description="Upload file to interactive element with file path",
50-
)
51-
async def upload_file(index: int, path: str, browser: BrowserContext):
52-
"""
53-
Upload a file to a file input element identified by its index.
54-
55-
Args:
56-
index: The DOM element index to target
57-
path: Local file path to upload
58-
browser: Browser context for interaction
59-
60-
Returns:
61-
ActionResult: Result of the upload operation
62-
"""
63-
if not os.path.exists(path):
64-
return ActionResult(error=f"File {path} does not exist")
65-
66-
dom_el = await browser.get_dom_element_by_index(index)
67-
file_upload_dom_el = dom_el.get_file_upload_element()
68-
69-
if file_upload_dom_el is None:
70-
msg = f"No file upload element found at index {index}. The element may be hidden or not an input type file"
71-
logger.info(msg)
72-
return ActionResult(error=msg)
73-
74-
file_upload_el = await browser.get_locate_element(file_upload_dom_el)
75-
76-
if file_upload_el is None:
77-
msg = f"No file upload element found at index {index}. The element may be hidden or not an input type file"
78-
logger.info(msg)
79-
return ActionResult(error=msg)
80-
81-
try:
82-
await file_upload_el.set_input_files(path)
83-
msg = f"Successfully uploaded file to index {index}"
84-
logger.info(msg)
85-
return ActionResult(extracted_content=msg, include_in_memory=True)
86-
except Exception as e:
87-
msg = f"Failed to upload file to index {index}: {str(e)}"
88-
logger.info(msg)
89-
return ActionResult(error=msg)
90-
91-
# Register custom action to read file contents
92-
@controller.action(description="Read the file content of a file given a path")
93-
async def read_file(path: str):
94-
"""
95-
Read and return the contents of a file at the specified path.
96-
97-
Args:
98-
path: Path to the file to read
99-
100-
Returns:
101-
ActionResult: File contents or error message
102-
"""
103-
if not os.path.exists(path):
104-
return ActionResult(error=f"File {path} does not exist")
105-
106-
with open(path, "r") as f:
107-
content = f.read()
108-
msg = f"File content: {content}"
109-
logger.info(msg)
110-
return ActionResult(extracted_content=msg, include_in_memory=True)
111-
112-
# Cache the initialized instances
113-
_browser = Browser(config=config)
114-
_controller = controller
115-
116-
return _browser, _controller
117-
11813

11914
class BrowserUse(Step, input_class=BrowserUseInputs, output_class=BrowserUseOutputs):
12015
"""
@@ -155,9 +50,13 @@ def __init__(self, inputs):
15550
api_key=self.inputs["anthropic_api_key"],
15651
)
15752

53+
gifs_base_path = os.path.join(os.path.dirname(__file__), "../../../tmp/gifs")
54+
if not os.path.exists(gifs_base_path):
55+
os.makedirs(gifs_base_path)
56+
15857
# Configure GIF generation for debugging/visualization
15958
self.generate_gif = (
160-
f"agent_history_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.gif"
59+
f"{gifs_base_path}/agent_history_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.gif"
16160
if ("generate_gif" in self.inputs and self.inputs["generate_gif"])
16261
or ("debug" in self.inputs and self.inputs["debug"])
16362
else False
@@ -173,21 +72,37 @@ def run(self) -> dict:
17372
Returns:
17473
dict: Results of the browser automation task
17574
"""
176-
from browser_use import Agent
75+
from browser_use import Agent, BrowserConfig
76+
77+
from patchwork.common.utils.browser_initializer import BrowserInitializer
78+
79+
browser_config = BrowserConfig(
80+
headless=self.inputs.get("headless", True),
81+
disable_security=True,
82+
)
83+
browser_context = BrowserInitializer.init_browser_context(
84+
browser_config, self.inputs.get("downloads_path", None)
85+
)
86+
controller = BrowserInitializer.init_controller()
87+
logger.info("Browser initialized")
17788

178-
browser, controller = init_browser()
17989
agent = Agent(
180-
browser=browser,
90+
browser_context=browser_context,
18191
controller=controller,
18292
task=mustache_render(self.inputs["task"], self.inputs["task_value"]),
18393
llm=self.llm,
18494
generate_gif=self.generate_gif,
18595
validate_output=True,
96+
initial_actions=self.inputs.get("initial_actions", None),
97+
use_vision=self.inputs.get("use_vision", True),
18698
)
18799

188100
# Run the agent in an event loop
189101
loop = asyncio.new_event_loop()
190102
self.history = loop.run_until_complete(agent.run())
103+
loop.run_until_complete(browser_context.close())
104+
loop.run_until_complete(browser_context.browser.close())
105+
loop.close()
191106

192107
# Format results as JSON if schema provided
193108
if "example_json" in self.inputs:

patchwork/steps/BrowserUse/typed.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing_extensions import Annotated, Any, Dict, Optional, TypedDict
1+
from typing_extensions import Annotated, Any, Dict, Optional, TypedDict, List
22

33
from patchwork.common.utils.step_typing import StepTypeConfig
44

@@ -14,6 +14,10 @@ class BrowserUseInputs(__BrowserUseInputsRequired, total=False):
1414
anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
1515
google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
1616
generate_gif: Optional[bool]
17+
headless: Optional[bool]
18+
initial_actions: Optional[List[Dict[str, Dict[str, Any]]]]
19+
downloads_path: Optional[str]
20+
use_vision: Optional[bool]
1721

1822

1923
class BrowserUseOutputs(TypedDict):

0 commit comments

Comments
 (0)