Commit f7a3366

unclecode#1375: refactor(proxy): Deprecate 'proxy' parameter in BrowserConfig and enhance proxy string parsing
- Updated ProxyConfig.from_string to support multiple proxy formats, including URLs with credentials.
- Deprecated the 'proxy' parameter in BrowserConfig, replacing it with 'proxy_config' for better flexibility.
- Added warnings for deprecated usage and clarified behavior when both parameters are provided.
- Updated documentation and tests to reflect changes in proxy configuration handling.
1 parent 4e1c4bd commit f7a3366

9 files changed: +188 −42 lines changed
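
At a glance, a minimal migration sketch based on the diffs below (not an official snippet; the import path and the `proxy_config.server` attribute are taken from the changed files):

```python
# Migration sketch for this commit: prefer proxy_config over the deprecated proxy string.
from crawl4ai.async_configs import BrowserConfig

# Before: deprecated 'proxy' string (now emits DeprecationWarning and is auto-converted)
old_cfg = BrowserConfig(proxy="http://user:pass@proxy.example.com:8080")

# After: preferred 'proxy_config' parameter
new_cfg = BrowserConfig(proxy_config={
    "server": "http://proxy.example.com:8080",
    "username": "user",
    "password": "pass",
})

print(old_cfg.proxy_config.server)   # populated by the auto-conversion
print(new_cfg.proxy_config.server)
```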

crawl4ai/async_configs.py

Lines changed: 44 additions & 17 deletions

@@ -1,5 +1,6 @@
 import os
 from typing import Union
+import warnings
 from .config import (
     DEFAULT_PROVIDER,
     DEFAULT_PROVIDER_API_KEY,
@@ -257,24 +258,39 @@ def _extract_ip_from_server(self) -> Optional[str]:
 
     @staticmethod
     def from_string(proxy_str: str) -> "ProxyConfig":
-        """Create a ProxyConfig from a string in the format 'ip:port:username:password'."""
-        parts = proxy_str.split(":")
-        if len(parts) == 4:  # ip:port:username:password
+        """Create a ProxyConfig from a string.
+
+        Supported formats:
+        - 'http://username:password@ip:port'
+        - 'http://ip:port'
+        - 'socks5://ip:port'
+        - 'ip:port:username:password'
+        - 'ip:port'
+        """
+        s = (proxy_str or "").strip()
+        # URL with credentials
+        if "@" in s and "://" in s:
+            auth_part, server_part = s.split("@", 1)
+            protocol, credentials = auth_part.split("://", 1)
+            if ":" in credentials:
+                username, password = credentials.split(":", 1)
+                return ProxyConfig(
+                    server=f"{protocol}://{server_part}",
+                    username=username,
+                    password=password,
+                )
+        # URL without credentials (keep scheme)
+        if "://" in s and "@" not in s:
+            return ProxyConfig(server=s)
+        # Colon separated forms
+        parts = s.split(":")
+        if len(parts) == 4:
             ip, port, username, password = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                username=username,
-                password=password,
-                ip=ip
-            )
-        elif len(parts) == 2:  # ip:port only
+            return ProxyConfig(server=f"http://{ip}:{port}", username=username, password=password)
+        if len(parts) == 2:
             ip, port = parts
-            return ProxyConfig(
-                server=f"http://{ip}:{port}",
-                ip=ip
-            )
-        else:
-            raise ValueError(f"Invalid proxy string format: {proxy_str}")
+            return ProxyConfig(server=f"http://{ip}:{port}")
+        raise ValueError(f"Invalid proxy string format: {proxy_str}")
 
     @staticmethod
     def from_dict(proxy_dict: Dict) -> "ProxyConfig":
@@ -438,6 +454,7 @@ def __init__(
         host: str = "localhost",
         enable_stealth: bool = False,
     ):
+
         self.browser_type = browser_type
         self.headless = headless
         self.browser_mode = browser_mode
@@ -450,13 +467,23 @@
         if self.browser_type in ["firefox", "webkit"]:
             self.channel = ""
             self.chrome_channel = ""
+        if proxy:
+            warnings.warn("The 'proxy' parameter is deprecated and will be removed in a future release. Use 'proxy_config' instead.", DeprecationWarning)
         self.proxy = proxy
         self.proxy_config = proxy_config
         if isinstance(self.proxy_config, dict):
            self.proxy_config = ProxyConfig.from_dict(self.proxy_config)
         if isinstance(self.proxy_config, str):
            self.proxy_config = ProxyConfig.from_string(self.proxy_config)
-
+
+        if self.proxy and self.proxy_config:
+            warnings.warn("Both 'proxy' and 'proxy_config' are provided. 'proxy_config' will take precedence.", UserWarning)
+            print(f"[DEBUG] Both proxy and proxy_config provided. Setting proxy to None.")
+            self.proxy = None
+        elif self.proxy:
+            # Convert proxy string to ProxyConfig if proxy_config is not provided
+            self.proxy_config = ProxyConfig.from_string(self.proxy)
+            self.proxy = None
 
         self.viewport_width = viewport_width
         self.viewport_height = viewport_height
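
For reference, a small usage sketch of the new `ProxyConfig.from_string` behavior shown above (the `server`/`username`/`password` attribute names come from the diff; missing credentials are assumed to default to `None`):

```python
# Sketch exercising the formats listed in the new from_string docstring.
from crawl4ai.async_configs import ProxyConfig

samples = [
    "http://user:secret@10.0.0.5:8080",  # URL with credentials
    "socks5://10.0.0.5:1080",            # URL without credentials (scheme kept)
    "10.0.0.5:8080:user:secret",         # ip:port:username:password
    "10.0.0.5:8080",                     # ip:port (http:// is assumed)
]

for s in samples:
    p = ProxyConfig.from_string(s)
    print(p.server, p.username, p.password)
```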

crawl4ai/browser_manager.py

Lines changed: 11 additions & 9 deletions

@@ -15,6 +15,7 @@
 from .config import DOWNLOAD_PAGE_TIMEOUT
 from .async_configs import BrowserConfig, CrawlerRunConfig
 from .utils import get_chromium_path
+import warnings
 
 
 BROWSER_DISABLE_OPTIONS = [
@@ -741,17 +742,18 @@ def _build_browser_args(self) -> dict:
         )
         os.makedirs(browser_args["downloads_path"], exist_ok=True)
 
-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
             from playwright.async_api import ProxySettings
 
-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
             )
             browser_args["proxy"] = proxy_settings
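
A rough sketch of what the simplified wiring above amounts to: only `proxy_config` is forwarded to Playwright's `ProxySettings`. This is an illustration, not the crawl4ai code path itself, and it assumes Playwright is installed:

```python
# Only proxy_config feeds Playwright's proxy settings after this change.
from playwright.async_api import ProxySettings

from crawl4ai.async_configs import ProxyConfig

proxy_config = ProxyConfig.from_string("10.0.0.5:8080:user:secret")

proxy_settings = ProxySettings(
    server=proxy_config.server,
    username=proxy_config.username,
    password=proxy_config.password,
)
print(proxy_settings)
```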

deploy/docker/c4ai-code-context.md

Lines changed: 10 additions & 9 deletions

@@ -7520,17 +7520,18 @@ class BrowserManager:
         )
         os.makedirs(browser_args["downloads_path"], exist_ok=True)
 
-        if self.config.proxy or self.config.proxy_config:
+        if self.config.proxy:
+            warnings.warn(
+                "BrowserConfig.proxy is deprecated and ignored. Use proxy_config instead.",
+                DeprecationWarning,
+            )
+        if self.config.proxy_config:
             from playwright.async_api import ProxySettings
 
-            proxy_settings = (
-                ProxySettings(server=self.config.proxy)
-                if self.config.proxy
-                else ProxySettings(
-                    server=self.config.proxy_config.server,
-                    username=self.config.proxy_config.username,
-                    password=self.config.proxy_config.password,
-                )
+            proxy_settings = ProxySettings(
+                server=self.config.proxy_config.server,
+                username=self.config.proxy_config.username,
+                password=self.config.proxy_config.password,
             )
             browser_args["proxy"] = proxy_settings

docs/md_v2/advanced/proxy-security.md

Lines changed: 8 additions & 4 deletions

@@ -7,13 +7,13 @@ Simple proxy configuration with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig
 
-# Using proxy URL
-browser_config = BrowserConfig(proxy="http://proxy.example.com:8080")
+# Using HTTP proxy
+browser_config = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
     result = await crawler.arun(url="https://example.com")
 
 # Using SOCKS proxy
-browser_config = BrowserConfig(proxy="socks5://proxy.example.com:1080")
+browser_config = BrowserConfig(proxy_config={"server": "socks5://proxy.example.com:1080"})
 async with AsyncWebCrawler(config=browser_config) as crawler:
     result = await crawler.arun(url="https://example.com")
 ```
@@ -25,7 +25,11 @@ Use an authenticated proxy with `BrowserConfig`:
 ```python
 from crawl4ai.async_configs import BrowserConfig
 
-browser_config = BrowserConfig(proxy="http://[username]:[password]@[host]:[port]")
+browser_config = BrowserConfig(proxy_config={
+    "server": "http://[host]:[port]",
+    "username": "[username]",
+    "password": "[password]",
+})
 async with AsyncWebCrawler(config=browser_config) as crawler:
     result = await crawler.arun(url="https://example.com")
 ```

docs/md_v2/api/parameters.md

Lines changed: 1 addition & 1 deletion

@@ -23,7 +23,7 @@ browser_cfg = BrowserConfig(
 | **`headless`** | `bool` (default: `True`) | Headless means no visible UI. `False` is handy for debugging. |
 | **`viewport_width`** | `int` (default: `1080`) | Initial page width (in px). Useful for testing responsive layouts. |
 | **`viewport_height`** | `int` (default: `600`) | Initial page height (in px). |
-| **`proxy`** | `str` (default: `None`) | Single-proxy URL if you want all traffic to go through it, e.g. `"http://user:pass@proxy:8080"`. |
+| **`proxy`** | `str` (deprecated) | Deprecated. Use `proxy_config` instead. If set, it will be auto-converted internally. |
 | **`proxy_config`** | `dict` (default: `None`) | For advanced or multi-proxy needs, specify details like `{"server": "...", "username": "...", ...}`. |
 | **`use_persistent_context`** | `bool` (default: `False`) | If `True`, uses a **persistent** browser context (keep cookies, sessions across runs). Also sets `use_managed_browser=True`. |
 | **`user_data_dir`** | `str or None` (default: `None`) | Directory to store user data (profiles, cookies). Must be set if you want permanent sessions. |
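
Per the table above, `proxy_config` can be given as a dict, a proxy string, or a `ProxyConfig` object. A sketch of the normalization added to `BrowserConfig.__init__` in this commit (attribute and constructor names taken from the diff):

```python
# All three forms should normalize to a ProxyConfig internally.
from crawl4ai.async_configs import BrowserConfig, ProxyConfig

as_dict = BrowserConfig(proxy_config={"server": "http://proxy.example.com:8080"})
as_str = BrowserConfig(proxy_config="http://user:pass@proxy.example.com:8080")
as_obj = BrowserConfig(proxy_config=ProxyConfig(server="http://proxy.example.com:8080"))

for cfg in (as_dict, as_str, as_obj):
    print(type(cfg.proxy_config).__name__, cfg.proxy_config.server)
```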

Lines changed: 70 additions & 0 deletions

@@ -0,0 +1,70 @@
+import sys
+import warnings
+
+from crawl4ai.async_configs import BrowserConfig, ProxyConfig
+
+
+def main() -> int:
+    warnings.simplefilter("always", DeprecationWarning)
+
+    # Case 1: Using deprecated proxy string should emit DeprecationWarning and auto-convert
+    captured = []
+    proxy_str = "23.95.150.145:6114:username:password"
+    with warnings.catch_warnings(record=True) as w:
+        cfg = BrowserConfig(proxy=proxy_str, headless=True)
+        captured = [m for m in w if issubclass(m.category, DeprecationWarning)]
+
+    if not captured:
+        print("[FAIL] No DeprecationWarning emitted for BrowserConfig(proxy=...) usage.")
+        return 1
+
+    if cfg.proxy is not None:
+        print("[FAIL] cfg.proxy should be None after auto-conversion.")
+        return 1
+
+    if not isinstance(cfg.proxy_config, ProxyConfig):
+        print("[FAIL] cfg.proxy_config should be a ProxyConfig instance after auto-conversion.")
+        return 1
+
+    # Basic sanity checks on auto-parsed proxy_config
+    if not cfg.proxy_config.server or ":" not in (cfg.proxy_config.server or ""):
+        print("[FAIL] proxy_config.server appears invalid after conversion:", cfg.proxy_config.server)
+        return 1
+
+    if not cfg.proxy_config.username or not cfg.proxy_config.password:
+        print("[FAIL] proxy_config credentials missing after conversion.")
+        return 1
+
+    print("[OK] DeprecationWarning captured and proxy auto-converted to proxy_config.")
+
+    # Case 2: Using proxy_config directly should not emit DeprecationWarning
+    with warnings.catch_warnings(record=True) as w2:
+        cfg2 = BrowserConfig(
+            proxy_config={
+                "server": "http://127.0.0.1:8080",
+                "username": "u",
+                "password": "p",
+            },
+            headless=True,
+        )
+
+    if any(issubclass(m.category, DeprecationWarning) for m in w2):
+        print("[FAIL] Unexpected DeprecationWarning when using proxy_config.")
+        return 1
+
+    if cfg2.proxy is not None:
+        print("[FAIL] cfg2.proxy should be None (only proxy_config should be used).")
+        return 1
+
+    if not isinstance(cfg2.proxy_config, ProxyConfig):
+        print("[FAIL] cfg2.proxy_config should be a ProxyConfig instance.")
+        return 1
+
+    print("[OK] proxy_config path works without deprecation warnings.")
+    print("All checks passed.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
+

tests/async/test_0.4.2_browser_manager.py

Lines changed: 1 addition & 1 deletion

@@ -112,7 +112,7 @@ async def test_proxy_settings():
         headless=True,
         verbose=False,
         user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
-        proxy="http://127.0.0.1:8080",  # Assuming local proxy server for test
+        proxy_config={"server": "http://127.0.0.1:8080"},  # Assuming local proxy server for test
         use_managed_browser=False,
         use_persistent_context=False,
     ) as crawler:

tests/memory/test_docker_config_gen.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
     # --- BrowserConfig variants ---
     "BrowserConfig()",
     "BrowserConfig(headless=False, extra_args=['--disable-gpu'])",
-    "BrowserConfig(browser_mode='builtin', proxy='http://1.2.3.4:8080')",
+    "BrowserConfig(browser_mode='builtin', proxy_config={'server': 'http://1.2.3.4:8080'})",
 ]
 
 for code in CASES:

Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@
+import warnings
+
+import pytest
+
+from crawl4ai.async_configs import BrowserConfig, ProxyConfig
+
+
+def test_browser_config_proxy_string_emits_deprecation_and_autoconverts():
+    warnings.simplefilter("always", DeprecationWarning)
+
+    proxy_str = "23.95.150.145:6114:username:password"
+    with warnings.catch_warnings(record=True) as caught:
+        cfg = BrowserConfig(proxy=proxy_str, headless=True)
+
+    dep_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
+    assert dep_warnings, "Expected DeprecationWarning when using BrowserConfig(proxy=...)"
+
+    assert cfg.proxy is None, "cfg.proxy should be None after auto-conversion"
+    assert isinstance(cfg.proxy_config, ProxyConfig), "cfg.proxy_config should be ProxyConfig instance"
+    assert cfg.proxy_config.username == "username"
+    assert cfg.proxy_config.password == "password"
+    assert cfg.proxy_config.server.startswith("http://")
+    assert cfg.proxy_config.server.endswith(":6114")
+
+
+def test_browser_config_with_proxy_config_emits_no_deprecation():
+    warnings.simplefilter("always", DeprecationWarning)
+
+    with warnings.catch_warnings(record=True) as caught:
+        cfg = BrowserConfig(
+            headless=True,
+            proxy_config={
+                "server": "http://127.0.0.1:8080",
+                "username": "u",
+                "password": "p",
+            },
+        )
+
+    dep_warnings = [w for w in caught if issubclass(w.category, DeprecationWarning)]
+    assert not dep_warnings, "Did not expect DeprecationWarning when using proxy_config"
+    assert cfg.proxy is None
+    assert isinstance(cfg.proxy_config, ProxyConfig)
