Skip to content

Commit 8045216

Browse files
committed
feat: Add Nstproxy Proxies
1 parent a99cd37 commit 8045216

File tree

6 files changed

+233
-0
lines changed

6 files changed

+233
-0
lines changed

crawl4ai/async_configs.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
from typing import Union
33
import warnings
4+
import requests
45
from .config import (
56
DEFAULT_PROVIDER,
67
DEFAULT_PROVIDER_API_KEY,
@@ -649,6 +650,85 @@ def load(data: dict) -> "BrowserConfig":
649650
return config
650651
return BrowserConfig.from_kwargs(config)
651652

653+
def set_nstproxy(
    self,
    token: str,
    channel_id: str,
    country: str = "ANY",
    state: str = "",
    city: str = "",
    protocol: str = "http",
    session_duration: int = 10,
):
    """
    Fetch a proxy from NSTProxy API and automatically assign it to proxy_config.

    Get your NSTProxy token from: https://app.nstproxy.com/profile

    Args:
        token (str): NSTProxy API token.
        channel_id (str): NSTProxy channel ID.
        country (str, optional): Country code (default: "ANY").
        state (str, optional): State code (default: ""). Only sent when non-empty.
        city (str, optional): City name (default: ""). Only sent when non-empty.
        protocol (str, optional): Proxy protocol ("http" or "socks5"). Defaults to "http".
        session_duration (int, optional): Session duration in minutes (0 = rotate each request). Defaults to 10.

    Raises:
        ValueError: If token/channel_id are empty, the protocol is unsupported,
            or the API response format is invalid (not a non-empty list, or the
            first entry lacks ip/port/username/password).
        PermissionError: If the API returns an error message.
        Exception: Anything raised by the HTTP request, the status check or the
            JSON decoding is re-raised unchanged after being reported.
    """
    # Local import: only needed to redact the token from error output.
    from urllib.parse import quote_plus

    # --- Validate input early ---
    if not token or not channel_id:
        raise ValueError("[NSTProxy] token and channel_id are required")

    if protocol not in ("http", "socks5"):
        raise ValueError(f"[NSTProxy] Invalid protocol: {protocol}")

    # --- Build NSTProxy API URL ---
    params = {
        "fType": 2,
        "count": 1,
        "channelId": channel_id,
        "country": country,
        "protocol": protocol,
        "sessionDuration": session_duration,
        "token": token,
    }
    if state:
        params["state"] = state
    if city:
        params["city"] = city

    url = "https://api.nstproxy.com/api/v1/generate/apiproxies"

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()

        data = response.json()

        # --- Handle API error response ---
        if isinstance(data, dict) and data.get("err"):
            raise PermissionError(f"[NSTProxy] API Error: {data.get('msg', 'Unknown error')}")

        if not isinstance(data, list) or not data:
            raise ValueError("[NSTProxy] Invalid API response — expected a non-empty list")

        proxy_info = data[0]

        # Report a malformed entry as the documented ValueError instead of
        # letting a bare KeyError/TypeError escape from the lookups below.
        required_fields = ("ip", "port", "username", "password")
        if not isinstance(proxy_info, dict):
            raise ValueError("[NSTProxy] Invalid API response — proxy entry is not an object")
        missing = [field for field in required_fields if field not in proxy_info]
        if missing:
            raise ValueError(
                f"[NSTProxy] Invalid API response — missing fields: {', '.join(missing)}"
            )

        # --- Apply proxy config ---
        self.proxy_config = ProxyConfig(
            server=f"{protocol}://{proxy_info['ip']}:{proxy_info['port']}",
            username=proxy_info["username"],
            password=proxy_info["password"],
        )

    except Exception as e:
        # The token travels in the query string, and HTTP errors include the
        # request URL in their message, so mask it (raw and URL-encoded forms)
        # before printing. The exception itself is re-raised untouched.
        message = str(e)
        raw_token = str(token)
        for secret in (raw_token, quote_plus(raw_token)):
            message = message.replace(secret, "***")
        print(f"[NSTProxy] ❌ Failed to set proxy: {message}")
        raise
652732
class VirtualScrollConfig:
653733
"""Configuration for virtual scroll handling.
654734
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
NSTProxy Integration Examples for crawl4ai
3+
------------------------------------------
4+
5+
NSTProxy is a premium residential proxy provider.
6+
👉 Purchase Proxies: https://nstproxy.com
7+
💰 Use coupon code "crawl4ai" for 10% off your plan.
8+
9+
"""
10+
import asyncio, requests
11+
from crawl4ai import AsyncWebCrawler, BrowserConfig
12+
13+
14+
async def main():
    """
    Example: Dynamically fetch a proxy from NSTProxy API before crawling.

    Raises:
        PermissionError: If the API answers with an error object.
        ValueError: If the API response is not a non-empty list or the first
            entry has no ip/port.
    """
    NST_TOKEN = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
    CHANNEL_ID = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID
    country = "ANY"  # e.g. "ANY", "US", "DE"

    # Fetch proxy from NSTProxy API. Passing `params` lets requests URL-encode
    # every value instead of splicing raw strings into the query string.
    response = requests.get(
        "https://api.nstproxy.com/api/v1/generate/apiproxies",
        params={
            "fType": 2,
            "channelId": CHANNEL_ID,
            "country": country,
            "protocol": "http",
            "sessionDuration": 10,
            "count": 1,
            "token": NST_TOKEN,
        },
        timeout=10,
    )
    response.raise_for_status()  # surface HTTP failures before parsing the body
    payload = response.json()

    # Fail with a readable message rather than an opaque KeyError/IndexError
    # when the API sends an error object or an empty result.
    if isinstance(payload, dict) and payload.get("err"):
        raise PermissionError(f"[NSTProxy] API Error: {payload.get('msg', 'Unknown error')}")
    if not isinstance(payload, list) or not payload:
        raise ValueError("[NSTProxy] Invalid API response — expected a non-empty list")

    proxy = payload[0]

    ip = proxy.get("ip")
    port = proxy.get("port")
    username = proxy.get("username", "")
    password = proxy.get("password", "")

    # Without this check a missing field would yield "http://None:None".
    if not ip or not port:
        raise ValueError("[NSTProxy] Invalid API response — proxy entry has no ip/port")

    browser_config = BrowserConfig(proxy_config={
        "server": f"http://{ip}:{port}",
        "username": username,
        "password": password,
    })

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(url="https://example.com")
        print("[API Proxy] Status:", result.status_code)


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""
2+
NSTProxy Integration Examples for crawl4ai
3+
------------------------------------------
4+
5+
NSTProxy is a premium residential proxy provider.
6+
👉 Purchase Proxies: https://nstproxy.com
7+
💰 Use coupon code "crawl4ai" for 10% off your plan.
8+
9+
"""
10+
import asyncio
11+
from crawl4ai import AsyncWebCrawler, BrowserConfig
12+
13+
14+
async def main():
    """
    Example: crawl https://example.com through the NSTProxy gateway using
    explicit username/password credentials, then print the status code.
    """
    # Replace the placeholder credentials with your own.
    proxy_settings = {
        "server": "http://gate.nstproxy.io:24125",
        "username": "your_username",
        "password": "your_password",
    }
    config = BrowserConfig(proxy_config=proxy_settings)

    async with AsyncWebCrawler(config=config) as web_crawler:
        crawl_result = await web_crawler.arun(url="https://example.com")
        print("[Auth Proxy] Status:", crawl_result.status_code)


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
NSTProxy Integration Examples for crawl4ai
3+
------------------------------------------
4+
5+
NSTProxy is a premium residential proxy provider.
6+
👉 Purchase Proxies: https://nstproxy.com
7+
💰 Use coupon code "crawl4ai" for 10% off your plan.
8+
9+
"""
10+
import asyncio
11+
from crawl4ai import AsyncWebCrawler, BrowserConfig
12+
13+
14+
async def main():
    """
    Example: crawl https://example.com twice, first through the HTTP gateway
    and then through the SOCKS5 gateway, printing each status code.
    """
    # (label printed before the status code, proxy server URL), in run order.
    runs = (
        ("[HTTP Proxy] Status:", "http://gate.nstproxy.io:24125"),
        ("[SOCKS5 Proxy] Status:", "socks5://gate.nstproxy.io:24125"),
    )

    for label, server in runs:
        # A fresh config and crawler per proxy, opened and closed in turn.
        config = BrowserConfig(proxy_config={"server": server})
        async with AsyncWebCrawler(config=config) as web_crawler:
            crawl_result = await web_crawler.arun(url="https://example.com")
            print(label, crawl_result.status_code)


if __name__ == "__main__":
    asyncio.run(main())
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
NSTProxy Integration Examples for crawl4ai
3+
------------------------------------------
4+
5+
NSTProxy is a premium residential proxy provider.
6+
👉 Purchase Proxies: https://nstproxy.com
7+
💰 Use coupon code "crawl4ai" for 10% off your plan.
8+
9+
"""
10+
import asyncio
11+
from crawl4ai import AsyncWebCrawler, BrowserConfig
12+
13+
14+
async def main():
    """
    Example: let BrowserConfig.set_nstproxy() fetch and apply a proxy, then
    crawl https://example.com with that configuration.
    """
    nst_token = "YOUR_NST_PROXY_TOKEN"  # Get from https://app.nstproxy.com/profile
    channel = "YOUR_NST_PROXY_CHANNEL_ID"  # Your NSTProxy Channel ID

    proxy_options = {
        "token": nst_token,
        "channel_id": channel,
        "country": "ANY",  # e.g. "US", "JP", or "ANY"
        "state": "",  # optional, leave empty if not needed
        "city": "",  # optional, leave empty if not needed
        "session_duration": 0,  # minutes; 0 = rotate on every request
    }

    config = BrowserConfig()
    config.set_nstproxy(**proxy_options)

    # === Run crawler ===
    async with AsyncWebCrawler(config=config) as web_crawler:
        crawl_result = await web_crawler.arun(url="https://example.com")
        print("[Nstproxy] Status:", crawl_result.status_code)


if __name__ == "__main__":
    asyncio.run(main())

docs/md_v2/core/examples.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ This page provides a comprehensive list of example scripts that demonstrate vari
1111
| Quickstart Set 1 | Basic examples for getting started with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_1.py) |
1212
| Quickstart Set 2 | More advanced examples for working with Crawl4AI. | [View Code](https://github.com/unclecode/crawl4ai/blob/main/docs/examples/quickstart_examples_set_2.py) |
1313

14+
## Proxies
15+
16+
| Example | Description | Link |
17+
|----------|--------------|------|
18+
| **NSTProxy** | [NSTProxy](https://www.nstproxy.com/?utm_source=crawl4ai) integrates seamlessly with Crawl4AI — no setup required. Access high-performance residential, datacenter, ISP, and IPv6 proxies with smart rotation and anti-blocking technology. Pricing starts from $0.1/GB. Use code `crawl4ai` for 10% off. | [View Code](https://github.com/unclecode/crawl4ai/tree/main/docs/examples/proxy) |
19+
1420
## Browser & Crawling Features
1521

1622
| Example | Description | Link |

0 commit comments

Comments
 (0)