1+ """
2+ Test the complete fix for both the filter serialization and JSON serialization issues.
3+ """
4+
5+ import asyncio
6+ import httpx
7+
8+ from crawl4ai import BrowserConfig , CacheMode , CrawlerRunConfig
9+ from crawl4ai .deep_crawling import BFSDeepCrawlStrategy , FilterChain , URLPatternFilter
10+
11+ BASE_URL = "http://localhost:11234/" # Adjust port as needed
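# Both tests below assume the server is already running with the updated code
# (see the reminder printed in main()).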

async def test_with_docker_client():
    """Test using the Docker client (same as 1419.py)."""
    from crawl4ai.docker_client import Crawl4aiDockerClient

    print("=" * 60)
    print("Testing with Docker Client")
    print("=" * 60)

    try:
        async with Crawl4aiDockerClient(
            base_url=BASE_URL,
            verbose=True,
        ) as client:

            # Create filter chain - testing the serialization fix
            filter_chain = [
                URLPatternFilter(
                    # patterns=["*about*", "*privacy*", "*terms*"],
                    patterns=["*advanced*"],
                    reverse=True
                ),
            ]

            crawler_config = CrawlerRunConfig(
                deep_crawl_strategy=BFSDeepCrawlStrategy(
                    max_depth=2,  # Keep it shallow for testing
                    # max_pages=5,  # Limit pages for testing
                    filter_chain=FilterChain(filter_chain)
                ),
                cache_mode=CacheMode.BYPASS,
            )

            print("\n1. Testing crawl with filters...")
            results = await client.crawl(
                ["https://docs.crawl4ai.com"],  # Simple test page
                browser_config=BrowserConfig(headless=True),
                crawler_config=crawler_config,
            )

            if results:
                print(f"✅ Crawl succeeded! Type: {type(results)}")
                if hasattr(results, 'success'):
                    print(f"✅ Results success: {results.success}")
                # Test that we can iterate results without JSON errors
                if hasattr(results, '__iter__'):
                    for i, result in enumerate(results):
                        if hasattr(result, 'url'):
                            print(f" Result {i}: {result.url[:50]}...")
                        else:
                            print(f" Result {i}: {str(result)[:50]}...")
                else:
                    # Handle list of results
                    print(f"✅ Got {len(results)} results")
                    for i, result in enumerate(results[:3]):  # Show first 3
                        print(f" Result {i}: {result.url[:50]}...")
            else:
                print("❌ Crawl failed - no results returned")
                return False

            print("\n✅ Docker client test completed successfully!")
            return True

    except Exception as e:
        print(f"❌ Docker client test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


async def test_with_rest_api():
    """Test using REST API directly."""
    print("\n" + "=" * 60)
    print("Testing with REST API")
    print("=" * 60)

    # Create filter configuration
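    # The REST payload mirrors the Docker-client configuration above; each config
    # object is expressed as a {"type": ..., "params": ...} dict.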
    deep_crawl_strategy_payload = {
        "type": "BFSDeepCrawlStrategy",
        "params": {
            "max_depth": 2,
            # "max_pages": 5,
            "filter_chain": {
                "type": "FilterChain",
                "params": {
                    "filters": [
                        {
                            "type": "URLPatternFilter",
                            "params": {
                                "patterns": ["*advanced*"],
                                "reverse": True
                            }
                        }
                    ]
                }
            }
        }
    }

    crawl_payload = {
        "urls": ["https://docs.crawl4ai.com"],
        "browser_config": {"type": "BrowserConfig", "params": {"headless": True}},
        "crawler_config": {
            "type": "CrawlerRunConfig",
            "params": {
                "deep_crawl_strategy": deep_crawl_strategy_payload,
                "cache_mode": "bypass"
            }
        }
    }
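    # Note: "cache_mode" is passed as the string "bypass" here, where the
    # Docker-client test above uses the CacheMode.BYPASS enum.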

    try:
        async with httpx.AsyncClient() as client:
            print("\n1. Sending crawl request to REST API...")
            response = await client.post(
                f"{BASE_URL}crawl",
                json=crawl_payload,
                timeout=30
            )

            if response.status_code == 200:
                print(f"✅ REST API returned 200 OK")
                data = response.json()
                if data.get("success"):
                    results = data.get("results", [])
                    print(f"✅ Got {len(results)} results")
                    for i, result in enumerate(results[:3]):
                        print(f" Result {i}: {result.get('url', 'unknown')[:50]}...")
                else:
                    print(f"❌ Crawl not successful: {data}")
                    return False
            else:
                print(f"❌ REST API returned {response.status_code}")
                print(f" Response: {response.text[:500]}")
                return False

            print("\n✅ REST API test completed successfully!")
            return True

    except Exception as e:
        print(f"❌ REST API test failed: {e}")
        import traceback
        traceback.print_exc()
        return False


async def main():
159+ """Run all tests."""
160+ print ("\n 🧪 TESTING COMPLETE FIX FOR DOCKER FILTER AND JSON ISSUES" )
161+ print ("=" * 60 )
162+ print ("Make sure the server is running with the updated code!" )
163+ print ("=" * 60 )
164+
165+ results = []
166+
167+ # Test 1: Docker client
168+ docker_passed = await test_with_docker_client ()
169+ results .append (("Docker Client" , docker_passed ))
170+
171+ # Test 2: REST API
172+ rest_passed = await test_with_rest_api ()
173+ results .append (("REST API" , rest_passed ))
174+
175+ # Summary
176+ print ("\n " + "=" * 60 )
177+ print ("FINAL TEST SUMMARY" )
178+ print ("=" * 60 )
179+
180+ all_passed = True
181+ for test_name , passed in results :
182+ status = "✅ PASSED" if passed else "❌ FAILED"
183+ print (f"{ test_name :20} { status } " )
184+ if not passed :
185+ all_passed = False
186+
187+ print ("=" * 60 )
188+ if all_passed :
189+ print ("🎉 ALL TESTS PASSED! Both issues are fully resolved!" )
190+ print ("\n The fixes:" )
191+ print ("1. Filter serialization: Fixed by not serializing private __slots__" )
192+ print ("2. JSON serialization: Fixed by removing property descriptors from model_dump()" )
193+ else :
194+ print ("⚠️ Some tests failed. Please check the server logs for details." )
195+
196+ return 0 if all_passed else 1
197+
198+
199+ if __name__ == "__main__" :
200+ import sys
201+ sys .exit (asyncio .run (main ()))