2 files changed: 3 additions (+3), 9 deletions (−9)

First changed file (the BeautifulSoup-based link extractor):

@@ -32,8 +32,6 @@ def get_all_website_links(url):
     """
     # all URLs of `url`
     urls = set()
-    # domain name of the URL without the protocol
-    domain_name = urlparse(url).netloc
     soup = BeautifulSoup(requests.get(url).content, "html.parser")
     for a_tag in soup.findAll("a"):
         href = a_tag.attrs.get("href")
@@ -89,16 +87,15 @@ def crawl(url, max_urls=30):
     args = parser.parse_args()
     url = args.url
     max_urls = args.max_urls
-
+    # domain name of the URL without the protocol
+    domain_name = urlparse(url).netloc
     crawl(url, max_urls=max_urls)

     print("[+] Total Internal links:", len(internal_urls))
     print("[+] Total External links:", len(external_urls))
     print("[+] Total URLs:", len(external_urls) + len(internal_urls))
     print("[+] Total crawled URLs:", max_urls)

-    domain_name = urlparse(url).netloc
-
     # save the internal links to a file
     with open(f"{domain_name}_internal_links.txt", "w") as f:
         for internal_link in internal_urls:
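In both files the change is the same: domain_name is no longer computed inside get_all_website_links() on every call, and the stray assignment after the summary prints is dropped; instead it is set once in the __main__ block, before crawl() runs. If the rest of the function reads domain_name (the diff does not show it), that lookup now resolves to this single module-level value. A minimal sketch of what urlparse(url).netloc produces (standard-library behavior; the example URL is hypothetical):

from urllib.parse import urlparse

url = "https://www.example.com/some/page.html"   # hypothetical example URL
domain_name = urlparse(url).netloc               # network location, protocol and path stripped
print(domain_name)                               # prints: www.example.com

# the script later uses it to build output filenames, e.g.
# f"{domain_name}_internal_links.txt" -> "www.example.com_internal_links.txt"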
Second changed file (the requests-html version, using HTMLSession):

@@ -32,8 +32,6 @@ def get_all_website_links(url):
     """
     # all URLs of `url`
     urls = set()
-    # domain name of the URL without the protocol
-    domain_name = urlparse(url).netloc
     # initialize an HTTP session
     session = HTMLSession()
     # make HTTP request & retrieve response
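For context, this second script fetches pages with requests-html's HTMLSession instead of plain requests, so JavaScript-rendered links can also be collected. A minimal sketch of that session pattern, assuming the requests-html package (the URL is hypothetical):

from requests_html import HTMLSession

session = HTMLSession()
response = session.get("https://www.example.com")  # hypothetical URL
# execute the page's JavaScript so dynamically inserted links are present
response.html.render()
rendered_html = response.html.html  # rendered markup, ready for parsing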
@@ -98,15 +96,14 @@ def crawl(url, max_urls=30):
     args = parser.parse_args()
     url = args.url
     max_urls = args.max_urls
-
+    domain_name = urlparse(url).netloc
     crawl(url, max_urls=max_urls)

     print("[+] Total Internal links:", len(internal_urls))
     print("[+] Total External links:", len(external_urls))
     print("[+] Total URLs:", len(external_urls) + len(internal_urls))
     print("[+] Total crawled URLs:", max_urls)

-    domain_name = urlparse(url).netloc

     # save the internal links to a file
     with open(f"{domain_name}_internal_links.txt", "w") as f:
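The motivation for hoisting the assignment is ordering: domain_name must exist before crawl() triggers any code that reads the global, and before the f-string filenames are built. A simplified sketch of the resulting __main__ flow (crawl, internal_urls, and external_urls are defined earlier in each script; the loop body is assumed, since the diff truncates there):

url = args.url
max_urls = args.max_urls
domain_name = urlparse(url).netloc   # computed once, up front

crawl(url, max_urls=max_urls)        # may read the global domain_name

# save the internal links to a file
with open(f"{domain_name}_internal_links.txt", "w") as f:
    for internal_link in internal_urls:
        f.write(internal_link + "\n")   # assumed body; the diff cuts off here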