Commit ed9cbaf

tqdm implemented
Signed-off-by: Finbarrs Oketunji <f@finbarrs.eu>
1 parent 3ac0dd9 commit ed9cbaf

File tree

5 files changed: +8 additions, -17 deletions


VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-1.0.5
+1.0.6

amazon_scraper/scraper.py

Lines changed: 3 additions & 13 deletions
@@ -4,6 +4,7 @@
 import json
 import random
 import argparse
+from tqdm import tqdm
 
 class AmazonScraper:
     user_agents = [
@@ -52,56 +53,48 @@ def start_scraping(self):
         self.product_writer.writerow(["product_name", "product_images", "number_of_reviews", "price", "product_url", "asin"])
         if self.review:
             self.review_writer.writerow(["product_name", "product_reviews", "product_url", "asin"])
-        for page in range(1, self.pages + 1):
+        for page in tqdm(range(1, self.pages + 1), desc="Scraping Pages"):
             url = self.url + "&page=" + str(page)
             headers = {"User-Agent": random.choice(self.user_agents)}
             response = requests.get(url, headers=headers)
             soup = BeautifulSoup(response.content, "html.parser")
             products = soup.find_all("div", {"class": "sg-col-inner"})
 
             for product in products:
-                # Product name
                 name = product.find("span", {"class": "a-size-medium a-color-base a-text-normal"})
                 if name is not None:
                     name = name.text
                 else:
-                    continue # Skip if no product name
+                    continue
 
-                # Product images
                 images = product.find_all("img", {"class": "s-image"})
                 if images is not None:
                     images = [image['src'] for image in images]
                 else:
                     images = []
 
-                # Number of Reviews
                 number_of_reviews = product.find("span", {"class": "a-size-base"})
                 if number_of_reviews is not None:
                     number_of_reviews = number_of_reviews.text
                 else:
                     number_of_reviews = ''
 
-                # Price
                 price = product.find("span", {"class": "a-offscreen"})
                 if price is not None:
                     price = price.text
                 else:
                     price = ''
 
-                # Product URL
                 product_url = product.find("a", {"class": "a-link-normal"})
                 if product_url is not None:
                     product_url = f'https://www.amazon.{self.locale}' + product_url['href']
                 else:
                     product_url = ''
 
-                # ASIN
                 asin = product_url.split("/dp/")[1].split("/")[0] if "/dp/" in product_url else ''
 
-                # Write to CSV
                 self.product_writer.writerow([name, ", ".join(images), number_of_reviews, price, product_url, asin])
 
-                # Add to JSON data
                 self.product_json_data.append({
                     "product_name": name,
                     "product_images": images,
@@ -111,7 +104,6 @@ def start_scraping(self):
                     "asin": asin
                 })
 
-                # Product reviews
                 if self.review:
                     product_reviews = []
                     review_url = f'https://www.amazon.{self.locale}/product-reviews/{asin}'
@@ -121,10 +113,8 @@ def start_scraping(self):
                     for review in reviews:
                         product_reviews.append(review.text.strip())
 
-                    # Write to CSV
                     self.review_writer.writerow([name, ", ".join(product_reviews), product_url, asin])
 
-                    # Add to JSON data
                     self.review_json_data.append({
                         "product_name": name,
                         "product_reviews": product_reviews,

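The ASIN extraction in the diff is a guarded one-liner: it only slices after "/dp/" when that segment is actually present, so non-product URLs fall back to an empty string instead of raising IndexError. The same logic as a standalone function, exercised with made-up URLs:

def extract_asin(product_url):
    # Amazon product URLs carry the ASIN directly after the "/dp/" segment;
    # the guard makes URLs without that segment return '' rather than raise.
    return product_url.split("/dp/")[1].split("/")[0] if "/dp/" in product_url else ''

print(extract_asin("https://www.amazon.co.uk/Example-Item/dp/B0EXAMPLE0/ref=sr_1_1"))  # B0EXAMPLE0
print(extract_asin("https://www.amazon.co.uk/s?k=example"))                            # (empty string)
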
amazon_scraper/version.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-VERSION = "1.0.5"
+VERSION = "1.0.6"

requirements.txt

Lines changed: 2 additions & 1 deletion
@@ -1,2 +1,3 @@
 beautifulsoup4
-requests
+requests
+tqdm
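
With tqdm declared here, a fresh environment picks it up via the usual pip install -r requirements.txt.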

setup.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 from distutils.core import Extension
 
 NAME = "amazon_scrape"
-VERSION = "1.0.5"
+VERSION = "1.0.6"
 REQUIRES = ["beautifulsoup4", "requests"]
 
 # read the contents of your README file
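
One detail the diff leaves untouched: REQUIRES in setup.py still lists only beautifulsoup4 and requests, while requirements.txt now also carries tqdm. If the two lists are meant to stay in sync, the declaration would presumably read as follows (an assumption, not something this commit contains):

REQUIRES = ["beautifulsoup4", "requests", "tqdm"]  # hypothetical: mirrors requirements.txt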
