Skip to content
This repository was archived by the owner on Apr 17, 2025. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def launchBrowser(self):
# Initiate the Browser webdriver
currentfolder = os.path.dirname(os.path.abspath(inspect.stack()[0][1]))
# Check which operating system is being used.
if platform == "linux" or platform == "linux2":
if platform in ["linux", "linux2"]:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function PluralCourse.launchBrowser refactored with the following changes:

  • Replace multiple comparisons of same variable with in operator (merge-comparisons)

# linux
chrome_driver = currentfolder+"/chromedriver"
elif platform == "win32":
Expand Down Expand Up @@ -100,14 +100,27 @@ def downloadEpisodes(self):
self.createDir(self.output+"/"+slugify(ModuleTitles[i]))
# For each list item (li) in each list (ul), get the title (h3)
ModuleEpisodesList = [elt.find_element_by_tag_name('h3').text for elt in [elt for elt in Modules[i].find_elements_by_tag_name('li')]]
for j in range(len(ModuleEpisodesList)):
self.createDir(self.output+"/"+slugify(ModuleTitles[i])+"/"+slugify(ModuleEpisodesList[j]))
for item in ModuleEpisodesList:
self.createDir(self.output+"/"+slugify(ModuleTitles[i])+"/" + slugify(item))
# Get the episode element
self.browser.find_element_by_xpath("//*[contains(text(), '"+ModuleEpisodesList[j]+"')]").click()
self.browser.find_element_by_xpath(
"//*[contains(text(), '" + item + "')]"
).click()

time.sleep(self.delay*1.5)
self.pausePlayback()
print("Downloading : ",slugify(ModuleEpisodesList[j])+".mp4")
path =self.output+"/"+slugify(ModuleTitles[i])+"/"+slugify(ModuleEpisodesList[j])+"/"+slugify(ModuleEpisodesList[j])+".mp4"
print("Downloading : ", slugify(item) + ".mp4")
path = (
self.output
+ "/"
+ slugify(ModuleTitles[i])
+ "/"
+ slugify(item)
+ "/"
+ slugify(item)
+ ".mp4"
)

Comment on lines -103 to +123
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function PluralCourse.downloadEpisodes refactored with the following changes:

  • Replace index in for loop with direct reference (for-index-replacement)

if not os.path.exists(path):
self.download(self.getVideoLink(),path)
else:
Expand All @@ -119,8 +132,7 @@ def downloadEpisodes(self):

def getVideoLink(self):
video_elt = self.browser.find_element_by_tag_name('video')
link = video_elt.get_attribute("src")
return link
return video_elt.get_attribute("src")
Comment on lines -122 to +135
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function PluralCourse.getVideoLink refactored with the following changes:

  • Inline variable that is immediately returned (inline-immediately-returned-variable)


def createDir(self,Dir):
if not os.path.exists(Dir):
Expand Down