@@ -20,7 +20,7 @@ class Lecture(Scraper):
2020 with a download class).
2121 """
2222
23- def get_download_names_and_urls (self ) -> Dict [str , str ] | None :
23+ def get_download_names_and_urls (self ) -> Dict [str | None , str ] | None :
2424 """ Get all downloadable urls with their filenames as a dictionary. """
2525 # Select all elements with the download_tags element selector
2626 # Recognize that raise_if_not_found is False that makes it so even if
@@ -29,7 +29,16 @@ def get_download_names_and_urls(self) -> Dict[str, str] | None:
2929 download_tags = self .select_element (self .element_selectors .download_tags , raise_if_not_found = False )
3030 if download_tags is None :
3131 return None
32- return {tag .get ('data-x-origin-download-name' ): tag .get ('href' ) for tag in download_tags }
32+ result = {}
33+ for tag in download_tags :
34+ orig_name = tag .get ("data-x-origin-download-name" )
35+ url = tag .get ("href" )
36+ if "://" not in url :
37+ # This means that the url is a relative url so we need to join it with the base url
38+ url = self .base_url + url
39+ result [orig_name ] = url
40+ # having result[None] will result in only one entry for all links without data-x-origin-download-name
41+ return result
3342
3443 def decompose_elements (self , element_selectors : Iterable [str ], source : Tag ) -> None :
3544 """
@@ -85,7 +94,15 @@ def get_resource_name(self, bare_resource_name: str) -> str:
8594 resource_name = '-' .join (bare_resource_name .split ('-' )[1 :]).strip ()
8695 else :
8796 resource_name = bare_resource_name .strip ()
88- return f"{ lecture_number } - resource_{ resource_name } "
97+ # if we have no extension, assume it's a video in mp4 format
98+ if "." not in resource_name :
99+ resource_name += ".mp4"
100+ try :
101+ # Try to format the lecture number to a 2 digit number
102+ lecture_number = f"{ int (str (lecture_number )):02d} "
103+ except ValueError :
104+ pass # not interested, if it's not a number, keep the string as it is
105+ return f"{ lecture_number } -resource_{ resource_name } "
89106
90107 @staticmethod
91108 def should_overwrite (file_path : Path , noconfirm = False ):
@@ -126,7 +143,14 @@ def download(self, base_dir: Path, chunk_size: int = 4096, noconfirm=False):
126143 self .__download_text (file_path , progress_bar , current_task_id )
127144 if download_names_urls is not None :
128145 for download_name , download_url in download_names_urls .items ():
129- filename = sterialize_file_or_folder (download_name )
146+ if download_name is None :
147+ # get the web page title and use it as the filename
148+ download_name = self .get_name ()
149+ filename = sterialize_file_or_folder (
150+ self .get_resource_name (download_name )
151+ )
152+ else :
153+ filename = sterialize_file_or_folder (download_name )
130154 if filename .split ('.' )[- 1 ] != 'mp4' :
131155 # This means that the downloadable thing is a resource so
132156 # we use the self.get_resource_name to get the resource name
0 commit comments