bandripper.bandripper
import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

import whosyouragent
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Remove punctuation and trailing spaces from text."""
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()


@dataclass
class Track:
    title: str
    number: int
    url: str

    def __post_init__(self):
        self.title = clean_string(self.title)

    @property
    def numbered_title(self):
        num = str(self.number)
        if len(num) == 1:
            num = "0" + num
        return f"{num} - {self.title}"


@dataclass
class Album:
    url: str
    artist: str = None
    title: str = None
    tracks: list[Track] = None
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
        for script in soup.find_all("script"):
            if script.get("data-cart"):
                data = script
                break
        data = json.loads(data.attrs["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    def __init__(
        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param no_track_number: If True, don't add the track
        number to the front of the track title."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number
        self.overwrite = overwrite

    def make_save_path(self):
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Save track to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param content: The binary data of the track."""
        file_path = self.save_path / f"{track_title}.mp3"
        file_path.write_bytes(content)
        return file_path

    def get_track_content(self, track_url: str) -> bytes:
        """Make a request to track_url and return the content.
        Raises a RuntimeError if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading track failed with status code {response.status_code}."
            )
        return response.content

    def download_album_art(self):
        """Download the album art and save as a .jpg."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def track_exists(self, track: Track) -> bool:
        """Return whether a track already exists in self.save_path."""
        path = self.save_path / (
            track.title if self.no_track_number else track.numbered_title
        )
        return path.with_suffix(".mp3").exists()

    def rip(self):
        """Download and save the album tracks and album art."""
        if len(self.album.tracks) == 0:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
        fails = []
        if not self.overwrite:
            self.album.tracks = [
                track for track in self.album.tracks if not self.track_exists(track)
            ]
        for track in self.album.tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(self.album.tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
        print(f"Finished downloading {self.album} in {elapsed_time}.")
        if fails:
            print("The following tracks failed to download:")
            for fail in fails:
                print(f"{fail[0].title}: {fail[1]}")


class BandRipper:
    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        return [base_url + album.a.get("href") for album in grid.find_all("li")]

    def rip(self):
        print(
            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
        )
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        artist = self.albums[0].album.artist
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")


def page_is_discography(url: str) -> bool:
    """Return whether the url points to a discography page."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    # soup.find() returns None if the element doesn't exist.
    grid = soup.find("ol", attrs={"id": "music-grid"})
    if grid:
        return True
    return False


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artist's main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    args = parser.parse_args()
    args.urls = [url.strip("/") for url in args.urls]

    return args


def main(args: argparse.Namespace = None):
    if not args:
        args = get_args()
    for url in args.urls:
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number, args.overwrite)
        else:
            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
Remove punctuation and trailing spaces from text.
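A quick illustration of the cleaning behavior (a minimal sketch; the strings below are made up):

from bandripper.bandripper import clean_string

# Punctuation is stripped and surrounding whitespace removed.
print(clean_string("St. Vincent "))   # -> "St Vincent"
print(clean_string("What's Wrong?"))  # -> "Whats Wrong"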
@dataclass
class Track:
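A small, offline sketch of how Track normalizes its fields (the url is a placeholder):

from bandripper.bandripper import Track

track = Track(title="Intro!", number=3, url="https://example.com/intro.mp3")
print(track.title)           # "Intro" - punctuation removed by __post_init__
print(track.numbered_title)  # "03 - Intro" - zero-padded track number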
@dataclass
class Album:
Album(url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
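Constructing an Album fetches and parses the page immediately, so this sketch needs network access and uses a hypothetical album url:

from bandripper.bandripper import Album

album = Album("https://someartist.bandcamp.com/album/some-album")  # hypothetical url
print(album)          # e.g. "Some Album by Some Artist"
print(album.art_url)  # url of the cover image
for track in album.tracks:
    print(track.numbered_title, track.url)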
class AlbumRipper:
AlbumRipper(album_url: str, no_track_number: bool = False, overwrite: bool = False)
Parameters
- no_track_number: If True, don't add the track number to the front of the track title.
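A minimal sketch of constructing a ripper; the album url is a placeholder, and the Album metadata is fetched as soon as the ripper is created:

from bandripper.bandripper import AlbumRipper

ripper = AlbumRipper(
    "https://someartist.bandcamp.com/album/some-album",  # hypothetical url
    no_track_number=True,  # save "{title}.mp3" instead of "{number} - {title}.mp3"
    overwrite=False,       # leave files that already exist locally alone
)
print(ripper.album)  # e.g. "Some Album by Some Artist"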
def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.
Parameters
- content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
Make a request to track_url and return the content. Raises a RuntimeError if response.status_code != 200.
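For finer-grained control than rip(), get_track_content and save_track can be used together; a sketch with a hypothetical album url (make_save_path() must be called before saving):

from bandripper.bandripper import AlbumRipper

ripper = AlbumRipper("https://someartist.bandcamp.com/album/some-album")  # hypothetical url
ripper.make_save_path()  # creates ./{artist}/{album title}/ and sets ripper.save_path
track = ripper.album.tracks[0]  # assumes the album has at least one public track
content = ripper.get_track_content(track.url)  # raw mp3 bytes
path = ripper.save_track(track.numbered_title, content)
print(path)  # .../{artist}/{album title}/{number} - {track title}.mp3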
def download_album_art(self):
Download the album art and save as a .jpg.
def track_exists(self, track: Track) -> bool:
Return whether a track already exists in self.save_path.
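A sketch of checking for already-downloaded tracks (hypothetical url; save_path must exist first):

from bandripper.bandripper import AlbumRipper

ripper = AlbumRipper("https://someartist.bandcamp.com/album/some-album")  # hypothetical url
ripper.make_save_path()
missing = [t for t in ripper.album.tracks if not ripper.track_exists(t)]
print(f"{len(missing)} track(s) not downloaded yet.")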
def rip(self):
Download and save the album tracks and album art.
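Putting it together with a hypothetical url; re-running the same call later only fetches tracks that aren't already on disk unless overwrite=True was passed to the constructor:

from bandripper.bandripper import AlbumRipper

AlbumRipper("https://someartist.bandcamp.com/album/some-album").rip()  # hypothetical url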
class BandRipper:
BandRipper(band_url: str, no_track_number: bool = False, overwrite: bool = False)
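A sketch of ripping a whole discography; the artist url is a placeholder, and every album page is fetched up front in __init__:

from bandripper.bandripper import BandRipper

band = BandRipper("https://someartist.bandcamp.com")  # hypothetical artist url
print(f"Found {len(band.albums)} albums.")
band.rip()  # downloads each album to ./{artist}/{album title}/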
def get_album_urls(self, band_url: str) -> list[str]:
Get album urls from the main bandcamp url.
def rip(self):
def page_is_discography(url: str) -> bool:
Return whether the url points to a discography page.
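This mirrors how main() decides which ripper to build; a sketch with a hypothetical url:

from bandripper.bandripper import AlbumRipper, BandRipper, page_is_discography

url = "https://someartist.bandcamp.com"  # hypothetical url
ripper = BandRipper(url) if page_is_discography(url) else AlbumRipper(url)
ripper.rip()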
def get_args() -> argparse.Namespace:
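get_args() reads sys.argv, so a script normally just calls it directly; the sketch below fakes a command line to show the parsed result (the url is a placeholder):

import sys
from bandripper.bandripper import get_args

# Simulate a command line: one album url plus the -n switch.
sys.argv = ["prog", "https://someartist.bandcamp.com/album/some-album/", "-n"]
args = get_args()
print(args.urls)             # ["https://someartist.bandcamp.com/album/some-album"] (trailing "/" stripped)
print(args.no_track_number)  # True
print(args.overwrite)        # False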
def main(args: argparse.Namespace = None):
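main() can also be driven programmatically by handing it a pre-built Namespace instead of letting it call get_args(); a sketch with a placeholder url:

import argparse
from bandripper.bandripper import main

args = argparse.Namespace(
    urls=["https://someartist.bandcamp.com"],  # hypothetical artist url
    no_track_number=False,
    overwrite=False,
)
main(args)  # rips the full discography since the url is a discography page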