bandripper.bandripper
import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urljoin, urlparse

import requests
import whosyouragent
from bs4 import BeautifulSoup
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Delete ASCII punctuation from text and trim surrounding whitespace.

    Every character listed in string.punctuation is removed first; whatever
    whitespace is then left at either end of the result is stripped."""
    punctuation_table = str.maketrans("", "", string.punctuation)
    return text.translate(punctuation_table).strip()


@dataclass
class Track:
    """One album track; the title is passed through clean_string on creation."""

    title: str
    number: int
    url: str

    def __post_init__(self):
        self.title = clean_string(self.title)

    @property
    def numbered_title(self) -> str:
        """The title prefixed with the track number padded to two characters."""
        return f"{str(self.number).zfill(2)} - {self.title}"


@dataclass
class Album:
    """Metadata for one bandcamp album, scraped from its page on creation.

    Only url needs to be supplied; artist, title, tracks and art_url are
    overwritten by __post_init__ with values parsed from the page.

    :param url: Address of the album page to request."""

    url: str
    # Artist name from the page's album data, passed through clean_string.
    artist: str = None
    # Album title from the page's album data, passed through clean_string.
    title: str = None
    # Tracks whose entry in the album data has a non-empty "file" value.
    tracks: list[Track] = None
    # Content of the page's "og:image" meta tag, or None when it is missing.
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        """Request self.url and populate the remaining fields from the page.

        Raises a RuntimeError if the response status is not 200 or if the
        page has no script tag carrying a "data-tralbum" attribute."""
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        # A page without cover art should not prevent the tracks from loading.
        art_tag = soup.find("meta", attrs={"property": "og:image"})
        self.art_url = art_tag.get("content") if art_tag is not None else None
        # Select the script by the attribute that is actually read below.
        script = soup.find("script", attrs={"data-tralbum": True})
        if script is None:
            raise RuntimeError(f"No album data found at {self.url}.")
        data = json.loads(script["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    """Download the tracks and cover art of a single album.

    Files are written to <current directory>/<artist>/<album title>/."""

    def __init__(
        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param album_url: Address of the album page; an Album is built
        from it immediately.

        :param no_track_number: If True, don't add the track
        number to the front of the track title.

        :param overwrite: If True, download tracks even when a matching
        file already exists in the save directory."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number
        self.overwrite = overwrite

    def make_save_path(self):
        """Set self.save_path to cwd/artist/title and create the directory."""
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Write content to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param track_title: File name to use, without the extension.

        :param content: The binary data of the track."""
        destination = self.save_path.joinpath(f"{track_title}.mp3")
        destination.write_bytes(content)
        return destination

    def get_track_content(self, track_url: str) -> bytes:
        """Request track_url and return the body of the response.
        Raises a RuntimeError if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code == 200:
            return response.content
        raise RuntimeError(
            f"Downloading track failed with status code {response.status_code}."
        )

    def download_album_art(self):
        """Download the album art and save it as {album title}.jpg.

        This is best effort: any failure is printed and swallowed so a
        missing cover never stops the tracks from downloading."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            # Don't save the body of an error response as if it were an image.
            if response.status_code != 200:
                raise RuntimeError(
                    f"Downloading album art failed with status code {response.status_code}."
                )
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def track_exists(self, track: Track) -> bool:
        """Return whether a file for track already exists in self.save_path.

        The file name is built the same way save_track builds it, so the
        check always looks at the path a download would write to."""
        name = track.title if self.no_track_number else track.numbered_title
        return (self.save_path / f"{name}.mp3").exists()

    def rip(self):
        """Download and save the album tracks and album art.

        Unless self.overwrite is set, tracks that already exist locally are
        skipped. Tracks that fail are reported after the loop instead of
        aborting the rest of the album."""
        if not self.album.tracks:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        # Work on a local list so self.album.tracks keeps the full track list.
        tracks = self.album.tracks
        if not self.overwrite:
            tracks = [track for track in tracks if not self.track_exists(track)]
        if not tracks:
            print(f"All tracks for {self.album} already exist locally.")
            return None
        # Size the bar from the tracks that will actually be downloaded.
        bar = ProgBar(len(tracks) - 1, width_ratio=0.5)
        fails = []
        for track in tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
        if fails:
            print("The following tracks failed to download:")
            for failed_track, reason in fails:
                print(f"{failed_track.title}: {reason}")


class BandRipper:
    """Download every album linked from a discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: Address of the discography page.

        :param no_track_number: Passed on to every AlbumRipper.

        :param overwrite: Passed on to every AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for album_url in self.get_album_urls(band_url):
            # An album that fails to load is reported and left out.
            try:
                self.albums.append(AlbumRipper(album_url, no_track_number, overwrite))
            except Exception as error:
                print(error)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Relative links are resolved against https://<host of band_url>;
        links that are already absolute are returned unchanged.
        Raises a RuntimeError if the response status is not 200 or the page
        has no "music-grid" list."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No discography grid found at {band_url}.")
        base_url = f"https://{urlparse(band_url).netloc}"
        album_urls = []
        for item in grid.find_all("li"):
            link = item.find("a", href=True)
            # Grid entries without a link cannot be downloaded; skip them.
            if link is not None:
                album_urls.append(urljoin(base_url, link["href"]))
        return album_urls

    def rip(self):
        """Rip every loaded album and report the ones that raised."""
        if not self.albums:
            print(f"No albums available to download from {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for ripper, error in fails:
                # Print the Album (title by artist), not the ripper object.
                print(f"{ripper.album}: {error}")


def page_is_discography(url: str) -> bool:
    """Tell whether url points at a discography page.

    A page counts as a discography when it contains an <ol> element whose
    id is "music-grid". Raises a RuntimeError if the response status is
    not 200."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    # find() gives back None when the page has no such element.
    music_grid = BeautifulSoup(response.text, "html.parser").find(
        "ol", attrs={"id": "music-grid"}
    )
    return bool(music_grid)


def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Slashes at either end of each url are removed before returning."""
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artists main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    arg_parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    arg_parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    parsed = arg_parser.parse_args()
    parsed.urls = list(map(lambda url: url.strip("/"), parsed.urls))

    return parsed


def main(args: argparse.Namespace = None):
    """Rip every url in args.urls, parsing the command line if args is not given.

    A url that leads to a discography page is handled by BandRipper,
    any other url by AlbumRipper."""
    if not args:
        args = get_args()
    for url in args.urls:
        ripper_class = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_class(url, args.no_track_number, args.overwrite).rip()


if __name__ == "__main__":
    main(get_args())
def
clean_string(text: str) -> str:
def clean_string(text: str) -> str:
    """Delete ASCII punctuation from text and trim surrounding whitespace.

    Every character listed in string.punctuation is removed first; whatever
    whitespace is then left at either end of the result is stripped."""
    punctuation_table = str.maketrans("", "", string.punctuation)
    return text.translate(punctuation_table).strip()
Remove ASCII punctuation and leading/trailing whitespace from text.
@dataclass
class
Track:
@dataclass
class
Album:
@dataclass
class Album:
    """Metadata for one bandcamp album, scraped from its page on creation.

    Only url needs to be supplied; artist, title, tracks and art_url are
    overwritten by __post_init__ with values parsed from the page.

    :param url: Address of the album page to request."""

    url: str
    # Artist name from the page's album data, passed through clean_string.
    artist: str = None
    # Album title from the page's album data, passed through clean_string.
    title: str = None
    # Tracks whose entry in the album data has a non-empty "file" value.
    tracks: list[Track] = None
    # Content of the page's "og:image" meta tag, or None when it is missing.
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        """Request self.url and populate the remaining fields from the page.

        Raises a RuntimeError if the response status is not 200 or if the
        page has no script tag carrying a "data-tralbum" attribute."""
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        # A page without cover art should not prevent the tracks from loading.
        art_tag = soup.find("meta", attrs={"property": "og:image"})
        self.art_url = art_tag.get("content") if art_tag is not None else None
        # Select the script by the attribute that is actually read below.
        script = soup.find("script", attrs={"data-tralbum": True})
        if script is None:
            raise RuntimeError(f"No album data found at {self.url}.")
        data = json.loads(script["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class
AlbumRipper:
class AlbumRipper:
    """Download the tracks and cover art of a single album.

    Files are written to <current directory>/<artist>/<album title>/."""

    def __init__(
        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param album_url: Address of the album page; an Album is built
        from it immediately.

        :param no_track_number: If True, don't add the track
        number to the front of the track title.

        :param overwrite: If True, download tracks even when a matching
        file already exists in the save directory."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number
        self.overwrite = overwrite

    def make_save_path(self):
        """Set self.save_path to cwd/artist/title and create the directory."""
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Write content to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param track_title: File name to use, without the extension.

        :param content: The binary data of the track."""
        destination = self.save_path.joinpath(f"{track_title}.mp3")
        destination.write_bytes(content)
        return destination

    def get_track_content(self, track_url: str) -> bytes:
        """Request track_url and return the body of the response.
        Raises a RuntimeError if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code == 200:
            return response.content
        raise RuntimeError(
            f"Downloading track failed with status code {response.status_code}."
        )

    def download_album_art(self):
        """Download the album art and save it as {album title}.jpg.

        This is best effort: any failure is printed and swallowed so a
        missing cover never stops the tracks from downloading."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            # Don't save the body of an error response as if it were an image.
            if response.status_code != 200:
                raise RuntimeError(
                    f"Downloading album art failed with status code {response.status_code}."
                )
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def track_exists(self, track: Track) -> bool:
        """Return whether a file for track already exists in self.save_path.

        The file name is built the same way save_track builds it, so the
        check always looks at the path a download would write to."""
        name = track.title if self.no_track_number else track.numbered_title
        return (self.save_path / f"{name}.mp3").exists()

    def rip(self):
        """Download and save the album tracks and album art.

        Unless self.overwrite is set, tracks that already exist locally are
        skipped. Tracks that fail are reported after the loop instead of
        aborting the rest of the album."""
        if not self.album.tracks:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        # Work on a local list so self.album.tracks keeps the full track list.
        tracks = self.album.tracks
        if not self.overwrite:
            tracks = [track for track in tracks if not self.track_exists(track)]
        if not tracks:
            print(f"All tracks for {self.album} already exist locally.")
            return None
        # Size the bar from the tracks that will actually be downloaded.
        bar = ProgBar(len(tracks) - 1, width_ratio=0.5)
        fails = []
        for track in tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
        if fails:
            print("The following tracks failed to download:")
            for failed_track, reason in fails:
                print(f"{failed_track.title}: {reason}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
def __init__(
    self, album_url: str, no_track_number: bool = False, overwrite: bool = False
):
    """
    :param album_url: Address of the album page; an Album is built
    from it immediately.

    :param no_track_number: If True, don't add the track
    number to the front of the track title.

    :param overwrite: Stored on the instance as self.overwrite."""
    # Store the plain options first; building the Album is the costly step.
    self.no_track_number = no_track_number
    self.overwrite = overwrite
    self.album = Album(album_url)
Parameters
- no_track_number: If True, don't add the track number to the front of the track title.
def
save_track(self, track_title: str, content: bytes) -> pathlib.Path:
def save_track(self, track_title: str, content: bytes) -> Path:
    """Write content to self.save_path/{track_title}.mp3.
    Returns the Path object for the save location.

    :param track_title: File name to use, without the extension.

    :param content: The binary data of the track."""
    destination = self.save_path.joinpath(f"{track_title}.mp3")
    destination.write_bytes(content)
    return destination
Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.
Parameters
- content: The binary data of the track.
def
get_track_content(self, track_url: str) -> bytes:
def get_track_content(self, track_url: str) -> bytes:
    """Request track_url and return the body of the response.
    Raises a RuntimeError if response.status_code != 200."""
    response = requests.get(track_url, headers=self.headers)
    if response.status_code == 200:
        return response.content
    raise RuntimeError(
        f"Downloading track failed with status code {response.status_code}."
    )
Make a request to track_url and return the content. Raises a RuntimeError if response.status_code != 200.
def
download_album_art(self):
def download_album_art(self):
    """Download the album art and save it as {album title}.jpg.

    This is best effort: any failure is printed and swallowed so a
    missing cover never stops the tracks from downloading."""
    file_path = self.save_path / f"{self.album.title}.jpg"
    try:
        response = requests.get(self.album.art_url, headers=self.headers)
        # Don't save the body of an error response as if it were an image.
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading album art failed with status code {response.status_code}."
            )
        file_path.write_bytes(response.content)
    except Exception as e:
        print(f"Failed to download art for {self.album}.")
        print(e)
Download the album art and save as a .jpg.
def track_exists(self, track: Track) -> bool:
    """Return whether a file for track already exists in self.save_path.

    The file name is built the same way save_track builds it, so the
    check always looks at the path a download would write to."""
    name = track.title if self.no_track_number else track.numbered_title
    return (self.save_path / f"{name}.mp3").exists()
Return whether a track already exists in self.save_path.
def
rip(self):
def rip(self):
    """Download and save the album tracks and album art.

    Unless self.overwrite is set, tracks that already exist locally are
    skipped. Tracks that fail are reported after the loop instead of
    aborting the rest of the album."""
    if not self.album.tracks:
        print(f"No public tracks available for {self.album}.")
        return None
    self.make_save_path()
    self.download_album_art()
    # Work on a local list so self.album.tracks keeps the full track list.
    tracks = self.album.tracks
    if not self.overwrite:
        tracks = [track for track in tracks if not self.track_exists(track)]
    if not tracks:
        print(f"All tracks for {self.album} already exist locally.")
        return None
    # Size the bar from the tracks that will actually be downloaded.
    bar = ProgBar(len(tracks) - 1, width_ratio=0.5)
    fails = []
    for track in tracks:
        bar.display(
            suffix=f"Downloading {track.title}",
            counter_override=1 if len(tracks) == 1 else None,
        )
        try:
            content = self.get_track_content(track.url)
            self.save_track(
                track.title if self.no_track_number else track.numbered_title,
                content,
            )
        except Exception as e:
            fails.append((track, str(e)))
    print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
    if fails:
        print("The following tracks failed to download:")
        for failed_track, reason in fails:
            print(f"{failed_track.title}: {reason}")
Download and save the album tracks and album art.
class
BandRipper:
class BandRipper:
    """Download every album linked from a discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: Address of the discography page.

        :param no_track_number: Passed on to every AlbumRipper.

        :param overwrite: Passed on to every AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for album_url in self.get_album_urls(band_url):
            # An album that fails to load is reported and left out.
            try:
                self.albums.append(AlbumRipper(album_url, no_track_number, overwrite))
            except Exception as error:
                print(error)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Relative links are resolved against https://<host of band_url>;
        links that are already absolute are returned unchanged.
        Raises a RuntimeError if the response status is not 200 or the page
        has no "music-grid" list."""
        # Imported here because the module only imports urlparse at the top.
        from urllib.parse import urljoin

        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No discography grid found at {band_url}.")
        base_url = f"https://{urlparse(band_url).netloc}"
        album_urls = []
        for item in grid.find_all("li"):
            link = item.find("a", href=True)
            # Grid entries without a link cannot be downloaded; skip them.
            if link is not None:
                album_urls.append(urljoin(base_url, link["href"]))
        return album_urls

    def rip(self):
        """Rip every loaded album and report the ones that raised."""
        if not self.albums:
            print(f"No albums available to download from {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for ripper, error in fails:
                # Print the Album (title by artist), not the ripper object.
                print(f"{ripper.album}: {error}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
def __init__(
    self, band_url: str, no_track_number: bool = False, overwrite: bool = False
):
    """
    :param band_url: Address of the discography page.

    :param no_track_number: Passed on to every AlbumRipper.

    :param overwrite: Passed on to every AlbumRipper."""
    self.band_url = band_url
    self.albums = []
    for album_url in self.get_album_urls(band_url):
        # An album that fails to load is reported and left out.
        try:
            ripper = AlbumRipper(album_url, no_track_number, overwrite)
        except Exception as error:
            print(error)
        else:
            self.albums.append(ripper)
def
get_album_urls(self, band_url: str) -> list[str]:
def get_album_urls(self, band_url: str) -> list[str]:
    """Get album urls from the main bandcamp url.

    Relative links are resolved against https://<host of band_url>;
    links that are already absolute are returned unchanged.
    Raises a RuntimeError if the response status is not 200 or the page
    has no "music-grid" list."""
    # Imported here because the module only imports urlparse at the top.
    from urllib.parse import urljoin

    print(f"Fetching discography from {band_url}...")
    response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {band_url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    grid = soup.find("ol", attrs={"id": "music-grid"})
    if grid is None:
        raise RuntimeError(f"No discography grid found at {band_url}.")
    base_url = f"https://{urlparse(band_url).netloc}"
    album_urls = []
    for item in grid.find_all("li"):
        link = item.find("a", href=True)
        # Grid entries without a link cannot be downloaded; skip them.
        if link is not None:
            album_urls.append(urljoin(base_url, link["href"]))
    return album_urls
Get album urls from the main bandcamp url.
def
rip(self):
def rip(self):
    """Rip every loaded album and report the ones that raised."""
    if not self.albums:
        print(f"No albums available to download from {self.band_url}.")
        return None
    artist = self.albums[0].album.artist
    print(f"Downloading {len(self.albums)} albums by {artist}.")
    timer = Timer(subsecond_resolution=True)
    timer.start()
    fails = []
    for album in self.albums:
        try:
            album.rip()
        except Exception as e:
            fails.append((album, e))
    timer.stop()
    print(
        f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
    )
    if fails:
        print("The following downloads failed:")
        for ripper, error in fails:
            # Print the Album (title by artist), not the ripper object.
            print(f"{ripper.album}: {error}")
def
page_is_discography(url: str) -> bool:
def page_is_discography(url: str) -> bool:
    """Tell whether url points at a discography page.

    A page counts as a discography when it contains an <ol> element whose
    id is "music-grid". Raises a RuntimeError if the response status is
    not 200."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    # find() gives back None when the page has no such element.
    music_grid = BeautifulSoup(response.text, "html.parser").find(
        "ol", attrs={"id": "music-grid"}
    )
    return bool(music_grid)
Returns whether the url is to a discography page or not.
def
get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Slashes at either end of each url are removed before returning."""
    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artists main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    arg_parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    arg_parser.add_argument(
        "-o",
        "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    parsed = arg_parser.parse_args()
    parsed.urls = list(map(lambda url: url.strip("/"), parsed.urls))

    return parsed
def
main(args: argparse.Namespace = None):
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls, parsing the command line if args is not given.

    A url that leads to a discography page is handled by BandRipper,
    any other url by AlbumRipper."""
    if not args:
        args = get_args()
    for url in args.urls:
        ripper_class = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_class(url, args.no_track_number, args.overwrite).rip()