"""bandripper.bandripper

Rip albums and album art from bandcamp pages.
"""
import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse

import requests
import whosyouragent
from bs4 import BeautifulSoup
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Remove punctuation from text."""
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text)


@dataclass
class Track:
    """A single streamable album track.

    :param title: Track title (punctuation is stripped on init).
    :param number: The track's position on the album.
    :param url: Direct url of the track's mp3 stream."""

    title: str
    number: int
    url: str

    def __post_init__(self):
        # Strip punctuation so the title is safe to use as a file name.
        self.title = clean_string(self.title)

    @property
    def numbered_title(self) -> str:
        """The title prefixed with the zero-padded track number,
        e.g. "01 - Intro"."""
        num = str(self.number)
        if len(num) == 1:
            num = "0" + num
        return f"{num} - {self.title}"


@dataclass
class Album:
    """Album metadata scraped from a bandcamp album page.

    Constructing an Album makes a network request to `url` and
    populates the remaining fields from the page."""

    url: str
    artist: str = None
    title: str = None
    tracks: list[Track] = None
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        """Fetch the album page and populate artist, title, tracks, and art_url.

        Raises a RuntimeError if the page request fails or the embedded
        album data can't be located."""
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with code {response.status_code}"
            )
        soup = BeautifulSoup(response.text, "html.parser")
        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
        # The album/track info is embedded as JSON in the "data-tralbum"
        # attribute of the script tag that also carries a "data-cart" attribute.
        data_script = None
        for script in soup.find_all("script"):
            if script.get("data-cart"):
                data_script = script
                break
        # The original raised an opaque UnboundLocalError when no matching
        # script tag existed; fail with an explicit message instead.
        if data_script is None:
            raise RuntimeError(f"Could not find album data on {self.url}.")
        data = json.loads(data_script.attrs["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        # Tracks without a "file" entry aren't publicly streamable.
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    """Download an album's tracks and art to a subdirectory of the
    current directory."""

    def __init__(self, album_url: str, no_track_number: bool):
        """
        :param album_url: The bandcamp url of the album to rip.
        :param no_track_number: If True, don't add the track
        number to the front of the track title."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number

    def make_save_path(self):
        """Create (if needed) and store the save directory:
        {cwd}/{artist}/{album title}."""
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random useragent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Save track to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param content: The binary data of the track."""
        file_path = self.save_path / f"{track_title}.mp3"
        file_path.write_bytes(content)
        return file_path

    def get_track_content(self, track_url: str) -> bytes:
        """Make a request to track_url and return the content.
        Raises a RuntimeError exception if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading track failed with status code {response.status_code}."
            )
        return response.content

    def download_album_art(self):
        """Download the album art and save as a .jpg.

        Failures are reported but not raised; missing art shouldn't
        abort the album download."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def rip(self):
        """Download and save the album tracks and album art."""
        if len(self.album.tracks) == 0:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        # NOTE(review): total is len(tracks) - 1, presumably because ProgBar
        # reaches 100% when its counter hits the total after the final
        # display() call -- TODO confirm against printbuddies' ProgBar docs.
        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
        fails = []
        for track in self.album.tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(self.album.tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                # Record the failure and keep going; report at the end.
                fails.append((track, str(e)))
        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
        print(f"Finished downloading {self.album} in {elapsed_time}.")
        if fails:
            print("The following tracks failed to download:")
            for track, error in fails:
                print(f"{track.title}: {error}")


class BandRipper:
    """Download every album on an artist's bandcamp discography page."""

    def __init__(self, band_url: str, no_track_number: bool):
        """
        :param band_url: The url of the artist's main bandcamp page.
        :param no_track_number: Passed through to each AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number))
            except Exception as e:
                # One unreadable album page shouldn't stop the rest.
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        # Album hrefs in the grid are relative; rebuild them from the host.
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        return [base_url + album.a.get("href") for album in grid.find_all("li")]

    def rip(self):
        """Rip every album found on the discography page."""
        # Guard against an empty album list (e.g. every album page failed
        # to parse in __init__); the original raised an IndexError here.
        if not self.albums:
            print(f"No albums to download from {self.band_url}.")
            return None
        print(
            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
        )
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        artist = self.albums[0].album.artist
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for album, error in fails:
                print(f"{album}: {error}")


def get_args() -> argparse.Namespace:
    """Parse and return the command line arguments."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
        If the url is to an artists main page,
        all albums will be downloaded.
        The tracks will be saved to a subdirectory of
        your current directory.
        If a track can't be streamed (i.e. private) it
        won't be downloaded. Multiple urls can be passed.""",
    )

    parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    args = parser.parse_args()
    # Drop surrounding slashes so urls join cleanly with relative hrefs.
    args.urls = [url.strip("/") for url in args.urls]

    return args


def page_is_discography(url: str) -> bool:
    """Returns whether the url is to a discography page or not.

    Raises a RuntimeError if the request doesn't return a 200."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    # find() returns None when the discography grid doesn't exist.
    grid = soup.find("ol", attrs={"id": "music-grid"})
    return grid is not None


def main(args: argparse.Namespace = None):
    """Rip each url with a BandRipper or AlbumRipper as appropriate."""
    if not args:
        args = get_args()
    for url in args.urls:
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number)
        else:
            ripper = AlbumRipper(url, args.no_track_number)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())
def
clean_string(text: str) -> str:
19def clean_string(text: str) -> str: 20 """Remove punctuation from text.""" 21 return re.sub(f"[{re.escape(string.punctuation)}]", "", text)
Remove punctuation from text.
@dataclass
class
Track:
@dataclass
class
Album:
41@dataclass 42class Album: 43 url: str 44 artist: str = None 45 title: str = None 46 tracks: list[Track] = None 47 art_url: str = None 48 49 def __repr__(self): 50 return f"{self.title} by {self.artist}" 51 52 def __post_init__(self): 53 response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True)) 54 if response.status_code != 200: 55 raise RuntimeError( 56 f"Getting album info failed with code {response.status_code}" 57 ) 58 soup = BeautifulSoup(response.text, "html.parser") 59 self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content") 60 for script in soup.find_all("script"): 61 if script.get("data-cart"): 62 data = script 63 break 64 data = json.loads(data.attrs["data-tralbum"]) 65 self.artist = clean_string(data["artist"]) 66 self.title = clean_string(data["current"]["title"]) 67 self.tracks = [ 68 Track(track["title"], track["track_num"], track["file"]["mp3-128"]) 69 for track in data["trackinfo"] 70 if track.get("file") 71 ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class
AlbumRipper:
74class AlbumRipper: 75 def __init__(self, album_url: str, no_track_number: bool): 76 """ 77 :param no_track_number: If True, don't add the track 78 number to the front of the track title.""" 79 self.album = Album(album_url) 80 self.no_track_number = no_track_number 81 82 def make_save_path(self): 83 self.save_path = Path.cwd() / self.album.artist / self.album.title 84 self.save_path.mkdir(parents=True, exist_ok=True) 85 86 @property 87 def headers(self) -> dict: 88 """Get a headers dict with a random useragent.""" 89 return whosyouragent.get_agent(as_dict=True) 90 91 def save_track(self, track_title: str, content: bytes) -> Path: 92 """Save track to self.save_path/{track_title}.mp3. 93 Returns the Path object for the save location. 94 95 :param content: The binary data of the track.""" 96 file_path = self.save_path / f"{track_title}.mp3" 97 file_path.write_bytes(content) 98 return file_path 99 100 def get_track_content(self, track_url: str) -> bytes: 101 """Make a request to track_url and return the content. 102 Raises a RunTimeError exception if response.status_code != 200.""" 103 response = requests.get(track_url, headers=self.headers) 104 if response.status_code != 200: 105 raise RuntimeError( 106 f"Downloading track failed with status code {response.status_code}." 
107 ) 108 return response.content 109 110 def download_album_art(self): 111 """Download the album art and save as a .jpg.""" 112 file_path = self.save_path / f"{self.album.title}.jpg" 113 try: 114 response = requests.get(self.album.art_url, headers=self.headers) 115 file_path.write_bytes(response.content) 116 except Exception as e: 117 print(f"Failed to download art for {self.album}.") 118 print(e) 119 120 def rip(self): 121 """Download and save the album tracks and album art.""" 122 if len(self.album.tracks) == 0: 123 print(f"No public tracks available for {self.album}.") 124 return None 125 self.make_save_path() 126 self.download_album_art() 127 bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5) 128 fails = [] 129 for track in self.album.tracks: 130 bar.display( 131 suffix=f"Downloading {track.title}", 132 counter_override=1 if len(self.album.tracks) == 1 else None, 133 ) 134 try: 135 content = self.get_track_content(track.url) 136 self.save_track( 137 track.title if self.no_track_number else track.numbered_title, 138 content, 139 ) 140 except Exception as e: 141 fails.append((track, str(e))) 142 elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True) 143 print(f"Finished downloading {self.album} in {elapsed_time}.") 144 if fails: 145 print("The following tracks failed to download:") 146 for fail in fails: 147 print(f"{fail[0].title}: {fail[1]}")
AlbumRipper(album_url: str, no_track_number: bool)
75 def __init__(self, album_url: str, no_track_number: bool): 76 """ 77 :param no_track_number: If True, don't add the track 78 number to the front of the track title.""" 79 self.album = Album(album_url) 80 self.no_track_number = no_track_number
Parameters
- no_track_number: If True, don't add the track number to the front of the track title.
def
save_track(self, track_title: str, content: bytes) -> pathlib.Path:
91 def save_track(self, track_title: str, content: bytes) -> Path: 92 """Save track to self.save_path/{track_title}.mp3. 93 Returns the Path object for the save location. 94 95 :param content: The binary data of the track.""" 96 file_path = self.save_path / f"{track_title}.mp3" 97 file_path.write_bytes(content) 98 return file_path
Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.
Parameters
- content: The binary data of the track.
def
get_track_content(self, track_url: str) -> bytes:
100 def get_track_content(self, track_url: str) -> bytes: 101 """Make a request to track_url and return the content. 102 Raises a RunTimeError exception if response.status_code != 200.""" 103 response = requests.get(track_url, headers=self.headers) 104 if response.status_code != 200: 105 raise RuntimeError( 106 f"Downloading track failed with status code {response.status_code}." 107 ) 108 return response.content
Make a request to track_url and return the content. Raises a RuntimeError exception if response.status_code != 200.
def
download_album_art(self):
110 def download_album_art(self): 111 """Download the album art and save as a .jpg.""" 112 file_path = self.save_path / f"{self.album.title}.jpg" 113 try: 114 response = requests.get(self.album.art_url, headers=self.headers) 115 file_path.write_bytes(response.content) 116 except Exception as e: 117 print(f"Failed to download art for {self.album}.") 118 print(e)
Download the album art and save as a .jpg.
def
rip(self):
120 def rip(self): 121 """Download and save the album tracks and album art.""" 122 if len(self.album.tracks) == 0: 123 print(f"No public tracks available for {self.album}.") 124 return None 125 self.make_save_path() 126 self.download_album_art() 127 bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5) 128 fails = [] 129 for track in self.album.tracks: 130 bar.display( 131 suffix=f"Downloading {track.title}", 132 counter_override=1 if len(self.album.tracks) == 1 else None, 133 ) 134 try: 135 content = self.get_track_content(track.url) 136 self.save_track( 137 track.title if self.no_track_number else track.numbered_title, 138 content, 139 ) 140 except Exception as e: 141 fails.append((track, str(e))) 142 elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True) 143 print(f"Finished downloading {self.album} in {elapsed_time}.") 144 if fails: 145 print("The following tracks failed to download:") 146 for fail in fails: 147 print(f"{fail[0].title}: {fail[1]}")
Download and save the album tracks and album art.
class
BandRipper:
150class BandRipper: 151 def __init__(self, band_url: str, no_track_number: bool): 152 self.band_url = band_url 153 self.albums = [] 154 for url in self.get_album_urls(band_url): 155 try: 156 self.albums.append(AlbumRipper(url, no_track_number)) 157 except Exception as e: 158 print(e) 159 160 def get_album_urls(self, band_url: str) -> list[str]: 161 """Get album urls from the main bandcamp url.""" 162 print(f"Fetching discography from {band_url}...") 163 response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True)) 164 if response.status_code != 200: 165 raise RuntimeError( 166 f"Getting {band_url} failed with status code {response.status_code}." 167 ) 168 soup = BeautifulSoup(response.text, "html.parser") 169 grid = soup.find("ol", attrs={"id": "music-grid"}) 170 parsed_url = urlparse(band_url) 171 base_url = f"https://{parsed_url.netloc}" 172 return [base_url + album.a.get("href") for album in grid.find_all("li")] 173 174 def rip(self): 175 print( 176 f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}." 177 ) 178 timer = Timer() 179 timer.start() 180 fails = [] 181 for album in self.albums: 182 try: 183 album.rip() 184 except Exception as e: 185 fails.append((album, e)) 186 timer.stop() 187 artist = self.albums[0].album.artist 188 elapsed_time = timer.current_elapsed_time() 189 print( 190 f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}." 191 ) 192 if fails: 193 print(f"The following downloads failed:") 194 for fail in fails: 195 print(f"{fail[0]}: {fail[1]}")
def
get_album_urls(self, band_url: str) -> list[str]:
160 def get_album_urls(self, band_url: str) -> list[str]: 161 """Get album urls from the main bandcamp url.""" 162 print(f"Fetching discography from {band_url}...") 163 response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True)) 164 if response.status_code != 200: 165 raise RuntimeError( 166 f"Getting {band_url} failed with status code {response.status_code}." 167 ) 168 soup = BeautifulSoup(response.text, "html.parser") 169 grid = soup.find("ol", attrs={"id": "music-grid"}) 170 parsed_url = urlparse(band_url) 171 base_url = f"https://{parsed_url.netloc}" 172 return [base_url + album.a.get("href") for album in grid.find_all("li")]
Get album urls from the main bandcamp url.
def
rip(self):
174 def rip(self): 175 print( 176 f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}." 177 ) 178 timer = Timer() 179 timer.start() 180 fails = [] 181 for album in self.albums: 182 try: 183 album.rip() 184 except Exception as e: 185 fails.append((album, e)) 186 timer.stop() 187 artist = self.albums[0].album.artist 188 elapsed_time = timer.current_elapsed_time() 189 print( 190 f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}." 191 ) 192 if fails: 193 print(f"The following downloads failed:") 194 for fail in fails: 195 print(f"{fail[0]}: {fail[1]}")
def
get_args() -> argparse.Namespace:
198def get_args() -> argparse.Namespace: 199 parser = argparse.ArgumentParser() 200 201 parser.add_argument( 202 "urls", 203 type=str, 204 nargs="*", 205 help=""" The bandcamp url(s) for the album or artist. 206 If the url is to an artists main page, 207 all albums will be downloaded. 208 The tracks will be saved to a subdirectory of 209 your current directory. 210 If a track can't be streamed (i.e. private) it 211 won't be downloaded. Multiple urls can be passed.""", 212 ) 213 214 parser.add_argument( 215 "-n", 216 "--no_track_number", 217 action="store_true", 218 help=""" By default the track number will be added 219 to the front of the track title. Pass this switch 220 to disable the behavior.""", 221 ) 222 223 args = parser.parse_args() 224 args.urls = [url.strip("/") for url in args.urls] 225 226 return args
def
page_is_discography(url: str) -> bool:
229def page_is_discography(url: str) -> bool: 230 """Returns whether the url is to a discography page or not.""" 231 response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True)) 232 if response.status_code != 200: 233 raise RuntimeError( 234 f"Getting {url} failed with status code {response.status_code}." 235 ) 236 soup = BeautifulSoup(response.text, "html.parser") 237 # Returns None if it doesn't exist. 238 grid = soup.find("ol", attrs={"id": "music-grid"}) 239 if grid: 240 return True 241 return False
Returns whether the url is to a discography page or not.
def
main(args: argparse.Namespace = None):