bandripper.bandripper

  1import argparse
  2import json
  3import re
  4import string
  5from dataclasses import dataclass
  6from pathlib import Path
  7from urllib.parse import urlparse
  8
  9import requests
 10import whosyouragent
 11from bs4 import BeautifulSoup
 12from noiftimer import Timer
 13from printbuddies import ProgBar
 14
# Directory that contains this module file.
root = Path(__file__).parent
 16
 17
 18def clean_string(text: str) -> str:
 19    """Remove punctuation and trailing spaces from text."""
 20    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()
 21
 22
 23@dataclass
 24class Track:
 25    title: str
 26    number: int
 27    url: str
 28
 29    def __post_init__(self):
 30        self.title = clean_string(self.title)
 31
 32    @property
 33    def numbered_title(self):
 34        num = str(self.number)
 35        if len(num) == 1:
 36            num = "0" + num
 37        return f"{num} - {self.title}"
 38
 39
 40@dataclass
 41class Album:
 42    url: str
 43    artist: str = None
 44    title: str = None
 45    tracks: list[Track] = None
 46    art_url: str = None
 47
 48    def __repr__(self):
 49        return f"{self.title} by {self.artist}"
 50
 51    def __post_init__(self):
 52        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
 53        if response.status_code != 200:
 54            raise RuntimeError(
 55                f"Getting album info failed with code {response.status_code}"
 56            )
 57        soup = BeautifulSoup(response.text, "html.parser")
 58        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
 59        for script in soup.find_all("script"):
 60            if script.get("data-cart"):
 61                data = script
 62                break
 63        data = json.loads(data.attrs["data-tralbum"])
 64        self.artist = clean_string(data["artist"])
 65        self.title = clean_string(data["current"]["title"])
 66        self.tracks = [
 67            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
 68            for track in data["trackinfo"]
 69            if track.get("file")
 70        ]
 71
 72
 73class AlbumRipper:
 74    def __init__(
 75        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 76    ):
 77        """
 78        :param no_track_number: If True, don't add the track
 79        number to the front of the track title."""
 80        self.album = Album(album_url)
 81        self.no_track_number = no_track_number
 82        self.overwrite = overwrite
 83
 84    def make_save_path(self):
 85        self.save_path = Path.cwd() / self.album.artist / self.album.title
 86        self.save_path.mkdir(parents=True, exist_ok=True)
 87
 88    @property
 89    def headers(self) -> dict:
 90        """Get a headers dict with a random useragent."""
 91        return whosyouragent.get_agent(as_dict=True)
 92
 93    def save_track(self, track_title: str, content: bytes) -> Path:
 94        """Save track to self.save_path/{track_title}.mp3.
 95        Returns the Path object for the save location.
 96
 97        :param content: The binary data of the track."""
 98        file_path = self.save_path / f"{track_title}.mp3"
 99        file_path.write_bytes(content)
100        return file_path
101
102    def get_track_content(self, track_url: str) -> bytes:
103        """Make a request to track_url and return the content.
104        Raises a RunTimeError exception if response.status_code != 200."""
105        response = requests.get(track_url, headers=self.headers)
106        if response.status_code != 200:
107            raise RuntimeError(
108                f"Downloading track failed with status code {response.status_code}."
109            )
110        return response.content
111
112    def download_album_art(self):
113        """Download the album art and save as a .jpg."""
114        file_path = self.save_path / f"{self.album.title}.jpg"
115        try:
116            response = requests.get(self.album.art_url, headers=self.headers)
117            file_path.write_bytes(response.content)
118        except Exception as e:
119            print(f"Failed to download art for {self.album}.")
120            print(e)
121
122    def track_exists(self, track: Track) -> bool:
123        """Return if a track already exists in self.save_path."""
124        path = self.save_path / (
125            track.title if self.no_track_number else track.numbered_title
126        )
127        return path.with_suffix(".mp3").exists()
128
129    def rip(self):
130        """Download and save the album tracks and album art."""
131        if len(self.album.tracks) == 0:
132            print(f"No public tracks available for {self.album}.")
133            return None
134        self.make_save_path()
135        self.download_album_art()
136        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
137        fails = []
138        if not self.overwrite:
139            self.album.tracks = [
140                track for track in self.album.tracks if not self.track_exists(track)
141            ]
142        for track in self.album.tracks:
143            bar.display(
144                suffix=f"Downloading {track.title}",
145                counter_override=1 if len(self.album.tracks) == 1 else None,
146            )
147            try:
148                content = self.get_track_content(track.url)
149                self.save_track(
150                    track.title if self.no_track_number else track.numbered_title,
151                    content,
152                )
153            except Exception as e:
154                fails.append((track, str(e)))
155        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
156        if fails:
157            print("The following tracks failed to download:")
158            for fail in fails:
159                print(f"{fail[0].title}: {fail[1]}")
160
161
class BandRipper:
    """Download every album linked from an artist's discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: Url of the page that lists the artist's albums.

        :param no_track_number: Passed through to every AlbumRipper.

        :param overwrite: Passed through to every AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            # An album that cannot be loaded is reported and skipped
            # so the remaining albums are still collected.
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Raises RuntimeError if the request does not return status 200
        or the page has no album grid."""
        from urllib.parse import urljoin

        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No album grid found at {band_url}.")
        base_url = f"https://{urlparse(band_url).netloc}"
        # urljoin resolves relative hrefs against the band's host and leaves
        # hrefs that are already absolute untouched.
        return [
            urljoin(base_url, item.a.get("href"))
            for item in grid.find_all("li")
            if item.a is not None and item.a.get("href")
        ]

    def rip(self):
        """Rip every collected album, then list the albums whose rip raised."""
        if not self.albums:
            # Nothing was loaded, so there is no first album to read the artist from.
            print(f"No albums found for {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")
209
210
def page_is_discography(url: str) -> bool:
    """Report whether the page at url contains an album grid.

    Raises RuntimeError if the request does not return status 200."""
    agent_headers = whosyouragent.get_agent(as_dict=True)
    response = requests.get(url, headers=agent_headers)
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    page = BeautifulSoup(response.text, "html.parser")
    # find() gives None when the page has no <ol id="music-grid"> element.
    music_grid = page.find("ol", attrs={"id": "music-grid"})
    return bool(music_grid)
224
225
def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Every url has "/" characters removed from both of its ends."""
    cli = argparse.ArgumentParser()

    # Positional: zero or more album/artist urls.
    cli.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
            If the url is to an artists main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. private) it
            won't be downloaded. Multiple urls can be passed.""",
    )

    # Switch: save files without the track-number prefix.
    cli.add_argument(
        "-n", "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
    )

    # Switch: download tracks even if they already exist locally.
    cli.add_argument(
        "-o", "--overwrite",
        action="store_true",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
    )

    parsed = cli.parse_args()
    trimmed_urls = []
    for raw_url in parsed.urls:
        trimmed_urls.append(raw_url.strip("/"))
    parsed.urls = trimmed_urls
    return parsed
263
264
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls.

    The command line is parsed with get_args when args is not supplied.
    A url whose page is a discography is handled by BandRipper,
    any other url by AlbumRipper."""
    args = args or get_args()
    for url in args.urls:
        ripper_type = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_type(url, args.no_track_number, args.overwrite).rip()
274
275
# Run the command line interface when this module is executed as a script.
if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
19def clean_string(text: str) -> str:
20    """Remove punctuation and trailing spaces from text."""
21    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()

Remove punctuation and leading/trailing whitespace from text.

@dataclass
class Track:
24@dataclass
25class Track:
26    title: str
27    number: int
28    url: str
29
30    def __post_init__(self):
31        self.title = clean_string(self.title)
32
33    @property
34    def numbered_title(self):
35        num = str(self.number)
36        if len(num) == 1:
37            num = "0" + num
38        return f"{num} - {self.title}"
Track(title: str, number: int, url: str)
@dataclass
class Album:
41@dataclass
42class Album:
43    url: str
44    artist: str = None
45    title: str = None
46    tracks: list[Track] = None
47    art_url: str = None
48
49    def __repr__(self):
50        return f"{self.title} by {self.artist}"
51
52    def __post_init__(self):
53        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
54        if response.status_code != 200:
55            raise RuntimeError(
56                f"Getting album info failed with code {response.status_code}"
57            )
58        soup = BeautifulSoup(response.text, "html.parser")
59        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
60        for script in soup.find_all("script"):
61            if script.get("data-cart"):
62                data = script
63                break
64        data = json.loads(data.attrs["data-tralbum"])
65        self.artist = clean_string(data["artist"])
66        self.title = clean_string(data["current"]["title"])
67        self.tracks = [
68            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
69            for track in data["trackinfo"]
70            if track.get("file")
71        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class AlbumRipper:
 74class AlbumRipper:
 75    def __init__(
 76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 77    ):
 78        """
 79        :param no_track_number: If True, don't add the track
 80        number to the front of the track title."""
 81        self.album = Album(album_url)
 82        self.no_track_number = no_track_number
 83        self.overwrite = overwrite
 84
 85    def make_save_path(self):
 86        self.save_path = Path.cwd() / self.album.artist / self.album.title
 87        self.save_path.mkdir(parents=True, exist_ok=True)
 88
 89    @property
 90    def headers(self) -> dict:
 91        """Get a headers dict with a random useragent."""
 92        return whosyouragent.get_agent(as_dict=True)
 93
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path
102
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content
112
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)
122
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()
129
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for track in self.album.tracks:
144            bar.display(
145                suffix=f"Downloading {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
157        if fails:
158            print("The following tracks failed to download:")
159            for fail in fails:
160                print(f"{fail[0].title}: {fail[1]}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
75    def __init__(
76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
77    ):
78        """
79        :param no_track_number: If True, don't add the track
80        number to the front of the track title."""
81        self.album = Album(album_url)
82        self.no_track_number = no_track_number
83        self.overwrite = overwrite
Parameters
  • no_track_number: If True, don't add the track number to the front of the track title.
def make_save_path(self):
85    def make_save_path(self):
86        self.save_path = Path.cwd() / self.album.artist / self.album.title
87        self.save_path.mkdir(parents=True, exist_ok=True)
headers: dict

Get a headers dict with a random useragent.

def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path

Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.

Parameters
  • content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content

Make a request to track_url and return the content. Raises a RuntimeError exception if response.status_code != 200.

def download_album_art(self):
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)

Download the album art and save as a .jpg.

def track_exists(self, track: bandripper.bandripper.Track) -> bool:
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()

Return if a track already exists in self.save_path.

def rip(self):
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for track in self.album.tracks:
144            bar.display(
145                suffix=f"Downloading {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
157        if fails:
158            print("The following tracks failed to download:")
159            for fail in fails:
160                print(f"{fail[0].title}: {fail[1]}")

Download and save the album tracks and album art.

class BandRipper:
163class BandRipper:
164    def __init__(
165        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
166    ):
167        self.band_url = band_url
168        self.albums = []
169        for url in self.get_album_urls(band_url):
170            try:
171                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
172            except Exception as e:
173                print(e)
174
175    def get_album_urls(self, band_url: str) -> list[str]:
176        """Get album urls from the main bandcamp url."""
177        print(f"Fetching discography from {band_url}...")
178        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
179        if response.status_code != 200:
180            raise RuntimeError(
181                f"Getting {band_url} failed with status code {response.status_code}."
182            )
183        soup = BeautifulSoup(response.text, "html.parser")
184        grid = soup.find("ol", attrs={"id": "music-grid"})
185        parsed_url = urlparse(band_url)
186        base_url = f"https://{parsed_url.netloc}"
187        return [base_url + album.a.get("href") for album in grid.find_all("li")]
188
189    def rip(self):
190        print(
191            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
192        )
193        timer = Timer(subsecond_resolution=True)
194        timer.start()
195        fails = []
196        for album in self.albums:
197            try:
198                album.rip()
199            except Exception as e:
200                fails.append((album, e))
201        timer.stop()
202        artist = self.albums[0].album.artist
203        print(
204            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
205        )
206        if fails:
207            print(f"The following downloads failed:")
208            for fail in fails:
209                print(f"{fail[0]}: {fail[1]}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
164    def __init__(
165        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
166    ):
167        self.band_url = band_url
168        self.albums = []
169        for url in self.get_album_urls(band_url):
170            try:
171                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
172            except Exception as e:
173                print(e)
def get_album_urls(self, band_url: str) -> list[str]:
175    def get_album_urls(self, band_url: str) -> list[str]:
176        """Get album urls from the main bandcamp url."""
177        print(f"Fetching discography from {band_url}...")
178        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
179        if response.status_code != 200:
180            raise RuntimeError(
181                f"Getting {band_url} failed with status code {response.status_code}."
182            )
183        soup = BeautifulSoup(response.text, "html.parser")
184        grid = soup.find("ol", attrs={"id": "music-grid"})
185        parsed_url = urlparse(band_url)
186        base_url = f"https://{parsed_url.netloc}"
187        return [base_url + album.a.get("href") for album in grid.find_all("li")]

Get album urls from the main bandcamp url.

def rip(self):
189    def rip(self):
190        print(
191            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
192        )
193        timer = Timer(subsecond_resolution=True)
194        timer.start()
195        fails = []
196        for album in self.albums:
197            try:
198                album.rip()
199            except Exception as e:
200                fails.append((album, e))
201        timer.stop()
202        artist = self.albums[0].album.artist
203        print(
204            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
205        )
206        if fails:
207            print(f"The following downloads failed:")
208            for fail in fails:
209                print(f"{fail[0]}: {fail[1]}")
def page_is_discography(url: str) -> bool:
212def page_is_discography(url: str) -> bool:
213    """Returns whether the url is to a discography page or not."""
214    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
215    if response.status_code != 200:
216        raise RuntimeError(
217            f"Getting {url} failed with status code {response.status_code}."
218        )
219    soup = BeautifulSoup(response.text, "html.parser")
220    # Returns None if it doesn't exist.
221    grid = soup.find("ol", attrs={"id": "music-grid"})
222    if grid:
223        return True
224    return False

Returns whether the url is to a discography page or not.

def get_args() -> argparse.Namespace:
227def get_args() -> argparse.Namespace:
228    parser = argparse.ArgumentParser()
229
230    parser.add_argument(
231        "urls",
232        type=str,
233        nargs="*",
234        help=""" The bandcamp url(s) for the album or artist.
235            If the url is to an artists main page,
236            all albums will be downloaded.
237            The tracks will be saved to a subdirectory of
238            your current directory.
239            If a track can't be streamed (i.e. private) it
240            won't be downloaded. Multiple urls can be passed.""",
241    )
242
243    parser.add_argument(
244        "-n",
245        "--no_track_number",
246        action="store_true",
247        help=""" By default the track number will be added
248        to the front of the track title. Pass this switch
249        to disable the behavior.""",
250    )
251
252    parser.add_argument(
253        "-o",
254        "--overwrite",
255        action="store_true",
256        help=""" Pass this flag to overwrite existing files.
257        Otherwise don't download tracks that already exist locally.""",
258    )
259
260    args = parser.parse_args()
261    args.urls = [url.strip("/") for url in args.urls]
262
263    return args
def main(args: argparse.Namespace = None):
266def main(args: argparse.Namespace = None):
267    if not args:
268        args = get_args()
269    for url in args.urls:
270        if page_is_discography(url):
271            ripper = BandRipper(url, args.no_track_number, args.overwrite)
272        else:
273            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
274        ripper.rip()