bandripper.bandripper

  1import argparse
  2import json
  3import re
  4import string
  5from dataclasses import dataclass
  6from pathlib import Path
  7from urllib.parse import urlparse
  8
  9import requests
 10from bs4 import BeautifulSoup
 11
 12import whosyouragent
 13from noiftimer import Timer
 14from printbuddies import ProgBar
 15
 16root = Path(__file__).parent
 17
 18
 19def clean_string(text: str) -> str:
 20    """Remove punctuation and trailing spaces from text."""
 21    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()
 22
 23
 24@dataclass
 25class Track:
 26    title: str
 27    number: int
 28    url: str
 29
 30    def __post_init__(self):
 31        self.title = clean_string(self.title)
 32
 33    @property
 34    def numbered_title(self):
 35        num = str(self.number)
 36        if len(num) == 1:
 37            num = "0" + num
 38        return f"{num} - {self.title}"
 39
 40
 41@dataclass
 42class Album:
 43    url: str
 44    artist: str = None
 45    title: str = None
 46    tracks: list[Track] = None
 47    art_url: str = None
 48
 49    def __repr__(self):
 50        return f"{self.title} by {self.artist}"
 51
 52    def __post_init__(self):
 53        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
 54        if response.status_code != 200:
 55            raise RuntimeError(
 56                f"Getting album info failed with code {response.status_code}"
 57            )
 58        soup = BeautifulSoup(response.text, "html.parser")
 59        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
 60        for script in soup.find_all("script"):
 61            if script.get("data-cart"):
 62                data = script
 63                break
 64        data = json.loads(data.attrs["data-tralbum"])
 65        self.artist = clean_string(data["artist"])
 66        self.title = clean_string(data["current"]["title"])
 67        self.tracks = [
 68            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
 69            for track in data["trackinfo"]
 70            if track.get("file")
 71        ]
 72
 73
 74class AlbumRipper:
 75    def __init__(
 76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 77    ):
 78        """
 79        :param no_track_number: If True, don't add the track
 80        number to the front of the track title."""
 81        self.album = Album(album_url)
 82        self.no_track_number = no_track_number
 83        self.overwrite = overwrite
 84
 85    def make_save_path(self):
 86        self.save_path = Path.cwd() / self.album.artist / self.album.title
 87        self.save_path.mkdir(parents=True, exist_ok=True)
 88
 89    @property
 90    def headers(self) -> dict:
 91        """Get a headers dict with a random useragent."""
 92        return whosyouragent.get_agent(as_dict=True)
 93
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path
102
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content
112
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)
122
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()
129
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        fails = []
139        if not self.overwrite:
140            self.album.tracks = [
141                track for track in self.album.tracks if not self.track_exists(track)
142            ]
143        for track in self.album.tracks:
144            bar.display(
145                suffix=f"Downloading {track.title}",
146                counter_override=1 if len(self.album.tracks) == 1 else None,
147            )
148            try:
149                content = self.get_track_content(track.url)
150                self.save_track(
151                    track.title if self.no_track_number else track.numbered_title,
152                    content,
153                )
154            except Exception as e:
155                fails.append((track, str(e)))
156        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
157        print(f"Finished downloading {self.album} in {elapsed_time}.")
158        if fails:
159            print("The following tracks failed to download:")
160            for fail in fails:
161                print(f"{fail[0].title}: {fail[1]}")
162
163
class BandRipper:
    """Download every album listed on a discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """
        :param band_url: The url of the discography page.
        The album urls and the info for each album are requested immediately.

        :param no_track_number: Passed on to each AlbumRipper.

        :param overwrite: Passed on to each AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                # An album that can't be loaded is reported and skipped.
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Raises a RuntimeError if the response status code isn't 200
        or if the page has no music grid."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            raise RuntimeError(f"No discography found at {band_url}.")
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        album_urls = []
        for item in grid.find_all("li"):
            href = item.a.get("href") if item.a is not None else None
            if not href:
                # Grid item without a link; nothing to download.
                continue
            # Only relative links need the base url in front of them.
            if href.startswith(("http://", "https://")):
                album_urls.append(href)
            else:
                album_urls.append(base_url + href)
        return album_urls

    def rip(self):
        """Download every album in self.albums.

        Albums that raise while downloading are listed at the end."""
        if not self.albums:
            # Nothing was loaded, so there is no artist name to report either.
            print(f"No albums found for {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for fail in fails:
                print(f"{fail[0]}: {fail[1]}")
212
213
def page_is_discography(url: str) -> bool:
    """Return True if the page at url contains a music grid, otherwise False.

    Raises a RuntimeError if the response status code isn't 200."""
    agent_headers = whosyouragent.get_agent(as_dict=True)
    response = requests.get(url, headers=agent_headers)
    status = response.status_code
    if status != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    page = BeautifulSoup(response.text, "html.parser")
    # find() gives None when the page has no such element.
    return bool(page.find("ol", attrs={"id": "music-grid"}))
227
228
def get_args() -> argparse.Namespace:
    """Parse the command line arguments.

    The returned namespace has `urls` (list of str with any
    slashes stripped from both ends), `no_track_number` (bool)
    and `overwrite` (bool)."""
    arg_parser = argparse.ArgumentParser()

    # Zero or more positional urls.
    arg_parser.add_argument(
        "urls",
        nargs="*",
        type=str,
        help=""" The bandcamp url(s) for the album or artist.
            If the url is to an artists main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. private) it
            won't be downloaded. Multiple urls can be passed.""",
    )

    # Boolean switches, both off unless passed.
    arg_parser.add_argument(
        "-n",
        "--no_track_number",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
        action="store_true",
    )

    arg_parser.add_argument(
        "-o",
        "--overwrite",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
        action="store_true",
    )

    parsed = arg_parser.parse_args()
    stripped_urls = []
    for raw_url in parsed.urls:
        stripped_urls.append(raw_url.strip("/"))
    parsed.urls = stripped_urls

    return parsed
266
267
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls.

    A url to a discography page is handled by a BandRipper,
    anything else by an AlbumRipper.

    :param args: Namespace with `urls`, `no_track_number` and, optionally,
    `overwrite`. If None, the arguments are parsed from the command line."""
    if args is None:
        args = get_args()
    # A namespace built by hand may not have `overwrite`.
    overwrite = getattr(args, "overwrite", False)
    for url in args.urls:
        # Forward `overwrite` too; otherwise the -o/--overwrite switch has no effect.
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number, overwrite)
        else:
            ripper = AlbumRipper(url, args.no_track_number, overwrite)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())
def clean_string(text: str) -> str:
20def clean_string(text: str) -> str:
21    """Remove punctuation and trailing spaces from text."""
22    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()

Remove punctuation and leading/trailing whitespace from text.

@dataclass
class Track:
25@dataclass
26class Track:
27    title: str
28    number: int
29    url: str
30
31    def __post_init__(self):
32        self.title = clean_string(self.title)
33
34    @property
35    def numbered_title(self):
36        num = str(self.number)
37        if len(num) == 1:
38            num = "0" + num
39        return f"{num} - {self.title}"
Track(title: str, number: int, url: str)
@dataclass
class Album:
42@dataclass
43class Album:
44    url: str
45    artist: str = None
46    title: str = None
47    tracks: list[Track] = None
48    art_url: str = None
49
50    def __repr__(self):
51        return f"{self.title} by {self.artist}"
52
53    def __post_init__(self):
54        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
55        if response.status_code != 200:
56            raise RuntimeError(
57                f"Getting album info failed with code {response.status_code}"
58            )
59        soup = BeautifulSoup(response.text, "html.parser")
60        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
61        for script in soup.find_all("script"):
62            if script.get("data-cart"):
63                data = script
64                break
65        data = json.loads(data.attrs["data-tralbum"])
66        self.artist = clean_string(data["artist"])
67        self.title = clean_string(data["current"]["title"])
68        self.tracks = [
69            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
70            for track in data["trackinfo"]
71            if track.get("file")
72        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class AlbumRipper:
 75class AlbumRipper:
 76    def __init__(
 77        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 78    ):
 79        """
 80        :param no_track_number: If True, don't add the track
 81        number to the front of the track title."""
 82        self.album = Album(album_url)
 83        self.no_track_number = no_track_number
 84        self.overwrite = overwrite
 85
 86    def make_save_path(self):
 87        self.save_path = Path.cwd() / self.album.artist / self.album.title
 88        self.save_path.mkdir(parents=True, exist_ok=True)
 89
 90    @property
 91    def headers(self) -> dict:
 92        """Get a headers dict with a random useragent."""
 93        return whosyouragent.get_agent(as_dict=True)
 94
 95    def save_track(self, track_title: str, content: bytes) -> Path:
 96        """Save track to self.save_path/{track_title}.mp3.
 97        Returns the Path object for the save location.
 98
 99        :param content: The binary data of the track."""
100        file_path = self.save_path / f"{track_title}.mp3"
101        file_path.write_bytes(content)
102        return file_path
103
104    def get_track_content(self, track_url: str) -> bytes:
105        """Make a request to track_url and return the content.
106        Raises a RunTimeError exception if response.status_code != 200."""
107        response = requests.get(track_url, headers=self.headers)
108        if response.status_code != 200:
109            raise RuntimeError(
110                f"Downloading track failed with status code {response.status_code}."
111            )
112        return response.content
113
114    def download_album_art(self):
115        """Download the album art and save as a .jpg."""
116        file_path = self.save_path / f"{self.album.title}.jpg"
117        try:
118            response = requests.get(self.album.art_url, headers=self.headers)
119            file_path.write_bytes(response.content)
120        except Exception as e:
121            print(f"Failed to download art for {self.album}.")
122            print(e)
123
124    def track_exists(self, track: Track) -> bool:
125        """Return if a track already exists in self.save_path."""
126        path = self.save_path / (
127            track.title if self.no_track_number else track.numbered_title
128        )
129        return path.with_suffix(".mp3").exists()
130
131    def rip(self):
132        """Download and save the album tracks and album art."""
133        if len(self.album.tracks) == 0:
134            print(f"No public tracks available for {self.album}.")
135            return None
136        self.make_save_path()
137        self.download_album_art()
138        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
139        fails = []
140        if not self.overwrite:
141            self.album.tracks = [
142                track for track in self.album.tracks if not self.track_exists(track)
143            ]
144        for track in self.album.tracks:
145            bar.display(
146                suffix=f"Downloading {track.title}",
147                counter_override=1 if len(self.album.tracks) == 1 else None,
148            )
149            try:
150                content = self.get_track_content(track.url)
151                self.save_track(
152                    track.title if self.no_track_number else track.numbered_title,
153                    content,
154                )
155            except Exception as e:
156                fails.append((track, str(e)))
157        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
158        print(f"Finished downloading {self.album} in {elapsed_time}.")
159        if fails:
160            print("The following tracks failed to download:")
161            for fail in fails:
162                print(f"{fail[0].title}: {fail[1]}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
76    def __init__(
77        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
78    ):
79        """
80        :param no_track_number: If True, don't add the track
81        number to the front of the track title."""
82        self.album = Album(album_url)
83        self.no_track_number = no_track_number
84        self.overwrite = overwrite
Parameters
  • no_track_number: If True, don't add the track number to the front of the track title.
def make_save_path(self):
86    def make_save_path(self):
87        self.save_path = Path.cwd() / self.album.artist / self.album.title
88        self.save_path.mkdir(parents=True, exist_ok=True)
headers: dict

Get a headers dict with a random useragent.

def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
 95    def save_track(self, track_title: str, content: bytes) -> Path:
 96        """Save track to self.save_path/{track_title}.mp3.
 97        Returns the Path object for the save location.
 98
 99        :param content: The binary data of the track."""
100        file_path = self.save_path / f"{track_title}.mp3"
101        file_path.write_bytes(content)
102        return file_path

Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.

Parameters
  • content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
104    def get_track_content(self, track_url: str) -> bytes:
105        """Make a request to track_url and return the content.
106        Raises a RunTimeError exception if response.status_code != 200."""
107        response = requests.get(track_url, headers=self.headers)
108        if response.status_code != 200:
109            raise RuntimeError(
110                f"Downloading track failed with status code {response.status_code}."
111            )
112        return response.content

Make a request to track_url and return the content. Raises a RuntimeError exception if response.status_code != 200.

def download_album_art(self):
114    def download_album_art(self):
115        """Download the album art and save as a .jpg."""
116        file_path = self.save_path / f"{self.album.title}.jpg"
117        try:
118            response = requests.get(self.album.art_url, headers=self.headers)
119            file_path.write_bytes(response.content)
120        except Exception as e:
121            print(f"Failed to download art for {self.album}.")
122            print(e)

Download the album art and save as a .jpg.

def track_exists(self, track: bandripper.bandripper.Track) -> bool:
124    def track_exists(self, track: Track) -> bool:
125        """Return if a track already exists in self.save_path."""
126        path = self.save_path / (
127            track.title if self.no_track_number else track.numbered_title
128        )
129        return path.with_suffix(".mp3").exists()

Return whether a track already exists in self.save_path.

def rip(self):
131    def rip(self):
132        """Download and save the album tracks and album art."""
133        if len(self.album.tracks) == 0:
134            print(f"No public tracks available for {self.album}.")
135            return None
136        self.make_save_path()
137        self.download_album_art()
138        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
139        fails = []
140        if not self.overwrite:
141            self.album.tracks = [
142                track for track in self.album.tracks if not self.track_exists(track)
143            ]
144        for track in self.album.tracks:
145            bar.display(
146                suffix=f"Downloading {track.title}",
147                counter_override=1 if len(self.album.tracks) == 1 else None,
148            )
149            try:
150                content = self.get_track_content(track.url)
151                self.save_track(
152                    track.title if self.no_track_number else track.numbered_title,
153                    content,
154                )
155            except Exception as e:
156                fails.append((track, str(e)))
157        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
158        print(f"Finished downloading {self.album} in {elapsed_time}.")
159        if fails:
160            print("The following tracks failed to download:")
161            for fail in fails:
162                print(f"{fail[0].title}: {fail[1]}")

Download and save the album tracks and album art.

class BandRipper:
165class BandRipper:
166    def __init__(
167        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
168    ):
169        self.band_url = band_url
170        self.albums = []
171        for url in self.get_album_urls(band_url):
172            try:
173                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
174            except Exception as e:
175                print(e)
176
177    def get_album_urls(self, band_url: str) -> list[str]:
178        """Get album urls from the main bandcamp url."""
179        print(f"Fetching discography from {band_url}...")
180        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
181        if response.status_code != 200:
182            raise RuntimeError(
183                f"Getting {band_url} failed with status code {response.status_code}."
184            )
185        soup = BeautifulSoup(response.text, "html.parser")
186        grid = soup.find("ol", attrs={"id": "music-grid"})
187        parsed_url = urlparse(band_url)
188        base_url = f"https://{parsed_url.netloc}"
189        return [base_url + album.a.get("href") for album in grid.find_all("li")]
190
191    def rip(self):
192        print(
193            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
194        )
195        timer = Timer()
196        timer.start()
197        fails = []
198        for album in self.albums:
199            try:
200                album.rip()
201            except Exception as e:
202                fails.append((album, e))
203        timer.stop()
204        artist = self.albums[0].album.artist
205        elapsed_time = timer.current_elapsed_time()
206        print(
207            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
208        )
209        if fails:
210            print(f"The following downloads failed:")
211            for fail in fails:
212                print(f"{fail[0]}: {fail[1]}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
166    def __init__(
167        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
168    ):
169        self.band_url = band_url
170        self.albums = []
171        for url in self.get_album_urls(band_url):
172            try:
173                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
174            except Exception as e:
175                print(e)
def get_album_urls(self, band_url: str) -> list[str]:
177    def get_album_urls(self, band_url: str) -> list[str]:
178        """Get album urls from the main bandcamp url."""
179        print(f"Fetching discography from {band_url}...")
180        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
181        if response.status_code != 200:
182            raise RuntimeError(
183                f"Getting {band_url} failed with status code {response.status_code}."
184            )
185        soup = BeautifulSoup(response.text, "html.parser")
186        grid = soup.find("ol", attrs={"id": "music-grid"})
187        parsed_url = urlparse(band_url)
188        base_url = f"https://{parsed_url.netloc}"
189        return [base_url + album.a.get("href") for album in grid.find_all("li")]

Get album urls from the main bandcamp url.

def rip(self):
191    def rip(self):
192        print(
193            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
194        )
195        timer = Timer()
196        timer.start()
197        fails = []
198        for album in self.albums:
199            try:
200                album.rip()
201            except Exception as e:
202                fails.append((album, e))
203        timer.stop()
204        artist = self.albums[0].album.artist
205        elapsed_time = timer.current_elapsed_time()
206        print(
207            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
208        )
209        if fails:
210            print(f"The following downloads failed:")
211            for fail in fails:
212                print(f"{fail[0]}: {fail[1]}")
def page_is_discography(url: str) -> bool:
215def page_is_discography(url: str) -> bool:
216    """Returns whether the url is to a discography page or not."""
217    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
218    if response.status_code != 200:
219        raise RuntimeError(
220            f"Getting {url} failed with status code {response.status_code}."
221        )
222    soup = BeautifulSoup(response.text, "html.parser")
223    # Returns None if it doesn't exist.
224    grid = soup.find("ol", attrs={"id": "music-grid"})
225    if grid:
226        return True
227    return False

Returns whether the url is to a discography page or not.

def get_args() -> argparse.Namespace:
230def get_args() -> argparse.Namespace:
231    parser = argparse.ArgumentParser()
232
233    parser.add_argument(
234        "urls",
235        type=str,
236        nargs="*",
237        help=""" The bandcamp url(s) for the album or artist.
238            If the url is to an artists main page,
239            all albums will be downloaded.
240            The tracks will be saved to a subdirectory of
241            your current directory.
242            If a track can't be streamed (i.e. private) it
243            won't be downloaded. Multiple urls can be passed.""",
244    )
245
246    parser.add_argument(
247        "-n",
248        "--no_track_number",
249        action="store_true",
250        help=""" By default the track number will be added
251        to the front of the track title. Pass this switch
252        to disable the behavior.""",
253    )
254
255    parser.add_argument(
256        "-o",
257        "--overwrite",
258        action="store_true",
259        help=""" Pass this flag to overwrite existing files.
260        Otherwise don't download tracks that already exist locally.""",
261    )
262
263    args = parser.parse_args()
264    args.urls = [url.strip("/") for url in args.urls]
265
266    return args
def main(args: argparse.Namespace = None):
269def main(args: argparse.Namespace = None):
270    if not args:
271        args = get_args()
272    for url in args.urls:
273        if page_is_discography(url):
274            ripper = BandRipper(url, args.no_track_number)
275        else:
276            ripper = AlbumRipper(url, args.no_track_number)
277        ripper.rip()