bandripper.bandripper

import argparse
import json
import re
import string
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse

import requests
import whosyouragent
from bs4 import BeautifulSoup
from noiftimer import Timer
from printbuddies import ProgBar

root = Path(__file__).parent


def clean_string(text: str) -> str:
    """Remove punctuation from text."""
    return re.sub(f"[{re.escape(string.punctuation)}]", "", text)


@dataclass
class Track:
    title: str
    number: int
    url: str

    def __post_init__(self):
        self.title = clean_string(self.title)

    @property
    def numbered_title(self) -> str:
        """The track title prefixed with its zero-padded track number, e.g. '01 - Intro'."""
        num = str(self.number)
        if len(num) == 1:
            num = "0" + num
        return f"{num} - {self.title}"


@dataclass
class Album:
    url: str
    artist: str = None
    title: str = None
    tracks: list[Track] = None
    art_url: str = None

    def __repr__(self):
        return f"{self.title} by {self.artist}"

    def __post_init__(self):
        """Fetch the album page and populate the artist, title, track list, and album art url."""
        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting album info failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        # The album art url is stored in the page's Open Graph image tag.
        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
        # The album data is embedded as JSON in the "data-tralbum" attribute of the
        # script tag that also carries a "data-cart" attribute.
        data = None
        for script in soup.find_all("script"):
            if script.get("data-cart"):
                data = script
                break
        if data is None:
            raise RuntimeError(f"Could not find album data on {self.url}.")
        data = json.loads(data.attrs["data-tralbum"])
        self.artist = clean_string(data["artist"])
        self.title = clean_string(data["current"]["title"])
        # Only keep tracks that have a public stream url.
        self.tracks = [
            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
            for track in data["trackinfo"]
            if track.get("file")
        ]


class AlbumRipper:
    def __init__(self, album_url: str, no_track_number: bool):
        """
        :param no_track_number: If True, don't add the track
        number to the front of the track title."""
        self.album = Album(album_url)
        self.no_track_number = no_track_number

    def make_save_path(self):
        """Create `{current_directory}/{artist}/{album_title}` if it doesn't already exist."""
        self.save_path = Path.cwd() / self.album.artist / self.album.title
        self.save_path.mkdir(parents=True, exist_ok=True)

    @property
    def headers(self) -> dict:
        """Get a headers dict with a random user agent."""
        return whosyouragent.get_agent(as_dict=True)

    def save_track(self, track_title: str, content: bytes) -> Path:
        """Save track to self.save_path/{track_title}.mp3.
        Returns the Path object for the save location.

        :param content: The binary data of the track."""
        file_path = self.save_path / f"{track_title}.mp3"
        file_path.write_bytes(content)
        return file_path

    def get_track_content(self, track_url: str) -> bytes:
        """Make a request to track_url and return the content.
        Raises a RuntimeError exception if response.status_code != 200."""
        response = requests.get(track_url, headers=self.headers)
        if response.status_code != 200:
            raise RuntimeError(
                f"Downloading track failed with status code {response.status_code}."
            )
        return response.content

    def download_album_art(self):
        """Download the album art and save it as a .jpg."""
        file_path = self.save_path / f"{self.album.title}.jpg"
        try:
            response = requests.get(self.album.art_url, headers=self.headers)
            file_path.write_bytes(response.content)
        except Exception as e:
            print(f"Failed to download art for {self.album}.")
            print(e)

    def rip(self):
        """Download and save the album tracks and album art."""
        if len(self.album.tracks) == 0:
            print(f"No public tracks available for {self.album}.")
            return None
        self.make_save_path()
        self.download_album_art()
        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
        fails = []
        for track in self.album.tracks:
            bar.display(
                suffix=f"Downloading {track.title}",
                counter_override=1 if len(self.album.tracks) == 1 else None,
            )
            try:
                content = self.get_track_content(track.url)
                self.save_track(
                    track.title if self.no_track_number else track.numbered_title,
                    content,
                )
            except Exception as e:
                fails.append((track, str(e)))
        elapsed_time = bar.timer.current_elapsed_time(subsecond_resolution=True)
        print(f"Finished downloading {self.album} in {elapsed_time}.")
        if fails:
            print("The following tracks failed to download:")
            for track, error in fails:
                print(f"{track.title}: {error}")


class BandRipper:
    def __init__(self, band_url: str, no_track_number: bool):
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        parsed_url = urlparse(band_url)
        base_url = f"https://{parsed_url.netloc}"
        return [base_url + album.a.get("href") for album in grid.find_all("li")]

    def rip(self):
        """Download every album in the discography."""
        if not self.albums:
            print(f"No albums could be loaded from {self.band_url}.")
            return None
        print(
            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
        )
        timer = Timer()
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        artist = self.albums[0].album.artist
        elapsed_time = timer.current_elapsed_time()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {elapsed_time}."
        )
        if fails:
            print("The following downloads failed:")
            for album, error in fails:
                print(f"{album}: {error}")


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        help=""" The bandcamp url(s) for the album or artist.
            If the url is to an artist's main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. it's private),
            it won't be downloaded. Multiple urls can be passed.""",
    )

    parser.add_argument(
        "-n",
        "--no_track_number",
        action="store_true",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable that behavior.""",
    )

    args = parser.parse_args()
    args.urls = [url.strip("/") for url in args.urls]

    return args


def page_is_discography(url: str) -> bool:
    """Returns whether the url points to a discography page or not."""
    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
    if response.status_code != 200:
        raise RuntimeError(
            f"Getting {url} failed with status code {response.status_code}."
        )
    soup = BeautifulSoup(response.text, "html.parser")
    # Discography pages have a "music-grid" list; find() returns None if it doesn't exist.
    grid = soup.find("ol", attrs={"id": "music-grid"})
    return grid is not None


def main(args: argparse.Namespace = None):
    if not args:
        args = get_args()
    for url in args.urls:
        if page_is_discography(url):
            ripper = BandRipper(url, args.no_track_number)
        else:
            ripper = AlbumRipper(url, args.no_track_number)
        ripper.rip()


if __name__ == "__main__":
    main(get_args())
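
Usage sketch: the snippet below mirrors what main() does for a single url, but drives the classes directly. It is a minimal example rather than part of the module; the url is hypothetical and it assumes the package is importable as bandripper.bandripper.

# Minimal usage sketch (hypothetical url; assumes bandripper is installed).
from bandripper.bandripper import AlbumRipper, BandRipper, page_is_discography

url = "https://someband.bandcamp.com/album/some-album"  # hypothetical album url
if page_is_discography(url):
    # An artist's main page: download the whole discography.
    BandRipper(url, no_track_number=False).rip()
else:
    # A single album page: download just that album.
    AlbumRipper(url, no_track_number=False).rip()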
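
Alternatively, main() can be called with a pre-built argparse.Namespace carrying the same attributes that get_args() produces (urls and no_track_number). This is only an illustrative sketch, again with a hypothetical url.

# Driving main() directly with a hand-built Namespace (hypothetical url).
import argparse

from bandripper.bandripper import main

main(argparse.Namespace(urls=["https://someband.bandcamp.com"], no_track_number=True))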