feat: reverse-engineered Bandcamp artist HTML structure

This commit is contained in:
Hellow 2023-09-12 17:59:04 +02:00
parent fbc20d7f4d
commit 1a5fbdc0c2
3 changed files with 155 additions and 4 deletions

View File

@ -0,0 +1,125 @@
<!DOCTYPE html>
<html xmlns:og="http://opengraphprotocol.org/schema/" xmlns:fb="http://www.facebook.com/2008/fbml" lang="en">
<head>
<title>Music | Only Smile</title>
<meta name="description" content="
Only Smile.
Russia.
">
<link rel="apple-touch-icon" sizes="180x180" href="https://s4.bcbits.com/img/favicon/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="https://s4.bcbits.com/img/favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="16x16" href="https://s4.bcbits.com/img/favicon/favicon-16x16.png">
<link rel="mask-icon" href="https://s4.bcbits.com/img/favicon/safari-pinned-tab.svg" color="#1da0c3">
<meta name="msapplication-TileColor" content="#603cba">
<meta name="theme-color" content="#ffffff">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="title" content="Only Smile">
<meta property="og:title" content="Only Smile">
<meta property="og:type" content="band">
<meta property="og:url" content="https://onlysmile.bandcamp.com">
<meta property="og:site_name" content="Only Smile">
<meta property="og:description" content="">
</head>
<body class="webkit has-menubar invertIconography " lang="en">
<div id="menubar-wrapper" class="header-rework-2018 ">
<!--- Site-wide menu bar: search box, login icon, etc. (contents omitted) --->
</div>
<div id="centerWrapper">
<div id="propOpenWrapper">
<div id="pgBd" class="yui-skin-sam">
<div id="customHeaderWrapper">
<!--- The banner any artist can set --->
</div>
<div class="leftMiddleColumns">
<!--- The list of all albums. --->
<ol id="music-grid" data-edit-callback="/music_reorder" class="editable-grid music-grid columns-2 public">
<li data-item-id="album-4166209375" data-band-id="798628171" class="music-grid-item square first-four" data-bind="css: {'featured': featured()}">
<a href="/album/your-best-friend">
<div class="art">
<img src="https://f4.bcbits.com/img/a3009649372_16.jpg" alt="" />
</div>
<p class="title">
Your best friend
</p>
</a>
</li>
<li data-item-id="album-2060798800" data-band-id="798628171" class="music-grid-item square first-four" data-bind="css: {'featured': featured()}">
<a href="/album/few-words">
<div class="art">
<img src="https://f4.bcbits.com/img/a1197628604_16.jpg" alt="" />
</div>
<p class="title">
Few words...
</p>
</a>
</li>
</ol>
</div>
<div id="rightColumn" class="rightColumn music-page " itemscope itemtype="http://schema.org/MusicGroup">
<!--- Artist details: name, location, external links and the contact link --->
<div id="bio-container" data-bind="css: {'ko-ready': $data}">
<h3 class="title bio-label hiddenAccess">about</h3>
<p id="band-name-location">
<span class="title">Only Smile</span>
<span class="location secondaryText">Russia</span>
</p>
<ol id="band-links">
<li>
<a target="_blank" rel="nofollow ugc me" referrerpolicy="strict-origin-when-cross-origin" href="https://www.youtube.com/channel/UCZzqXSdk8wnl9qirJTCaANQ">
YouTube
</a>
</li>
</ol>
</div>
<p id="contact-tracker-data" data-band-id="798628171">
<a href="/contact?b=798628171&amp;n=Only%20Smile" title="Send an email to Only Smile">
Contact Only Smile
</a>
</p>
<p>
<a href="https://bandcamp.com/help/downloading?from=tralbum_downloading" target="_blank">
Streaming and
<span id="sidebar-contact-label-break"><br></span>
Download help
</a>
</p>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@ -29,7 +29,8 @@ if __name__ == "__main__":
]
# Command strings handed to music_kraken.cli.download via command_list below.
youtube_music_test = [
# NOTE(review): the next two literals are adjacent with no comma between them,
# so Python joins them into the single string
# "s: #t Self Loathers: #a Only Smile". The hunk header (-29,7 +29,8) suggests
# the first literal is the removed side of the diff and only the second one is
# meant to remain -- confirm against the real file.
"s: #t Self Loather"
"s: #a Only Smile",
"0"
]
# Hand the command list above to the CLI download entry point.
music_kraken.cli.download(genre="test", command_list=youtube_music_test, process_metadata_anyway=True)

View File

@ -43,6 +43,15 @@ class Bandcamp(Page):
super().__init__(*args, **kwargs)
def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
    """
    Decide which kind of object a Bandcamp url points at.

    Only the path component of ``source.url`` is looked at:

    * no path, or just ``/``  -> ``Artist``
    * ``/album/...``          -> ``Album``
    * ``/track/...``          -> ``Song``

    Every other path is delegated to the parent implementation.

    :param source: the source whose ``url`` is inspected
    :return: ``Artist``, ``Album``, ``Song`` or the parent class' result
    """
    path = urlparse(source.url).path

    # "https://x.bandcamp.com" parses to the path "" while
    # "https://x.bandcamp.com/" parses to "/"; both are the same page.
    if path in ("", "/"):
        return Artist

    if path.startswith("/album/"):
        return Album

    if path.startswith("/track/"):
        return Song

    return super().get_source_type(source)
def _parse_autocomplete_api_result(self, data: dict) -> DatabaseObject:
@ -135,15 +144,31 @@ class Bandcamp(Page):
def song_search(self, song: Song) -> List[Song]:
    """
    Search for a song by its title.

    Calls ``general_search`` with ``song.title`` as the query and ``"t"``
    as the filter string, and returns its result unchanged.

    :param song: the song whose ``title`` is used as the query
    :return: whatever ``general_search`` returns for that query
    """
    query = song.title
    return self.general_search(query, filter_string="t")
def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist:
    """
    Request the artist page behind ``source.url``.

    At this stage the page is only requested and, when ``DEBUG`` is set,
    handed to ``dump_to_file`` for inspection; nothing is parsed into the
    returned ``Artist`` yet.

    :param source: the source whose ``url`` is requested
    :param stop_at_level: not used by this method
    :return: a new ``Artist`` with no data filled in
    """
    artist = Artist()

    r = self.connection.get(source.url)
    if r is None:
        return artist

    soup = self.get_soup_from_response(r)

    # find() yields None when the page has no "pagedata" div (assuming soup
    # is a BeautifulSoup object); subscripting None would raise a TypeError,
    # so the blob is only read when the container exists.
    data_container = soup.find("div", {"id": "pagedata"})
    data = None if data_container is None else data_container["data-blob"]

    if DEBUG:
        dump_to_file("artist_page.html", r.text, exit_after_dump=False)
        if data is not None:
            dump_to_file("bandcamp_artis.json", data, is_json=True, exit_after_dump=False)

    return artist

def fetch_song(self, source: Source, stop_at_level: int = 1) -> Song:
    """
    Return a new, empty ``Song``; the source is printed but not requested.

    :param source: the source, printed to stdout
    :param stop_at_level: not used by this method
    :return: a new ``Song`` with no data filled in
    """
    print(source)
    return Song()

def fetch_album(self, source: Source, stop_at_level: int = 1) -> Album:
    """
    Return a new, empty ``Album``; neither argument is used.

    :param source: not used by this method
    :param stop_at_level: not used by this method
    :return: a new ``Album`` with no data filled in
    """
    return Album()

# NOTE: the second ``fetch_artist`` that used to follow here only returned
# ``Artist()``. Being defined later, it replaced the implementation above,
# so it was dropped.
def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
    """
    Return a new, empty ``Label``; neither argument is used.

    :param source: not used by this method
    :param stop_at_level: not used by this method
    :return: a new ``Label`` with no data filled in
    """
    empty_label = Label()
    return empty_label