From cc5ee8838994d696e92325120cac052ae8699c74 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 14:15:56 +0200 Subject: [PATCH] feat: tried fixing the collections --- music_kraken/__init__.py | 2 +- music_kraken/objects/collection.py | 16 ++++++++-------- music_kraken/objects/parents.py | 3 +++ music_kraken/objects/song.py | 20 +++++++++++++++++--- music_kraken/pages/musify.py | 19 ++++--------------- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/music_kraken/__init__.py b/music_kraken/__init__.py index 7697a3b..0f906f6 100644 --- a/music_kraken/__init__.py +++ b/music_kraken/__init__.py @@ -46,7 +46,7 @@ init_logging() from . import cli if DEBUG: - sys.setrecursionlimit(500) + sys.setrecursionlimit(5000) if main_settings['modify_gc']: diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index d0d455f..d351774 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator from .parents import OuterProxy T = TypeVar('T', bound=OuterProxy) @@ -35,11 +35,8 @@ class Collection(Generic[T]): # Key: collection attribute (str) of appended element # Value: main collection to sync to self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} - self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} - self.contain_self_on_append: List[str] = [] - self._indexed_values = defaultdict(set) self._indexed_to_objects = defaultdict(list) @@ -239,18 +236,21 @@ class Collection(Generic[T]): for attribute, new_object in self.append_object_to_attribute.items(): __object.__getattribute__(attribute).append(new_object) else: - # merge + # merge only if the two objects are not the same + if existing_object.id == __object.id: + return + append_to._unmap_element(existing_object) existing_object.merge(__object) append_to._map_element(existing_object) - def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): + def extend(self, __iterable: Optional[Generator[T, None, None]]): if __iterable is None: return for __object in __iterable: - self.append(__object, from_map=from_map) + self.append(__object) def sync_with_other_collection(self, equal_collection: Collection): """ @@ -306,7 +306,7 @@ class Collection(Generic[T]): yield from c.__iter__(finished_ids=finished_ids) def __merge__(self, __other: Collection, override: bool = False): - self.extend(__other._data, from_map=True) + self.extend(__other) def __getitem__(self, item: int): if item < len(self._data): diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 6385a2d..4a8d064 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -201,6 +201,9 @@ class OuterProxy: for instance in b._inner._refers_to_instances: instance._inner = a._inner + def __merge__(self, __other: Optional[OuterProxy], override: bool = False): + self.merge(__other, override) + def mark_as_fetched(self, *url_hash_list: List[str]): for url_hash in url_hash_list: self._fetched_from[url_hash] = { diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 1723539..7608f7e 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -49,6 +49,7 @@ class Song(Base): source_collection: SourceCollection target_collection: Collection[Target] lyrics_collection: Collection[Lyrics] + main_artist_collection: Collection[Artist] feature_artist_collection: Collection[Artist] album_collection: Collection[Album] @@ -241,13 +242,17 @@ class Album(Base): UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") def __init_collections__(self): - self.song_collection.contain_attribute_in_given = { - "main_artist_collection": self.artist_collection - } self.song_collection.append_object_to_attribute = { "album_collection": self } + self.artist_collection.append_object_to_attribute = { + "main_album_collection": self + } + self.artist_collection.contain_given_in_attribute = { + "label_collection": self.label_collection + } + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: self.song_collection.extend(object_list) @@ -642,6 +647,15 @@ class Label(Base): contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, **kwargs) + def __init_collections__(self): + self.album_collection.append_object_to_attribute = { + "label_collection": self + } + + self.current_artist_collection.append_object_to_attribute = { + "label_collection": self + } + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index c9c6460..43e644f 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -1,7 +1,7 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Type, Union +from typing import List, Optional, Type, Union, Generator from urllib.parse import urlparse import pycountry @@ -1056,7 +1056,7 @@ class Musify(Page): date=date ) - def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]: + def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]: """ POST https://musify.club/artist/filteralbums ArtistID: 280348 @@ -1077,18 +1077,8 @@ class Musify(Page): return [] soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") - discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): - new_album: Album = self._parse_album_card(card_soup, artist_name) - album_source: Source - - if stop_at_level > 1: - for album_source in new_album.source_collection.get_sources_from_page(self.SOURCE_TYPE): - new_album.merge(self.fetch_album(album_source, stop_at_level=stop_at_level-1)) - - discography.append(new_album) - - return discography + yield self._parse_album_card(card_soup, artist_name) def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: """ @@ -1110,8 +1100,7 @@ class Musify(Page): artist = self._get_artist_attributes(url) - discography: List[Album] = self._get_discography(url, artist.name) - artist.main_album_collection.extend(discography) + artist.main_album_collection.extend(self._get_discography(url, artist.name)) return artist