From 71d582a17ca039c002901831f713ce63d7a5b4e6 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Apr 2024 14:19:07 +0200 Subject: [PATCH 01/14] fix: improved debug for requests --- music_kraken/connection/connection.py | 27 ++++++++++++++------------- music_kraken/utils/shared.py | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/music_kraken/connection/connection.py b/music_kraken/connection/connection.py index eb3de20..1ad7e77 100644 --- a/music_kraken/connection/connection.py +++ b/music_kraken/connection/connection.py @@ -101,7 +101,7 @@ class Connection: } if self.HOST is not None: - headers["Host"] = self.HOST.netloc + # headers["Host"] = self.HOST.netloc headers["Referer"] = self.base_url(url=self.HOST) headers.update(header_values) @@ -215,10 +215,6 @@ class Connection: self.save(r, name, **kwargs) return r - if self.SEMANTIC_NOT_FOUND and r.status_code == 404: - self.LOGGER.warning(f"Couldn't find url (404): {request_url}") - return None - # the server rejected the request, or the internet is lacking except requests.exceptions.Timeout: self.LOGGER.warning(f"Request timed out at \"{request_url}\": ({try_count}-{self.TRIES})") @@ -231,15 +227,20 @@ class Connection: finally: self.lock = False - if not connection_failed: + if r is None: + self.LOGGER.warning(f"{self.HOST.netloc} didn't respond at {url}. ({try_count}-{self.TRIES})") + self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in headers.items())) + else: self.LOGGER.warning(f"{self.HOST.netloc} responded wit {r.status_code} at {url}. ({try_count}-{self.TRIES})") - if r is not None: - self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items())) - self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items())) - self.LOGGER.debug(r.content) - - if name != "": - self.save(r, name, error=True, **kwargs) + self.LOGGER.debug("request headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.request.headers.items())) + self.LOGGER.debug("response headers:\n\t"+ "\n\t".join(f"{k}\t=\t{v}" for k, v in r.headers.items())) + self.LOGGER.debug(r.content) + + if name != "": + self.save(r, name, error=True, **kwargs) + + if self.SEMANTIC_NOT_FOUND and r.status_code == 404: + return None if sleep_after_404 != 0: self.LOGGER.warning(f"Waiting for {sleep_after_404} seconds.") diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index b3f30e5..6676393 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -13,7 +13,7 @@ if not load_dotenv(Path(__file__).parent.parent.parent / ".env"): __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True -DEBUG_LOGGING = DEBUG and False +DEBUG_LOGGING = DEBUG and True DEBUG_TRACE = DEBUG and True DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False From 0b216b7d40476f6566c7bd7c7cc7ccb91e238f5a Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Tue, 16 Apr 2024 17:50:01 +0200 Subject: [PATCH 02/14] fix: intermediate transport commit --- .vscode/settings.json | 1 + development/actual_donwload.py | 4 +-- music_kraken/objects/collection.py | 53 ++++++++++++++---------------- music_kraken/objects/song.py | 2 +- 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2a9cc2b..aca8e78 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -26,6 +26,7 @@ "OKBLUE", "Referer", "tracksort", + "unmap", "youtube" ] } \ No newline at end of file diff --git a/development/actual_donwload.py b/development/actual_donwload.py index 76cc2a3..ed2a9de 100644 --- a/development/actual_donwload.py +++ b/development/actual_donwload.py @@ -6,8 +6,8 @@ logging.getLogger().setLevel(logging.DEBUG) if __name__ == "__main__": commands = [ - "s: #a And End...", - "d: 10", + "s: #a Ghost Bath", + "4", ] diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 92617f0..f18df69 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -110,7 +110,7 @@ class Collection(Generic[T]): if self._contained_in_self(__object): return [self] - for collection in (*self.children, *self.parents): + for collection in self.children: results.extend(collection._contained_in_sub(__object, break_at_first=break_at_first)) if break_at_first: @@ -198,7 +198,7 @@ class Collection(Generic[T]): if value in self._indexed_values[name]: return self._indexed_to_objects[value][0] - def _find_object(self, __object: T) -> Tuple[Collection[T], Optional[T]]: + def _find_object(self, __object: T, no_sibling: bool = False) -> Tuple[Collection[T], Optional[T]]: other_object = self._find_object_in_self(__object) if other_object is not None: return self, other_object @@ -208,6 +208,19 @@ class Collection(Generic[T]): if other_object is not None: return o, other_object + if no_sibling: + return self, None + + # find in siblings and all children of siblings + for parent in self.parents: + for sibling in parent.children: + if sibling is self: + continue + + o, other_object = sibling._find_object(__object, no_sibling=True) + if other_object is not None: + return o, other_object + return self, None def append(self, __object: Optional[T], already_is_parent: bool = False, from_map: bool = False): @@ -229,33 +242,14 @@ class Collection(Generic[T]): if existing_object is None: # append - # print("appending", existing_object, __object) append_to._data.append(__object) + append_to._map_element(__object, from_map=from_map) else: # merge append_to._unmap_element(existing_object) existing_object.merge(__object) + append_to._map_element(existing_object, from_map=from_map) - append_to._map_element(__object, from_map=from_map) - - """ - exists_in_collection = self._contained_in_sub(__object) - if len(exists_in_collection) and self is exists_in_collection[0]: - # assuming that the object already is contained in the correct collections - if not already_is_parent: - self.merge_into_self(__object, from_map=from_map) - return - - if not len(exists_in_collection): - self._append(__object, from_map=from_map) - else: - exists_in_collection[0].merge_into_self(__object, from_map=from_map) - - if not already_is_parent or not self._is_root: - for parent_collection in self._get_parents_of_multiple_contained_children(__object): - pass - parent_collection.append(__object, already_is_parent=True, from_map=from_map) - """ def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): if __iterable is None: @@ -296,8 +290,7 @@ class Collection(Generic[T]): @property def data(self) -> List[T]: - return [*self._data, - *(__object for collection in self.children for __object in collection.shallow_list)] + return list(i for i in self.__iter__()) def __len__(self) -> int: return len(self._data) + sum(len(collection) for collection in self.children) @@ -306,13 +299,17 @@ class Collection(Generic[T]): def empty(self) -> bool: return self.__len__() <= 0 - def __iter__(self) -> Iterator[T]: + def __iter__(self, finished_ids: set = None) -> Iterator[T]: + _finished_ids = finished_ids or set() + for element in self._data: + if element.id in _finished_ids: + continue + _finished_ids.add(element.id) yield element for c in self.children: - for element in c: - yield element + yield from c.__iter__(finished_ids=finished_ids) def __merge__(self, __other: Collection, override: bool = False): self.extend(__other._data, from_map=True) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 716694c..1723539 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -299,7 +299,7 @@ class Album(Base): @property def option_string(self) -> str: return f"{self.__repr__()} " \ - f"by Artist({OPTION_STRING_DELIMITER.join([artist.name for artist in self.artist_collection])}) " \ + f"by Artist({OPTION_STRING_DELIMITER.join([artist.name + str(artist.id) for artist in self.artist_collection])}) " \ f"under Label({OPTION_STRING_DELIMITER.join([label.name for label in self.label_collection])})" @property From 42c4a04b62f7ba1975c8ce44d3447dc7f147b412 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 13:39:58 +0200 Subject: [PATCH 03/14] feat: cleaned the modification of the new collections --- music_kraken/objects/collection.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index f18df69..d0d455f 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -58,16 +58,6 @@ class Collection(Generic[T]): self._indexed_values[name].add(value) self._indexed_to_objects[value].append(__object) - if not from_map: - for attribute, new_object in self.contain_given_in_attribute.items(): - __object.__getattribute__(attribute).contain_collection_inside(new_object) - - for attribute, new_object in self.contain_attribute_in_given.items(): - new_object.contain_collection_inside(__object.__getattribute__(attribute)) - - for attribute, new_object in self.append_object_to_attribute.items(): - __object.__getattribute__(attribute).append(new_object) - def _unmap_element(self, __object: T): if __object.id in self._contains_ids: self._contains_ids.remove(__object.id) @@ -186,11 +176,6 @@ class Collection(Generic[T]): def contains(self, __object: T) -> bool: return len(self._contained_in_sub(__object)) > 0 - def _append(self, __object: T, from_map: bool = False): - print(self, __object) - self._map_element(__object, from_map=from_map) - self._data.append(__object) - def _find_object_in_self(self, __object: T) -> Optional[T]: for name, value in __object.indexing_values: if value is None or value == __object._default_factories.get(name, lambda: None)(): @@ -211,6 +196,7 @@ class Collection(Generic[T]): if no_sibling: return self, None + """ # find in siblings and all children of siblings for parent in self.parents: for sibling in parent.children: @@ -220,6 +206,7 @@ class Collection(Generic[T]): o, other_object = sibling._find_object(__object, no_sibling=True) if other_object is not None: return o, other_object + """ return self, None @@ -243,12 +230,19 @@ class Collection(Generic[T]): if existing_object is None: # append append_to._data.append(__object) - append_to._map_element(__object, from_map=from_map) + append_to._map_element(__object) + + # only modify collections if the object actually has been appended + for collection_attribute, new_object in self.contain_given_in_attribute.items(): + __object.__getattribute__(collection_attribute).contain_collection_inside(new_object) + + for attribute, new_object in self.append_object_to_attribute.items(): + __object.__getattribute__(attribute).append(new_object) else: # merge append_to._unmap_element(existing_object) existing_object.merge(__object) - append_to._map_element(existing_object, from_map=from_map) + append_to._map_element(existing_object) def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): From cc5ee8838994d696e92325120cac052ae8699c74 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 14:15:56 +0200 Subject: [PATCH 04/14] feat: tried fixing the collections --- music_kraken/__init__.py | 2 +- music_kraken/objects/collection.py | 16 ++++++++-------- music_kraken/objects/parents.py | 3 +++ music_kraken/objects/song.py | 20 +++++++++++++++++--- music_kraken/pages/musify.py | 19 ++++--------------- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/music_kraken/__init__.py b/music_kraken/__init__.py index 7697a3b..0f906f6 100644 --- a/music_kraken/__init__.py +++ b/music_kraken/__init__.py @@ -46,7 +46,7 @@ init_logging() from . import cli if DEBUG: - sys.setrecursionlimit(500) + sys.setrecursionlimit(5000) if main_settings['modify_gc']: diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index d0d455f..d351774 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator from .parents import OuterProxy T = TypeVar('T', bound=OuterProxy) @@ -35,11 +35,8 @@ class Collection(Generic[T]): # Key: collection attribute (str) of appended element # Value: main collection to sync to self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} - self.contain_attribute_in_given: Dict[str, Collection] = contain_attribute_in_given or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} - self.contain_self_on_append: List[str] = [] - self._indexed_values = defaultdict(set) self._indexed_to_objects = defaultdict(list) @@ -239,18 +236,21 @@ class Collection(Generic[T]): for attribute, new_object in self.append_object_to_attribute.items(): __object.__getattribute__(attribute).append(new_object) else: - # merge + # merge only if the two objects are not the same + if existing_object.id == __object.id: + return + append_to._unmap_element(existing_object) existing_object.merge(__object) append_to._map_element(existing_object) - def extend(self, __iterable: Optional[Iterable[T]], from_map: bool = False): + def extend(self, __iterable: Optional[Generator[T, None, None]]): if __iterable is None: return for __object in __iterable: - self.append(__object, from_map=from_map) + self.append(__object) def sync_with_other_collection(self, equal_collection: Collection): """ @@ -306,7 +306,7 @@ class Collection(Generic[T]): yield from c.__iter__(finished_ids=finished_ids) def __merge__(self, __other: Collection, override: bool = False): - self.extend(__other._data, from_map=True) + self.extend(__other) def __getitem__(self, item: int): if item < len(self._data): diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 6385a2d..4a8d064 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -201,6 +201,9 @@ class OuterProxy: for instance in b._inner._refers_to_instances: instance._inner = a._inner + def __merge__(self, __other: Optional[OuterProxy], override: bool = False): + self.merge(__other, override) + def mark_as_fetched(self, *url_hash_list: List[str]): for url_hash in url_hash_list: self._fetched_from[url_hash] = { diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 1723539..7608f7e 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -49,6 +49,7 @@ class Song(Base): source_collection: SourceCollection target_collection: Collection[Target] lyrics_collection: Collection[Lyrics] + main_artist_collection: Collection[Artist] feature_artist_collection: Collection[Artist] album_collection: Collection[Album] @@ -241,13 +242,17 @@ class Album(Base): UPWARDS_COLLECTION_STRING_ATTRIBUTES = ("artist_collection", "label_collection") def __init_collections__(self): - self.song_collection.contain_attribute_in_given = { - "main_artist_collection": self.artist_collection - } self.song_collection.append_object_to_attribute = { "album_collection": self } + self.artist_collection.append_object_to_attribute = { + "main_album_collection": self + } + self.artist_collection.contain_given_in_attribute = { + "label_collection": self.label_collection + } + def _add_other_db_objects(self, object_type: Type[OuterProxy], object_list: List[OuterProxy]): if object_type is Song: self.song_collection.extend(object_list) @@ -642,6 +647,15 @@ class Label(Base): contact_list=contact_list, album_list=album_list, current_artist_list=current_artist_list, **kwargs) + def __init_collections__(self): + self.album_collection.append_object_to_attribute = { + "label_collection": self + } + + self.current_artist_collection.append_object_to_attribute = { + "label_collection": self + } + @property def indexing_values(self) -> List[Tuple[str, object]]: return [ diff --git a/music_kraken/pages/musify.py b/music_kraken/pages/musify.py index c9c6460..43e644f 100644 --- a/music_kraken/pages/musify.py +++ b/music_kraken/pages/musify.py @@ -1,7 +1,7 @@ from collections import defaultdict from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Type, Union +from typing import List, Optional, Type, Union, Generator from urllib.parse import urlparse import pycountry @@ -1056,7 +1056,7 @@ class Musify(Page): date=date ) - def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> List[Album]: + def _get_discography(self, url: MusifyUrl, artist_name: str = None, stop_at_level: int = 1) -> Generator[Album, None, None]: """ POST https://musify.club/artist/filteralbums ArtistID: 280348 @@ -1077,18 +1077,8 @@ class Musify(Page): return [] soup: BeautifulSoup = BeautifulSoup(r.content, features="html.parser") - discography: List[Album] = [] for card_soup in soup.find_all("div", {"class": "card"}): - new_album: Album = self._parse_album_card(card_soup, artist_name) - album_source: Source - - if stop_at_level > 1: - for album_source in new_album.source_collection.get_sources_from_page(self.SOURCE_TYPE): - new_album.merge(self.fetch_album(album_source, stop_at_level=stop_at_level-1)) - - discography.append(new_album) - - return discography + yield self._parse_album_card(card_soup, artist_name) def fetch_artist(self, source: Source, stop_at_level: int = 1) -> Artist: """ @@ -1110,8 +1100,7 @@ class Musify(Page): artist = self._get_artist_attributes(url) - discography: List[Album] = self._get_discography(url, artist.name) - artist.main_album_collection.extend(discography) + artist.main_album_collection.extend(self._get_discography(url, artist.name)) return artist From b15d0839ef77349511146becb8bbe8417a6fe89e Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 17:24:51 +0200 Subject: [PATCH 05/14] feat: removed children from collections after merging --- music_kraken/__init__.py | 2 +- music_kraken/objects/collection.py | 43 ++++++++---------------------- music_kraken/objects/parents.py | 20 ++++++++++---- music_kraken/utils/shared.py | 2 +- 4 files changed, 28 insertions(+), 39 deletions(-) diff --git a/music_kraken/__init__.py b/music_kraken/__init__.py index 0f906f6..f67bf3b 100644 --- a/music_kraken/__init__.py +++ b/music_kraken/__init__.py @@ -46,7 +46,7 @@ init_logging() from . import cli if DEBUG: - sys.setrecursionlimit(5000) + sys.setrecursionlimit(300) if main_settings['modify_gc']: diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index d351774..e4905dc 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -43,13 +43,11 @@ class Collection(Generic[T]): self.extend(data) def _map_element(self, __object: T, from_map: bool = False): - if __object.id in self._contains_ids: - return - + __object._inner._mapped_in_collection.add(self) self._contains_ids.add(__object.id) for name, value in __object.indexing_values: - if value is None: + if value is None or value == __object._inner._default_values.get(name): continue self._indexed_values[name].add(value) @@ -175,8 +173,6 @@ class Collection(Generic[T]): def _find_object_in_self(self, __object: T) -> Optional[T]: for name, value in __object.indexing_values: - if value is None or value == __object._default_factories.get(name, lambda: None)(): - continue if value in self._indexed_values[name]: return self._indexed_to_objects[value][0] @@ -219,7 +215,7 @@ class Collection(Generic[T]): :return: """ - if __object is None or __object.id in self._contains_ids: + if __object is None: return append_to, existing_object = self._find_object(__object) @@ -230,8 +226,8 @@ class Collection(Generic[T]): append_to._map_element(__object) # only modify collections if the object actually has been appended - for collection_attribute, new_object in self.contain_given_in_attribute.items(): - __object.__getattribute__(collection_attribute).contain_collection_inside(new_object) + for collection_attribute, child_collection in self.contain_given_in_attribute.items(): + __object.__getattribute__(collection_attribute).contain_collection_inside(child_collection, __object) for attribute, new_object in self.append_object_to_attribute.items(): __object.__getattribute__(attribute).append(new_object) @@ -252,39 +248,22 @@ class Collection(Generic[T]): for __object in __iterable: self.append(__object) - def sync_with_other_collection(self, equal_collection: Collection): - """ - If two collections always need to have the same values, this can be used. - - Internally: - 1. import the data from other to self - - _data - - contained_collections - 2. replace all refs from the other object, with refs from this object - """ - if equal_collection is self: - return - - # don't add the elements from the subelements from the other collection. - # this will be done in the next step. - self.extend(equal_collection._data) - # add all submodules - for equal_sub_collection in equal_collection.children: - self.contain_collection_inside(equal_sub_collection) - - def contain_collection_inside(self, sub_collection: Collection): + def contain_collection_inside(self, sub_collection: Collection, _object: T): """ This collection will ALWAYS contain everything from the passed in collection """ if self is sub_collection or sub_collection in self.children: return + _object._inner._is_collection_child[self] = sub_collection + _object._inner._is_collection_parent[sub_collection] = self + self.children.append(sub_collection) sub_collection.parents.append(self) @property def data(self) -> List[T]: - return list(i for i in self.__iter__()) + return list(self.__iter__()) def __len__(self) -> int: return len(self._data) + sum(len(collection) for collection in self.children) @@ -306,7 +285,7 @@ class Collection(Generic[T]): yield from c.__iter__(finished_ids=finished_ids) def __merge__(self, __other: Collection, override: bool = False): - self.extend(__other) + self.extend(__other.__iter__()) def __getitem__(self, item: int): if item < len(self._data): diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 4a8d064..feebe3d 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -29,12 +29,16 @@ class InnerData: _refers_to_instances: set = None def __init__(self, object_type, **kwargs): - self._refers_to_instances =set() + self._refers_to_instances = set() + + # collection : collection that is a collection of self + self._is_collection_child: Dict[Collection, Collection] = {} + self._is_collection_parent: Dict[Collection, Collection] = {} # initialize the default values - self.__default_values = {} + self._default_values = {} for name, factory in object_type._default_factories.items(): - self.__default_values[name] = factory() + self._default_values[name] = factory() for key, value in kwargs.items(): self.__setattr__(key, value) @@ -48,7 +52,7 @@ class InnerData: for key, value in __other.__dict__.copy().items(): # just set the other value if self doesn't already have it - if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self.__default_values.get(key)): + if key not in self.__dict__ or (key in self.__dict__ and self.__dict__[key] == self._default_values.get(key)): self.__setattr__(key, value) continue @@ -183,7 +187,7 @@ class OuterProxy: if __other is None: return - object_trace(f"merging {type(self).__name__} [{self.title_string}] with {type(__other).__name__} [{__other.title_string}]") + object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}]") a = self b = __other @@ -196,6 +200,12 @@ class OuterProxy: a, b = b, a a._inner.__merge__(b._inner, override=override) + for collection, child_collection in b._inner._is_collection_child.items(): + collection.children.remove(child_collection) + + for collection, parent_collection in b._inner._is_collection_parent.items(): + collection.parents.remove(parent_collection) + a._inner._refers_to_instances.update(b._inner._refers_to_instances) for instance in b._inner._refers_to_instances: diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 6676393..8861fce 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and True DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and False +DEBUG_OBJECT_TRACE = DEBUG and True DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False DEBUG_DUMP = DEBUG and True From cde3b3dbbbee07f6149eafe1f358e1466404124d Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 17:52:59 +0200 Subject: [PATCH 06/14] fix: correct unmaping of values --- music_kraken/objects/collection.py | 40 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index e4905dc..1fba27d 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -37,39 +37,33 @@ class Collection(Generic[T]): self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} - self._indexed_values = defaultdict(set) - self._indexed_to_objects = defaultdict(list) + self._id_to_index_values: Dict[int, set] = defaultdict(set) + self._indexed_values = defaultdict(lambda: None) + self._indexed_to_objects = defaultdict(lambda: None) self.extend(data) def _map_element(self, __object: T, from_map: bool = False): - __object._inner._mapped_in_collection.add(self) self._contains_ids.add(__object.id) for name, value in __object.indexing_values: if value is None or value == __object._inner._default_values.get(name): continue - self._indexed_values[name].add(value) - self._indexed_to_objects[value].append(__object) + self._indexed_values[name] = value + self._indexed_to_objects[value] = __object + + self._id_to_index_values[__object.id].add((name, value)) def _unmap_element(self, __object: T): if __object.id in self._contains_ids: self._contains_ids.remove(__object.id) - for name, value in __object.indexing_values: - if value is None: - continue - if value not in self._indexed_values[name]: - continue + for name, value in self._id_to_index_values[__object.id]: + del self._indexed_values[name] + del self._indexed_to_objects[value] - try: - self._indexed_to_objects[value].remove(__object) - except ValueError: - continue - - if not len(self._indexed_to_objects[value]): - self._indexed_values[name].remove(value) + del self._id_to_index_values[__object.id] def _contained_in_self(self, __object: T) -> bool: if __object.id in self._contains_ids: @@ -78,7 +72,7 @@ class Collection(Generic[T]): for name, value in __object.indexing_values: if value is None: continue - if value in self._indexed_values[name]: + if value == self._indexed_values[name]: return True return False @@ -150,8 +144,8 @@ class Collection(Generic[T]): if value is None: continue - if value in self._indexed_values[name]: - existing_object = self._indexed_to_objects[value][0] + if value == self._indexed_values[name]: + existing_object = self._indexed_to_objects[value] if existing_object.id == __object.id: return None @@ -173,8 +167,8 @@ class Collection(Generic[T]): def _find_object_in_self(self, __object: T) -> Optional[T]: for name, value in __object.indexing_values: - if value in self._indexed_values[name]: - return self._indexed_to_objects[value][0] + if value == self._indexed_values[name]: + return self._indexed_to_objects[value] def _find_object(self, __object: T, no_sibling: bool = False) -> Tuple[Collection[T], Optional[T]]: other_object = self._find_object_in_self(__object) @@ -237,6 +231,8 @@ class Collection(Generic[T]): return append_to._unmap_element(existing_object) + append_to._unmap_element(__object) + existing_object.merge(__object) append_to._map_element(existing_object) From 56101d4a31a1e2041e917da332f07b73daea82e8 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 17:56:16 +0200 Subject: [PATCH 07/14] fix: clean mapping --- music_kraken/objects/collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 1fba27d..e0789d9 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -46,7 +46,7 @@ class Collection(Generic[T]): def _map_element(self, __object: T, from_map: bool = False): self._contains_ids.add(__object.id) - for name, value in __object.indexing_values: + for name, value in (*__object.indexing_values, ('id', __object.id)): if value is None or value == __object._inner._default_values.get(name): continue From 7e4ba0b1a0ea190edb924a6cd29d4a0023cd3759 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 18:00:41 +0200 Subject: [PATCH 08/14] fix: clean mapping --- music_kraken/objects/collection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index e0789d9..e1a8463 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -228,6 +228,7 @@ class Collection(Generic[T]): else: # merge only if the two objects are not the same if existing_object.id == __object.id: + exit() return append_to._unmap_element(existing_object) From f000ad448461d7b39e81a268e86cfbf975d2dfc2 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Wed, 17 Apr 2024 18:13:03 +0200 Subject: [PATCH 09/14] fix: some crashes --- music_kraken/objects/collection.py | 9 +++++---- music_kraken/objects/parents.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index e1a8463..2adb792 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -60,8 +60,10 @@ class Collection(Generic[T]): self._contains_ids.remove(__object.id) for name, value in self._id_to_index_values[__object.id]: - del self._indexed_values[name] - del self._indexed_to_objects[value] + if name in self._indexed_values: + del self._indexed_values[name] + if value in self._indexed_to_objects: + del self._indexed_to_objects[value] del self._id_to_index_values[__object.id] @@ -228,7 +230,6 @@ class Collection(Generic[T]): else: # merge only if the two objects are not the same if existing_object.id == __object.id: - exit() return append_to._unmap_element(existing_object) @@ -282,7 +283,7 @@ class Collection(Generic[T]): yield from c.__iter__(finished_ids=finished_ids) def __merge__(self, __other: Collection, override: bool = False): - self.extend(__other.__iter__()) + self.extend(__other) def __getitem__(self, item: int): if item < len(self._data): diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index feebe3d..0ab6165 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -194,17 +194,23 @@ class OuterProxy: if a._inner is b._inner: return - + # switch instances if more efficient if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances): a, b = b, a a._inner.__merge__(b._inner, override=override) for collection, child_collection in b._inner._is_collection_child.items(): - collection.children.remove(child_collection) + try: + collection.children.remove(child_collection) + except ValueError: + pass for collection, parent_collection in b._inner._is_collection_parent.items(): - collection.parents.remove(parent_collection) + try: + collection.parents.remove(parent_collection) + except ValueError: + pass a._inner._refers_to_instances.update(b._inner._refers_to_instances) From 85923e2a795d2abc1498b365dc181aefffa27176 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 18 Apr 2024 14:37:20 +0200 Subject: [PATCH 10/14] feat: improved logging with traceback --- music_kraken/objects/collection.py | 22 +++++++++++++--------- music_kraken/objects/parents.py | 13 +++++++++---- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 2adb792..1e3ee89 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections import defaultdict -from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator +from typing import TypeVar, Generic, Dict, Optional, Iterable, List, Iterator, Tuple, Generator, Union from .parents import OuterProxy T = TypeVar('T', bound=OuterProxy) @@ -55,17 +55,19 @@ class Collection(Generic[T]): self._id_to_index_values[__object.id].add((name, value)) - def _unmap_element(self, __object: T): - if __object.id in self._contains_ids: - self._contains_ids.remove(__object.id) + def _unmap_element(self, __object: Union[T, int]): + obj_id = __object.id if isinstance(__object, OuterProxy) else __object - for name, value in self._id_to_index_values[__object.id]: + if obj_id in self._contains_ids: + self._contains_ids.remove(obj_id) + + for name, value in self._id_to_index_values[obj_id]: if name in self._indexed_values: del self._indexed_values[name] if value in self._indexed_to_objects: del self._indexed_to_objects[value] - del self._id_to_index_values[__object.id] + del self._id_to_index_values[obj_id] def _contained_in_self(self, __object: T) -> bool: if __object.id in self._contains_ids: @@ -232,12 +234,14 @@ class Collection(Generic[T]): if existing_object.id == __object.id: return - append_to._unmap_element(existing_object) - append_to._unmap_element(__object) + old_id = existing_object.id existing_object.merge(__object) - append_to._map_element(existing_object) + if existing_object.id != old_id: + append_to._unmap_element(old_id) + + append_to._map_element(existing_object) def extend(self, __iterable: Optional[Generator[T, None, None]]): if __iterable is None: diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index 0ab6165..a90576b 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -3,9 +3,11 @@ from __future__ import annotations import random from collections import defaultdict from functools import lru_cache - from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set +from pathlib import Path +import inspect + from .metadata import Metadata from ..utils import get_unix_time, object_trace from ..utils.config import logging_settings, main_settings @@ -187,12 +189,12 @@ class OuterProxy: if __other is None: return - object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}]") + object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:4])}]") a = self b = __other - if a._inner is b._inner: + if a.id == b.id: return # switch instances if more efficient @@ -211,12 +213,15 @@ class OuterProxy: collection.parents.remove(parent_collection) except ValueError: pass - + + old_inner = b._inner a._inner._refers_to_instances.update(b._inner._refers_to_instances) for instance in b._inner._refers_to_instances: instance._inner = a._inner + del old_inner + def __merge__(self, __other: Optional[OuterProxy], override: bool = False): self.merge(__other, override) From 662f2075296cd2fd631ac3c122ecbe9ef649480b Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 18 Apr 2024 15:30:04 +0200 Subject: [PATCH 11/14] fix: recursion depth error --- music_kraken/objects/parents.py | 21 +++++++-------------- music_kraken/objects/source.py | 9 +-------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/music_kraken/objects/parents.py b/music_kraken/objects/parents.py index a90576b..7b71269 100644 --- a/music_kraken/objects/parents.py +++ b/music_kraken/objects/parents.py @@ -164,15 +164,7 @@ class OuterProxy: self._add_other_db_objects(key, value) def __hash__(self): - """ - :raise: IsDynamicException - :return: - """ - - if self.dynamic: - return id(self._inner) - - return self.id + return id(self) def __eq__(self, other: Any): return self.__hash__() == other.__hash__() @@ -189,7 +181,6 @@ class OuterProxy: if __other is None: return - object_trace(f"merging {type(self).__name__} [{self.title_string} | {self.id}] with {type(__other).__name__} [{__other.title_string} | {__other.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:4])}]") a = self b = __other @@ -201,7 +192,8 @@ class OuterProxy: if len(b._inner._refers_to_instances) > len(a._inner._refers_to_instances): a, b = b, a - a._inner.__merge__(b._inner, override=override) + object_trace(f"merging {type(a).__name__} [{a.title_string} | {a.id}] with {type(b).__name__} [{b.title_string} | {b.id}] called by [{' | '.join(f'{s.function} {Path(s.filename).name}:{str(s.lineno)}' for s in inspect.stack()[1:5])}]") + for collection, child_collection in b._inner._is_collection_child.items(): try: collection.children.remove(child_collection) @@ -213,13 +205,14 @@ class OuterProxy: collection.parents.remove(parent_collection) except ValueError: pass - + old_inner = b._inner - a._inner._refers_to_instances.update(b._inner._refers_to_instances) - for instance in b._inner._refers_to_instances: + for instance in b._inner._refers_to_instances.copy(): instance._inner = a._inner + a._inner._refers_to_instances.add(instance) + a._inner.__merge__(old_inner, override=override) del old_inner def __merge__(self, __other: Optional[OuterProxy], override: bool = False): diff --git a/music_kraken/objects/source.py b/music_kraken/objects/source.py index 5a8a560..bb2e9e3 100644 --- a/music_kraken/objects/source.py +++ b/music_kraken/objects/source.py @@ -104,14 +104,7 @@ class Source(OuterProxy): ('url', self.url), ('audio_url', self.audio_url), ] - - def __merge__(self, __other: Source, override: bool = False): - if override: - self.audio_url = __other.audio_url - - if self.audio_url is None or (override and __other.audio_url is not None): - self.audio_url = __other.audio_url - + def __str__(self): return self.__repr__() From 3cd9daf512a01c244627b8005fde2f18425f3ed0 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 18 Apr 2024 15:43:01 +0200 Subject: [PATCH 12/14] feat: swaped the artist syncing --- music_kraken/objects/song.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index 7608f7e..b81d57f 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -86,9 +86,11 @@ class Song(Base): TITEL = "title" def __init_collections__(self) -> None: + """ self.album_collection.contain_given_in_attribute = { "artist_collection": self.main_artist_collection, } + """ self.album_collection.append_object_to_attribute = { "song_collection": self, } @@ -245,6 +247,9 @@ class Album(Base): self.song_collection.append_object_to_attribute = { "album_collection": self } + self.song_collection.contain_given_in_attribute = { + "main_artist_collection": self.artist_collection + } self.artist_collection.append_object_to_attribute = { "main_album_collection": self From 3532fea36cf40b732fb33bebeef14a267beb29e2 Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 18 Apr 2024 17:20:30 +0200 Subject: [PATCH 13/14] feat: syncing artists between song and album --- music_kraken/objects/collection.py | 6 ++++++ music_kraken/objects/song.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/music_kraken/objects/collection.py b/music_kraken/objects/collection.py index 1e3ee89..1d62116 100644 --- a/music_kraken/objects/collection.py +++ b/music_kraken/objects/collection.py @@ -36,6 +36,7 @@ class Collection(Generic[T]): # Value: main collection to sync to self.contain_given_in_attribute: Dict[str, Collection] = contain_given_in_attribute or {} self.append_object_to_attribute: Dict[str, T] = append_object_to_attribute or {} + self.sync_on_append: Dict[str, Collection] = sync_on_append or {} self._id_to_index_values: Dict[int, set] = defaultdict(set) self._indexed_values = defaultdict(lambda: None) @@ -229,6 +230,11 @@ class Collection(Generic[T]): for attribute, new_object in self.append_object_to_attribute.items(): __object.__getattribute__(attribute).append(new_object) + + for attribute, collection in self.sync_on_append.items(): + collection.extend(__object.__getattribute__(attribute)) + __object.__setattr__(attribute, collection) + else: # merge only if the two objects are not the same if existing_object.id == __object.id: diff --git a/music_kraken/objects/song.py b/music_kraken/objects/song.py index b81d57f..e682fe1 100644 --- a/music_kraken/objects/song.py +++ b/music_kraken/objects/song.py @@ -91,6 +91,10 @@ class Song(Base): "artist_collection": self.main_artist_collection, } """ + self.album_collection.sync_on_append = { + "artist_collection": self.main_artist_collection, + } + self.album_collection.append_object_to_attribute = { "song_collection": self, } @@ -247,7 +251,7 @@ class Album(Base): self.song_collection.append_object_to_attribute = { "album_collection": self } - self.song_collection.contain_given_in_attribute = { + self.song_collection.sync_on_append = { "main_artist_collection": self.artist_collection } From ba94e38a2d800d2d477af0457dcab7f67a70617b Mon Sep 17 00:00:00 2001 From: Lars Noack Date: Thu, 18 Apr 2024 17:47:33 +0200 Subject: [PATCH 14/14] feat: disabled object trace --- music_kraken/utils/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_kraken/utils/shared.py b/music_kraken/utils/shared.py index 8861fce..6676393 100644 --- a/music_kraken/utils/shared.py +++ b/music_kraken/utils/shared.py @@ -15,7 +15,7 @@ __stage__ = os.getenv("STAGE", "prod") DEBUG = (__stage__ == "dev") and True DEBUG_LOGGING = DEBUG and True DEBUG_TRACE = DEBUG and True -DEBUG_OBJECT_TRACE = DEBUG and True +DEBUG_OBJECT_TRACE = DEBUG and False DEBUG_YOUTUBE_INITIALIZING = DEBUG and False DEBUG_PAGES = DEBUG and False DEBUG_DUMP = DEBUG and True