fix: runtime errors

2024-05-15 13:16:11 +02:00 · 2024-05-15 13:16:11 +02:00 · bedd0fe819
parent ac6c513d56
commit bedd0fe819
12 changed files with 65 additions and 196 deletions
--- a/development/actual_donwload.py
+++ b/development/actual_donwload.py
@ -6,8 +6,9 @@ logging.getLogger().setLevel(logging.DEBUG)

 if __name__ == "__main__":
    commands = [
-        "s: #a Crystal F",
-        "d: 20",
+        "s: #a I'm in a coffin",
+        "0",
+        "d: 0",
    ]

    
--- a/music_kraken/cli/main_downloader.py
+++ b/music_kraken/cli/main_downloader.py
@ -317,7 +317,7 @@ class Downloader:

        for database_object in data_objects:
            r = self.pages.download(
-                music_object=database_object, 
+                data_object=database_object, 
                genre=self.genre, 
                **kwargs
            )
--- a/music_kraken/download/__init__.py
+++ b/music_kraken/download/__init__.py
@ -1,4 +1,5 @@
 from dataclasses import dataclass, field
+from typing import Set

 from ..utils.config import main_settings
 from ..utils.enums.album import AlbumType
--- a/music_kraken/download/page_attributes.py
+++ b/music_kraken/download/page_attributes.py
@ -2,6 +2,7 @@ from typing import Tuple, Type, Dict, Set, Optional, List
 from collections import defaultdict
 from pathlib import Path
 import re
+import logging

 from . import FetchOptions, DownloadOptions
 from .results import SearchResults
@ -17,6 +18,7 @@ from ..objects import (
    Label,
 )
 from ..audio import write_metadata_to_target, correct_codec
+from ..utils import output, BColors
 from ..utils.string_processing import fit_to_file_system
 from ..utils.config import youtube_settings, main_settings
 from ..utils.path_manager import LOCATIONS
@ -69,6 +71,8 @@ if DEBUG_PAGES:

 class Pages:
    def __init__(self, exclude_pages: Set[Type[Page]] = None, exclude_shady: bool = False, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.LOGGER = logging.getLogger("download")
+        
        self.download_options: DownloadOptions = download_options or DownloadOptions()
        self.fetch_options: FetchOptions = fetch_options or FetchOptions()

@ -118,7 +122,9 @@ class Pages:
            return data_object
        
        source: Source
-        for source in data_object.source_collection.get_sources():
+        for source in data_object.source_collection.get_sources(source_type_sorting={
+            "only_with_page": True,
+        }):
            new_data_object = self.fetch_from_source(source=source, stop_at_level=stop_at_level)
            if new_data_object is not None:
                data_object.merge(new_data_object)
@ -129,10 +135,15 @@ class Pages:
        if not source.has_page:
            return None
        
-        func = getattr(source.page, fetch_map[source_type])(source=source, **kwargs)
+        source_type = source.page.get_source_type(source=source)
+        if source_type is None:
+            self.LOGGER.debug(f"Could not determine source type for {source}.")
+            return None
+
+        func = getattr(source.page, fetch_map[source_type])
        
        # fetching the data object and marking it as fetched
-        data_object: DataObject = func(source=source)
+        data_object: DataObject = func(source=source, **kwargs)
        data_object.mark_as_fetched(source.hash_url)
        return data_object

@ -175,7 +186,7 @@ class Pages:
        
        # download all children
        download_result: DownloadResult = DownloadResult()
-        for c in data_object.get_children():
+        for c in data_object.get_child_collections():
            for d in c:
                if self._skip_object(d):
                    continue
@ -209,7 +220,7 @@ class Pages:

            path_template = path_template.replace(f"{{{field}}}", naming[field][0])

-        return possible_parts
+        return path_template

    def _download_song(self, song: Song, naming: dict) -> DownloadOptions:
        """
@ -235,7 +246,7 @@ class Pages:
        # removing duplicates from the naming, and process the strings
        for key, value in naming.items():
            # https://stackoverflow.com/a/17016257
-            naming[key] = list(dict.fromkeys(items))
+            naming[key] = list(dict.fromkeys(value))

        # manage the targets
        tmp: Target = Target.temp(file_extension=main_settings["audio_format"])
@ -248,14 +259,14 @@ class Pages:
            )
        ))
        for target in song.target_collection:
-            if target.exists():
+            if target.exists:
                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
                r.found_on_disk += 1

                if self.download_options.download_again_if_found:
                    target.copy_content(tmp)
            else:
-                target.create_parent_directories()
+                target.create_path()
                output(f'- {target.file_path}', color=BColors.GREY)

        # this streams from every available source until something succeeds, setting the skip intervals to the values of the according source
@ -294,7 +305,7 @@ class Pages:
        if used_source is not None:
            used_source.page.post_process_hook(song=song, temp_target=tmp)

-        if not found_on_disc or self.download_options.process_metadata_if_found:
+        if not found_on_disk or self.download_options.process_metadata_if_found:
            write_metadata_to_target(metadata=song.metadata, target=tmp, song=song)

        tmp.delete()
--- a/music_kraken/download/results.py
+++ b/music_kraken/download/results.py
@ -2,7 +2,6 @@ from typing import Tuple, Type, Dict, List, Generator, Union
 from dataclasses import dataclass

 from ..objects import DatabaseObject
-from ..utils.enums.source import SourceType
 from ..pages import Page, EncyclopaediaMetallum, Musify


--- a/music_kraken/objects/parents.py
+++ b/music_kraken/objects/parents.py
@ -8,6 +8,7 @@ from typing import Optional, Dict, Tuple, List, Type, Generic, Any, TypeVar, Set
 from pathlib import Path
 import inspect

+from .source import SourceCollection
 from .metadata import Metadata
 from ..utils import get_unix_time, object_trace, generate_id
 from ..utils.config import logging_settings, main_settings
--- a/music_kraken/objects/source.py
+++ b/music_kraken/objects/source.py
@ -20,13 +20,11 @@ from dataclasses import dataclass, field
 from functools import cached_property

 from ..utils import generate_id
-from ..utils.enums import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.config import youtube_settings
 from ..utils.string_processing import hash_url, shorten_display_url

 from .metadata import Mapping, Metadata
-from .parents import OuterProxy
-from .collection import Collection
 if TYPE_CHECKING:
    from ..pages.abstract import Page

@ -54,38 +52,38 @@ class Source:
        url = parsed_url.geturl()
        
        if "musify" in parsed_url.netloc:
-            return cls(SourceType.MUSIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MUSIFY, url, referrer_page=referrer_page)

        if parsed_url.netloc in [_url.netloc for _url in youtube_settings['youtube_url']]:
-            return cls(SourceType.YOUTUBE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.YOUTUBE, url, referrer_page=referrer_page)

        if url.startswith("https://www.deezer"):
-            return cls(SourceType.DEEZER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.DEEZER, url, referrer_page=referrer_page)
        
        if url.startswith("https://open.spotify.com"):
-            return cls(SourceType.SPOTIFY, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.SPOTIFY, url, referrer_page=referrer_page)

        if "bandcamp" in url:
-            return cls(SourceType.BANDCAMP, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.BANDCAMP, url, referrer_page=referrer_page)

        if "wikipedia" in parsed_url.netloc:
-            return cls(SourceType.WIKIPEDIA, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.WIKIPEDIA, url, referrer_page=referrer_page)

        if url.startswith("https://www.metal-archives.com/"):
-            return cls(SourceType.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, url, referrer_page=referrer_page)

        # the less important once
        if url.startswith("https://www.facebook"):
-            return cls(SourceType.FACEBOOK, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.FACEBOOK, url, referrer_page=referrer_page)

        if url.startswith("https://www.instagram"):
-            return cls(SourceType.INSTAGRAM, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.INSTAGRAM, url, referrer_page=referrer_page)

        if url.startswith("https://twitter"):
-            return cls(SourceType.TWITTER, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.TWITTER, url, referrer_page=referrer_page)

        if url.startswith("https://myspace.com"):
-            return cls(SourceType.MYSPACE, url, referrer_page=referrer_page)
+            return cls(ALL_SOURCE_TYPES.MYSPACE, url, referrer_page=referrer_page)

    @property
    def has_page(self) -> bool:
@ -134,7 +132,7 @@ class SourceCollection:
    _sources_by_type: Dict[SourceType, List[Source]]

    def __init__(self, data: Optional[Iterable[Source]] = None, **kwargs):
-        self._page_to_source_list = defaultdict(list)
+        self._sources_by_type = defaultdict(list)
        self._indexed_sources = {}

        self.extend(data or [])
@ -157,7 +155,7 @@ class SourceCollection:
            Iterable[SourceType]: A list of source types.
        """

-        source_types: List[SourceType] = self._page_to_source_list.keys()
+        source_types: List[SourceType] = self._sources_by_type.keys()
        if only_with_page:
            source_types = filter(lambda st: st.has_page, source_types)

@ -186,7 +184,7 @@ class SourceCollection:
                source_types = self.source_types(**source_type_sorting)

            for source_type in source_types:
-                yield from self._page_to_source_list[source_type]
+                yield from self._sources_by_type[source_type]

    def append(self, source: Source):
        if source is None:
@ -202,7 +200,7 @@ class SourceCollection:
            existing_source.__merge__(source)
            source = existing_source
        else:
-            self._page_to_source_list[source.source_type].append(source)
+            self._sources_by_type[source.source_type].append(source)

        changed = False
        for key in source.indexing_values:
--- a/music_kraken/pages/abstract.py
+++ b/music_kraken/pages/abstract.py
@ -49,15 +49,16 @@ class DownloadOptions:

 class Page:
    SOURCE_TYPE: SourceType
-    LOGGER: LOGGER
+    LOGGER: logging.Logger

    def __new__(cls, *args, **kwargs):
-        cls.SOURCE_TYPE.register_page(cls)
        cls.LOGGER = logging.getLogger(cls.__name__)

        return super().__new__(cls)

    def __init__(self, download_options: DownloadOptions = None, fetch_options: FetchOptions = None):
+        self.SOURCE_TYPE.register_page(self)
+        
        self.download_options: DownloadOptions = download_options or DownloadOptions()
        self.fetch_options: FetchOptions = fetch_options or FetchOptions()

@ -145,151 +146,7 @@ class Page:
    def fetch_label(self, source: Source, stop_at_level: int = 1) -> Label:
        return Label()

-    def download(
-        self, 
-        music_object: DatabaseObject, 
-        genre: str, 
-    ) -> DownloadResult:
-        naming_dict: NamingDict = NamingDict({"genre": genre})
-
-        def fill_naming_objects(naming_music_object: DatabaseObject):
-            nonlocal naming_dict
-
-            for collection_name in naming_music_object.UPWARDS_COLLECTION_STRING_ATTRIBUTES:
-                collection: Collection = getattr(naming_music_object, collection_name)
-
-                if collection.empty:
-                    continue
-                
-                dom_ordered_music_object: DatabaseObject = collection[0]
-                naming_dict.add_object(dom_ordered_music_object)
-                return fill_naming_objects(dom_ordered_music_object)
-
-        fill_naming_objects(music_object)
-
-        return self._download(music_object, naming_dict)
-
-    def _download(
-        self, 
-        music_object: DatabaseObject, 
-        naming_dict: NamingDict, 
-        **kwargs
-    ) -> DownloadResult:
-        if isinstance(music_object, Song):
-            output(f"Downloading {music_object.option_string} to:", color=BColors.BOLD)
-        else:
-            output(f"Downloading {music_object.option_string}...", color=BColors.BOLD)
-
-        # Skips all releases, that are defined in shared.ALBUM_TYPE_BLACKLIST, if download_all is False
-        if isinstance(music_object, Album):
-            if not self.download_options.download_all and music_object.album_type in self.download_options.album_type_blacklist:
-                return DownloadResult()
-
-        if not (isinstance(music_object, Song) and self.NO_ADDITIONAL_DATA_FROM_SONG):
-            self.fetch_details(music_object=music_object, stop_at_level=1)
-
-        if isinstance(music_object, Album):
-            music_object.update_tracksort()
-            
-        naming_dict.add_object(music_object)
-
-        if isinstance(music_object, Song):
-            return self._download_song(music_object, naming_dict)
-
-        download_result: DownloadResult = DownloadResult()
-
-        for collection_name in music_object.DOWNWARDS_COLLECTION_STRING_ATTRIBUTES:
-            collection: Collection = getattr(music_object, collection_name)
-
-            sub_ordered_music_object: DatabaseObject
-            for sub_ordered_music_object in collection:
-                download_result.merge(self._download(sub_ordered_music_object, naming_dict.copy()))
-
-        return download_result
-
-    def _download_song(self, song: Song, naming_dict: NamingDict):
-        song.compile()
-        if "genre" not in naming_dict and song.genre is not None:
-            naming_dict["genre"] = song.genre
-
-        if song.genre is None:
-            song.genre = naming_dict["genre"]
-
-        path_parts = Formatter().parse(main_settings["download_path"])
-        file_parts = Formatter().parse(main_settings["download_file"])
-        new_target = Target(
-            relative_to_music_dir=True,
-            file_path=Path(
-                main_settings["download_path"].format(**{part[1]: naming_dict[part[1]] for part in path_parts}),
-                main_settings["download_file"].format(**{part[1]: naming_dict[part[1]] for part in file_parts})
-            )
-        )
-
-        if song.target_collection.empty:
-            song.target_collection.append(new_target)
-
-        r = DownloadResult(1)
-        temp_target: Target = Target.temp(file_extension=main_settings["audio_format"])
-
-        found_on_disc = False
-        target: Target
-        for target in song.target_collection:
-            current_exists = target.exists
-
-            if current_exists:
-                output(f'- {target.file_path} {BColors.OKGREEN.value}[already exists]', color=BColors.GREY)
-                target.copy_content(temp_target)
-                found_on_disc = True
-
-                r.found_on_disk += 1
-                r.add_target(target)
-            else:
-                output(f'- {target.file_path}', color=BColors.GREY)
-
-
-        sources = song.source_collection.get_sources(self.SOURCE_TYPE)
-
-        skip_intervals = []
-        if not found_on_disc:
-            for source in sources:
-                r = self.download_song_to_target(source=source, target=temp_target, desc="downloading")
-
-                if not r.is_fatal_error:
-                    skip_intervals = self.get_skip_intervals(song, source)
-                    break
-        
-        if temp_target.exists:
-            r.merge(self._post_process_targets(
-                song=song, 
-                temp_target=temp_target,
-                skip_intervals=skip_intervals,
-                found_on_disc=found_on_disc,
-            ))
-
-        return r
-
-    def _post_process_targets(self, song: Song, temp_target: Target, skip_intervals: List, found_on_disc: bool) -> DownloadResult:
-        if not found_on_disc or self.download_options.process_audio_if_found:
-            correct_codec(temp_target, skip_intervals=skip_intervals)
-
-        self.post_process_hook(song, temp_target)
-
-        if not found_on_disc or self.download_options.process_metadata_if_found:
-            write_metadata_to_target(song.metadata, temp_target, song)
-
-        r = DownloadResult()
-
-        target: Target
-        for target in song.target_collection:
-            if temp_target is not target:
-                temp_target.copy_content(target)
-            r.add_target(target)
-
-        temp_target.delete()
-        r.sponsor_segments += len(skip_intervals)
-
-        return r
-
+    # to download stuff
    def get_skip_intervals(self, song: Song, source: Source) -> List[Tuple[float, float]]:
        return []

--- a/music_kraken/pages/bandcamp.py
+++ b/music_kraken/pages/bandcamp.py
@ -62,8 +62,7 @@ class Bandcamp(Page):
        super().__init__(*args, **kwargs)

    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
-        parsed_url = urlparse(source.url)
-        path = parsed_url.path.replace("/", "")
+        path = source.parsed_url.path.replace("/", "")

        if path == "" or path.startswith("music"):
            return Artist
--- a/music_kraken/pages/encyclopaedia_metallum.py
+++ b/music_kraken/pages/encyclopaedia_metallum.py
@ -7,7 +7,7 @@ from urllib.parse import urlparse, urlencode
 from ..connection import Connection
 from ..utils.config import logging_settings
 from .abstract import Page
-from ..utils.enums.source import SourceType
+from ..utils.enums import SourceType, ALL_SOURCE_TYPES
 from ..utils.enums.album import AlbumType
 from ..utils.support_classes.query import Query
 from ..objects import (
@ -59,7 +59,7 @@ def _song_from_json(artist_html=None, album_html=None, release_type=None, title=
            _album_from_json(album_html=album_html, release_type=release_type, artist_html=artist_html)
        ],
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, song_id)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, song_id)
        ]
    )

@ -85,7 +85,7 @@ def _artist_from_json(artist_html=None, genre=None, country=None) -> Artist:
    return Artist(
        name=artist_name,
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, artist_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, artist_url)
        ]
    )

@ -105,7 +105,7 @@ def _album_from_json(album_html=None, release_type=None, artist_html=None) -> Al
        title=album_name,
        album_type=album_type,
        source_list=[
-            Source(SourceType.ENCYCLOPAEDIA_METALLUM, album_url)
+            Source(ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM, album_url)
        ],
        artist_list=[
            _artist_from_json(artist_html=artist_html)
@ -207,7 +207,7 @@ def create_grid(


 class EncyclopaediaMetallum(Page):
-    SOURCE_TYPE = SourceType.ENCYCLOPAEDIA_METALLUM
+    SOURCE_TYPE = ALL_SOURCE_TYPES.ENCYCLOPAEDIA_METALLUM
    LOGGER = logging_settings["metal_archives_logger"]
    
    def __init__(self, **kwargs):
--- a/music_kraken/pages/youtube_music/youtube_music.py
+++ b/music_kraken/pages/youtube_music/youtube_music.py
@ -22,20 +22,22 @@ from ...utils import get_current_millis, traverse_json_path

 from ...utils import dump_to_file

-from ...objects import Source, DatabaseObject, ID3Timestamp, Artwork
 from ..abstract import Page
 from ...objects import (
-    Artist,
+    DatabaseObject as DataObject,
    Source,
-    SourceType,
+    FormattedText,
+    ID3Timestamp,
+    Artwork,
+    Artist,
    Song,
    Album,
    Label,
    Target,
    Lyrics,
-    FormattedText
 )
 from ...connection import Connection
+from ...utils.enums import SourceType, ALL_SOURCE_TYPES
 from ...utils.enums.album import AlbumType
 from ...utils.support_classes.download_result import DownloadResult

@ -176,8 +178,7 @@ ALBUM_TYPE_MAP = {

 class YoutubeMusic(SuperYouTube):
    # CHANGE
-    SOURCE_TYPE = SourceType.YOUTUBE_MUSIC
-    LOGGER = logging_settings["youtube_music_logger"]
+    SOURCE_TYPE = ALL_SOURCE_TYPES.YOUTUBE

    def __init__(self, *args, ydl_opts: dict = None, **kwargs):
        self.yt_music_connection: YoutubeMusicConnection = YoutubeMusicConnection(
@ -348,10 +349,10 @@ class YoutubeMusic(SuperYouTube):
            default='{}'
        )) or {}

-    def get_source_type(self, source: Source) -> Optional[Type[DatabaseObject]]:
+    def get_source_type(self, source: Source) -> Optional[Type[DataObject]]:
        return super().get_source_type(source)

-    def general_search(self, search_query: str) -> List[DatabaseObject]:
+    def general_search(self, search_query: str) -> List[DataObject]:
        search_query = search_query.strip()

        urlescaped_query: str = quote(search_query.strip().replace(" ", "+"))
--- a/music_kraken/utils/enums/__init__.py
+++ b/music_kraken/utils/enums/__init__.py
@ -14,10 +14,11 @@ class SourceType:
    page_type: Type[Page] = None
    page: Page = None

+    def register_page(self, page: Page):
+        self.page = page

-    def register_page(self, page_type: Type[Page]):
-        self.page_type = page
-        self.page = page_type()
+    def __hash__(self):
+        return hash(self.name)

    @property
    def has_page(self) -> bool: