much progress on new db integration

This commit is contained in:
Hellow 2022-12-06 23:44:42 +01:00
parent cbb56499bf
commit 4b60ed7555
10 changed files with 284 additions and 85 deletions

View File

@ -8,5 +8,17 @@
<jdbc-url>jdbc:sqlite:/tmp/music-downloader/metadata.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
</data-source>
<data-source source="LOCAL" name="test" uuid="eb0321e0-ad51-46e5-8c59-97956edf1699">
<driver-ref>sqlite.xerial</driver-ref>
<synchronize>true</synchronize>
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
<jdbc-url>jdbc:sqlite:$PROJECT_DIR$/src/test.db</jdbc-url>
<working-dir>$ProjectFileDir$</working-dir>
<libraries>
<library>
<url>file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.39.2/sqlite-jdbc-3.39.2.jar</url>
</library>
</libraries>
</data-source>
</component>
</project>

6
.idea/sqldialects.xml Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$/src/music_kraken/static_files/new_db.sql" dialect="SQLite" />
</component>
</project>

View File

@ -43,8 +43,8 @@ with open('version', 'r') as version_file:
setup(
name='music-kraken',
version=version,
description='An extensive music downloader crawling the internet. It gets its metadata from a couple metadata '
'provider, and it scrapes the audiofiles.',
description='An extensive music downloader crawling the internet. It gets its metadata from a couple of metadata '
'providers, and it scrapes the audiofiles.',
long_description=long_description,
long_description_content_type='text/markdown',
author='Hellow2',

Binary file not shown.

View File

@ -8,51 +8,33 @@ from music_kraken import (
import music_kraken.database.new_database as db
cache = music_kraken.database.new_database.Database("test.db")
cache.reset()
song = Song(
title="Vein Deep in the Solution",
release_name="One Final Action",
length=666,
target=Target(file="~/Music/genre/artist/album/song.mp3", path="~/Music/genre/artist/album"),
metadata={
"album": "One Final Action"
},
lyrics=[
Lyrics(text="these are some depressive lyrics", language="en")
Lyrics(text="these are some depressive lyrics", language="en"),
Lyrics(text="test", language="en")
],
sources=[
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd"),
Source(src="musify", url="https://ln.topdf.de/Music-Kraken/")
]
)
cache.push([song])
song_ref = song.reference
print(song_ref)
"""
music_kraken.clear_cache()
lyrics = Lyrics(text="these are some Lyrics that don't belong to any Song", language="en")
artist = music_kraken.Artist(
name="I'm in a Coffin"
)
cache.push([song, lyrics])
song = Song(
title="Vein Deep in the Solution",
release_name="One Final Action",
target=Target(file="~/Music/genre/artist/album/song.mp3", path="~/Music/genre/artist/album"),
metadata={
"album": "One Final Action"
},
lyrics=[
Lyrics(text="these are some depressive lyrics", language="en")
],
sources=[
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")
]
)
print(song)
print(song.id)
# music_kraken.fetch_sources([song])
"""
cache.pull_single_song(song_ref=song_ref)

View File

@ -1,9 +1,10 @@
import sqlite3
import os
import logging
from typing import List
from typing import List, Tuple
from pkg_resources import resource_string
from .objects.database_object import Reference
from .objects import (
Song,
Lyrics,
@ -15,12 +16,34 @@ from .objects import (
logger = logging.getLogger("database")
# NOTE: the query templates below are completed with str.format() instead of
# bound parameters. The author hopes this is acceptable because the program is
# not meant to be deployed on a server, and it avoids a more complicated query
# builder.

# Selects a single song by its id together with the columns of its target.
# The target is LEFT JOINed, so the target columns are NULL if the song has none.
# Placeholder: {song_id}
SONG_QUERY = """
SELECT
Song.id AS song_id, Song.name AS title, Song.isrc AS isrc, Song.length AS length,
Target.id AS target_id, Target.file AS file, Target.path AS path
FROM Song
LEFT JOIN Target ON Song.id=Target.song_id
WHERE Song.id="{song_id}";
"""

# Selects rows of the Source table.
# Placeholder: {where} is replaced by the complete filter condition.
SOURCE_QUERY = """
SELECT id, src, url, song_id
FROM Source
WHERE {where};
"""

# Selects rows of the Lyrics table.
# Placeholder: {where} is replaced by the complete filter condition.
LYRICS_QUERY = """
SELECT id, text, language, song_id
FROM Lyrics
WHERE {where};
"""
class Database:
def __init__(self, database_file: str):
self.database_file: str = database_file
self.connection, self.cursor = self.reset_cursor()
self.connection = sqlite3.connect(self.database_file)
self.cursor = self.connection.cursor()
def reset(self):
@ -36,14 +59,21 @@ class Database:
os.remove(self.database_file)
# newly creating the database
self.connection = sqlite3.connect(self.database_file)
self.cursor = self.connection.cursor()
self.reset_cursor()
query = resource_string("music_kraken", "static_files/new_db.sql").decode('utf-8')
# fill the database with the schematic
self.cursor.executescript(query)
self.connection.commit()
def reset_cursor(self) -> Tuple[sqlite3.Connection, sqlite3.Cursor]:
self.connection = sqlite3.connect(self.database_file)
# This is necessary that fetching rows returns dicts instead of tuple
self.connection.row_factory = sqlite3.Row
self.cursor = self.connection.cursor()
return self.connection, self.cursor
def push_one(self, db_object: Song | Lyrics | Target | Artist | Source):
if type(db_object) == Song:
return self.push_song(song=db_object)
@ -82,27 +112,143 @@ class Database:
name - title
"""
table = "Song"
query = f"INSERT OR REPLACE INTO {table} (id, name) VALUES (?, ?);"
query = f"INSERT OR REPLACE INTO {table} (id, name, isrc, length) VALUES (?, ?, ?, ?);"
values = (
song.id,
song.title
song.title,
song.isrc,
song.length
)
self.cursor.execute(query, values)
self.connection.commit()
def push_lyrics(self, lyrics: Lyrics):
pass
# add sources
for source in song.sources:
self.push_source(source=source)
# add lyrics
for single_lyrics in song.lyrics:
self.push_lyrics(lyrics=single_lyrics)
# add target
self.push_target(target=song.target)
def push_lyrics(self, lyrics: Lyrics):
    """
    Writes one Lyrics object into the table "Lyrics" and commits.
    An existing row with the same id is replaced.

    Lyrics without a song reference are still written, only a warning is logged.

    :param lyrics: the lyrics to store
    """
    song_id = lyrics.song_ref_id
    if song_id is None:
        logger.warning("the Lyrics don't refer to a song")

    self.cursor.execute(
        "INSERT OR REPLACE INTO Lyrics (id, song_id, text, language) VALUES (?, ?, ?, ?);",
        (lyrics.id, song_id, lyrics.text, lyrics.language)
    )
    self.connection.commit()
def push_source(self, source: Source):
    """
    Writes one Source object into the table "Source" and commits.
    An existing row with the same id is replaced.

    A source without a song reference is still written, only a warning is logged.

    :param source: the source to store
    """
    song_id = source.song_ref_id
    if song_id is None:
        logger.warning("the Source don't refer to a song")

    self.cursor.execute(
        "INSERT OR REPLACE INTO Source (id, song_id, src, url) VALUES (?, ?, ?, ?);",
        (source.id, song_id, source.src, source.url)
    )
    self.connection.commit()
def push_target(self, target: Target):
    """
    Writes one Target object into the table "Target" and commits.
    An existing row with the same id is replaced.

    A target without a song reference is still written, only a warning is logged.

    :param target: the target to store
    """
    song_id = target.song_ref_id
    if song_id is None:
        logger.warning("the Target doesn't refer to a song")

    self.cursor.execute(
        "INSERT OR REPLACE INTO Target (id, song_id, file, path) VALUES (?, ?, ?, ?);",
        (target.id, song_id, target.file, target.path)
    )
    self.connection.commit()
def push_artist(self, artist: Artist):
    # Stub: does nothing yet, the passed artist is not written to the database.
    pass
def push_source(self, source: Source):
def pull_lyrics(self, song_ref: Reference = None, lyrics_ref: Reference = None) -> List[Lyrics]:
    """
    Gets a list of lyrics. If lyrics_ref is passed in, the list will most likely only
    contain one element.
    **If neither song_ref nor lyrics_ref are passed in it will return ALL lyrics**

    If both are passed in, song_ref wins and lyrics_ref is ignored.

    :param song_ref: only return the lyrics belonging to this song
    :param lyrics_ref: only return the lyrics with this id
    :return: the matching lyrics, an empty list if nothing matches
    """
    # only the fixed condition text is formatted into the query,
    # the id itself is bound as a parameter
    where, parameters = "1=1", ()
    if song_ref is not None:
        where, parameters = "song_id=?", (song_ref.id,)
    elif lyrics_ref is not None:
        where, parameters = "id=?", (lyrics_ref.id,)

    self.cursor.execute(LYRICS_QUERY.format(where=where), parameters)

    return [
        Lyrics(id_=lyrics_row['id'], text=lyrics_row['text'], language=lyrics_row['language'])
        for lyrics_row in self.cursor.fetchall()
    ]
def pull_sources(self, song_ref: Reference = None, source_ref: Reference = None) -> List[Source]:
    """
    Gets a list of sources. If source_ref is passed in, the list will most likely only
    contain one element if everything goes accordingly.
    **If neither song_ref nor source_ref are passed in it will return ALL sources**

    If both are passed in, song_ref wins and source_ref is ignored.

    :param song_ref: only return the sources belonging to this song
    :param source_ref: only return the source with this id
    :return: the matching sources, an empty list if nothing matches
    """
    # Only the fixed condition text is formatted into the query; the id itself
    # is bound as a parameter. A double-quoted value inside the statement is
    # read by SQLite as an identifier first (an id spelled like a column name
    # would be compared against that column), and a quote inside an id would
    # break the statement.
    where, parameters = "1=1", ()
    if song_ref is not None:
        where, parameters = "song_id=?", (song_ref.id,)
    elif source_ref is not None:
        where, parameters = "id=?", (source_ref.id,)

    self.cursor.execute(SOURCE_QUERY.format(where=where), parameters)

    return [
        Source(id_=source_row['id'], src=source_row['src'], url=source_row['url'])
        for source_row in self.cursor.fetchall()
    ]
def pull_single_song(self, song_ref: Reference = None) -> Song:
    """
    This function is used to get one song (including its target and its sources)
    from one song id (a reference object)

    :param song_ref: the reference holding the id of the requested song
    :return requested_song: the song, or an empty Song() if no song has this id
    :raises ValueError: if song_ref is missing or doesn't hold an id
    """
    # song_ref defaults to None, so it has to be checked before reading its id,
    # else the call fails with an AttributeError instead of the ValueError
    if song_ref is None or song_ref.id is None:
        raise ValueError("The Song ref doesn't point anywhere. Remember to use the debugger.")

    # NOTE(review): the id is formatted into the statement because SONG_QUERY
    # carries a {song_id} placeholder; this relies on ids never containing quotes.
    query = SONG_QUERY.format(song_id=song_ref.id)
    self.cursor.execute(query)
    song_rows = self.cursor.fetchall()

    if not song_rows:
        logger.warning(f"No song found for the id {song_ref.id}")
        return Song()
    if len(song_rows) > 1:
        logger.warning(f"Multiple Songs found for the id {song_ref.id}. Defaulting to the first one.")

    song_result = song_rows[0]

    # the target columns come from a LEFT JOIN and are None if the song has no target
    target = Target(
        id_=song_result['target_id'],
        file=song_result['file'],
        path=song_result['path']
    )

    return Song(
        id_=song_result['song_id'],
        title=song_result['title'],
        isrc=song_result['isrc'],
        length=song_result['length'],
        target=target,
        sources=self.pull_sources(song_ref=song_ref)
    )
if __name__ == "__main__":
cache = Database("")

View File

@ -4,15 +4,19 @@ from ...utils.shared import (
SONG_LOGGER as logger
)
class Reference:
    """
    A lightweight pointer to a database object. It only stores the id of
    the object it refers to.
    """

    def __init__(self, id_: str) -> None:
        # the id of the referenced object
        self.id = id_

    def __str__(self):
        return "references to an object with the id: {}".format(self.id)
class DatabaseObject:
def __init__(self, id_: str = None) -> None:
self.id_: str | None = id_
def get_id(self) -> str:
"""
returns the id if it is set, else

View File

@ -12,13 +12,33 @@ from .database_object import (
)
class SongAttribute:
    """
    Mixin for objects that belong to a song. It stores a reference to that
    song and exposes the id of the referenced song as `song_ref_id`.
    """

    def __init__(self, song_ref: Reference = None):
        # the reference to the song this attribute belongs to
        self.song_ref = song_ref

    def add_song(self, song_ref: Reference):
        """
        Sets the song this attribute belongs to.

        :param song_ref: reference to the owning song
        """
        self.song_ref = song_ref

    def get_ref_song_id(self):
        """
        :return: the id of the referenced song, None if no song is referenced
        """
        return None if self.song_ref is None else self.song_ref.id

    def set_ref_song_id(self, song_id):
        """
        Replaces the current song reference with a new Reference to song_id.

        :param song_id: the id of the owning song
        """
        self.song_ref = Reference(song_id)

    song_ref_id = property(get_ref_song_id, set_ref_song_id)
class Metadata:
"""
Shall only be read or edited via the Song object.
For this reason there is no reference to the song needed.
"""
def __init__(self, data: dict = {}) -> None:
self.data = {}
self.data = data
def get_all_metadata(self):
return list(self.data.items())
@ -33,7 +53,7 @@ class Metadata:
return self.data[item]
class Source(DatabaseObject):
class Source(DatabaseObject, SongAttribute):
"""
create somehow like that
```python
@ -41,23 +61,30 @@ class Source(DatabaseObject):
Source(src="youtube", url="https://youtu.be/dfnsdajlhkjhsd")
```
"""
def __init__(self, id_: str = None, src: str = None, url: str = None) -> None:
super().__init__(id_=id_)
DatabaseObject.__init__(self, id_=id_)
SongAttribute.__init__(self)
self.src = src
self.url = url
def __str__(self):
return f"{self.src}: {self.url}"
class Target(DatabaseObject):
class Target(DatabaseObject, SongAttribute):
"""
create somehow like that
```python
# I know path is pointles, and I will change that (don't worry about backwards compatibility there)
# I know path is pointless, and I will change that (don't worry about backwards compatibility there)
Target(file="~/Music/genre/artist/album/song.mp3", path="~/Music/genre/artist/album")
```
"""
def __init__(self, id_:str = None, file: str = None, path: str = None) -> None:
super().__init__(id_=id_)
def __init__(self, id_: str = None, file: str = None, path: str = None) -> None:
DatabaseObject.__init__(self, id_=id_)
SongAttribute.__init__(self)
self._file = file
self._path = path
@ -86,7 +113,7 @@ class Target(DatabaseObject):
return False
return os.path.exists(self.file)
def is_set(self) -> bool:
return not (self._file is None or self._path is None)
@ -96,30 +123,31 @@ class Target(DatabaseObject):
exists_on_disc = property(fget=get_exists_on_disc)
class Lyrics(DatabaseObject):
class Lyrics(DatabaseObject, SongAttribute):
def __init__(self, text: str, language: str, id_: str = None) -> None:
super().__init__(id_=id_)
DatabaseObject.__init__(self, id_=id_)
SongAttribute.__init__(self)
self.text = text
self.language = language
class Song(DatabaseObject):
def __init__(
self,
id_: str = None,
mb_id: str = None,
title: str = None,
release_name: str = None,
artist_names: List[str] = [],
isrc: str = None,
length: int = None,
sources: List[Source] = None,
target: Target = None,
lyrics: List[Lyrics] = None,
metadata: dict = {},
release_ref: str = None,
artist_refs: List[Reference] = None
) -> None:
self,
id_: str = None,
mb_id: str = None,
title: str = None,
release_name: str = None,
artist_names: List[str] = [],
isrc: str = None,
length: int = None,
sources: List[Source] = None,
target: Target = None,
lyrics: List[Lyrics] = None,
metadata: dict = {},
release_ref: str = None,
artist_refs: List[Reference] = None
) -> None:
"""
id: is not NECESSARILY the musicbrainz id, but is DISTINCT for every song
mb_id: is the musicbrainz_id
@ -133,29 +161,33 @@ class Song(DatabaseObject):
self.title: str | None = title
self.release_name: str | None = release_name
self.isrc: str | None = isrc
self.length: int | None = length
self.length_: int | None = length
self.artist_names = artist_names
self.metadata = Metadata(data=metadata)
if sources is None:
sources = []
self.sources: List[Source] = sources
for source in self.sources:
source.add_song(self.reference)
if target is None:
target = Target()
self.target: Target = target
self.target.add_song(self.reference)
if lyrics is None:
lyrics = []
self.lyrics: List[Lyrics] = lyrics
for lyrics_ in self.lyrics:
lyrics_.add_song(self.reference)
self.release_ref = release_ref
self.artist_refs = artist_refs
def __str__(self) -> str:
return f"\"{self.title}\" by {', '.join([str(a) for a in self.artists])}"
return f"\"{self.title}\" by {', '.join(self.artist_names)}"
def __repr__(self) -> str:
return self.__str__()
@ -167,7 +199,20 @@ class Song(DatabaseObject):
return self.isrc is not None
def get_artist_names(self) -> List[str]:
return [a.name for a in self.artists]
return self.artist_names
def get_length(self):
if self.length_ is None:
return None
return int(self.length_)
def set_length(self, length: int):
if type(length) != int:
raise TypeError(f"length of a song must be of the type int not {type(length)}")
self.length_ = length
length = property(fget=get_length, fset=set_length)
if __name__ == "__main__":
"""

View File

@ -1,14 +1,20 @@
CREATE TABLE Song
(
id BIGINT AUTO_INCREMENT PRIMARY KEY,
name TEXT
id BIGINT AUTO_INCREMENT PRIMARY KEY,
name TEXT,
isrc TEXT,
length INT -- length is in milliseconds (could be wrong)
);
CREATE TABLE Source
(
id BIGINT AUTO_INCREMENT PRIMARY KEY,
song_id BIGINT,
id BIGINT AUTO_INCREMENT PRIMARY KEY,
src TEXT NOT NULL,
url TEXT NOT NULL,
certainty INT NOT NULL DEFAULT 0, -- certainty=0 -> it is definitely a valid source
valid BOOLEAN NOT NULL DEFAULT 1,
song_id BIGINT,
FOREIGN KEY(song_id) REFERENCES Song(id)
);
@ -29,14 +35,18 @@ CREATE TABLE Album
CREATE TABLE Target
(
id BIGINT AUTO_INCREMENT PRIMARY KEY,
song_id BIGINT,
file TEXT NOT NULL,
path TEXT,
song_id BIGINT UNIQUE,
FOREIGN KEY(song_id) REFERENCES Song(id)
);
CREATE TABLE Lyrics
(
id BIGINT AUTO_INCREMENT PRIMARY KEY,
song_id BIGINT,
id BIGINT AUTO_INCREMENT PRIMARY KEY,
text TEXT,
language TEXT,
song_id BIGINT,
FOREIGN KEY(song_id) REFERENCES Song(id)
);
@ -55,9 +65,3 @@ CREATE TABLE AlbumArtist
FOREIGN KEY(album_id) REFERENCES Album(id),
FOREIGN KEY(artist_id) REFERENCES Artist(id)
);
SELECT
Song.id,
Song.name
FROM Song

BIN
src/test.db Normal file

Binary file not shown.