# Source code for nkdsu.apps.vote.anime

from __future__ import annotations

import hashlib
import os
import re
from itertools import chain
from typing import Literal, Optional

from django.conf import settings
from pydantic import BaseModel, HttpUrl
import requests
from typing_extensions import TypedDict
import ujson

from .utils import UA, camel_to_snake


# Largest number of titles suggest_anime() will hand back; a query that matches
# more distinct titles than this gets an empty result instead.
MAX_ANIME_SUGGESTIONS = 10

# Hostnames of the anime database websites we recognise, mapped to a short
# label for each. Anime.urls() skips any source whose host is not a key here.
ANIME_WEBSITES = {
    'anidb.net': 'AniDB',
    'anilist.co': 'AniList',
    'kitsu.io': 'Kitsu',
    'myanimelist.net': 'MAL',
}

# Directory (under MEDIA_ROOT) that locally cached anime pictures are kept in.
ANIME_PICTURE_DIR = os.path.join(settings.MEDIA_ROOT, 'ap')


class Season(TypedDict):
    """
    The shape of an :class:`Anime`'s ``anime_season`` value: a year paired
    with one of a fixed set of season names.
    """

    # the year, or None
    year: Optional[int]
    # one of the four seasons, or 'UNDEFINED'
    season: Literal[
        'WINTER',
        'SPRING',
        'SUMMER',
        'FALL',
        'UNDEFINED',
    ]
class Anime(BaseModel):
    """
    A single anime, as validated from one entry of the offline database.

    Instances are frozen (see ``model_config``), so they cannot be modified
    after they have been created.
    """

    model_config = {
        'frozen': True,
    }

    title: str
    picture: HttpUrl
    thumbnail: HttpUrl
    synonyms: list[str]
    sources: list[HttpUrl]
    relations: list[HttpUrl]
    anime_season: Season
    type: Literal['MOVIE', 'ONA', 'OVA', 'SPECIAL', 'TV', 'UNKNOWN']

    @property
    def quarter(self) -> str:
        """
        Return a ``<year>-q<n>`` label for this anime's season, suitable for
        sorting chronologically.

        A season without a year is rendered with a year of ``0``, and an
        ``'UNDEFINED'`` season is rendered as ``q?``.
        """

        quarter_label = {
            'WINTER': 'q1',
            'SPRING': 'q2',
            'SUMMER': 'q3',
            'FALL': 'q4',
            'UNDEFINED': 'q?',
        }[self.anime_season['season']]

        # `year` can be present but None, so a plain .get() default is not
        # enough to guarantee a number here.
        year = self.anime_season.get('year') or 0
        return f'{year}-{quarter_label}'

    def cached_picture_filename(self) -> str:
        """
        Return the file name (not the full path) that the local copy of
        :attr:`picture` is stored under: the MD5 hex digest of the picture URL
        followed by the extension taken from the URL's path.

        As a side effect, this makes sure that ``ANIME_PICTURE_DIR`` exists.

        :raises AssertionError: if the picture URL has no path component.
        """

        # exist_ok avoids the check-then-create race between two callers
        os.makedirs(ANIME_PICTURE_DIR, exist_ok=True)

        path = self.picture.path
        assert path is not None, f"{self.picture} has no path"
        ext = path.split('/')[-1].split('.')[-1]
        return f"{hashlib.md5(str(self.picture).encode()).hexdigest()}.{ext}"

    def cached_picture_path(self) -> str:
        """
        Return the filesystem path of the locally cached picture, whether or
        not the file has been downloaded yet.
        """

        return os.path.join(ANIME_PICTURE_DIR, self.cached_picture_filename())

    def picture_is_cached(self) -> bool:
        """
        Return ``True`` if something exists at :meth:`cached_picture_path`.
        """

        return os.path.exists(self.cached_picture_path())

    def cached_picture_url(self, force_refresh: bool = False) -> str:
        """
        Return the URL, under ``MEDIA_URL``, of the locally cached picture.

        :param force_refresh: accepted, but not used by this method.
        """

        return f'{settings.MEDIA_URL.rstrip("/")}/ap/{self.cached_picture_filename()}'

    def cache_picture(self) -> None:
        """
        Download this anime's picture and write it to
        :meth:`cached_picture_path`.

        If the request for :attr:`picture` comes back with an HTTP error
        status, :attr:`thumbnail` is downloaded in its place.

        :raises requests.HTTPError: if the thumbnail request also fails.
        :raises requests.Timeout: if the remote server does not respond in
            time.
        """

        headers = {'User-Agent': UA}
        # without a timeout, requests will wait on an unresponsive server
        # indefinitely
        timeout = 30  # seconds

        resp = requests.get(str(self.picture), headers=headers, timeout=timeout)

        try:
            resp.raise_for_status()
        except requests.HTTPError:
            resp = requests.get(str(self.thumbnail), headers=headers, timeout=timeout)
            resp.raise_for_status()

        image_content = resp.content

        with open(self.cached_picture_path(), 'wb') as image_file:
            image_file.write(image_content)

    def titles(self) -> list[str]:
        """
        Return every name this anime goes by (its synonyms and its title),
        sorted.
        """

        return sorted(chain(self.synonyms, (self.title,)))

    def inclusion_ranking(self) -> tuple[int, float]:
        """
        Return an estimated representation of how likely this anime is to be
        something in the library, in the form of a tuple of numbers that you
        can sort by. Helpful for situations where we have multiple matches for
        the same name.

        Lower is more likely. An anime with no sources at all gets the worst
        possible second element (infinity).
        """

        source_count = len(self.sources)

        return (
            ['TV', 'MOVIE', 'OVA', 'ONA', 'SPECIAL', 'UNKNOWN'].index(self.type),
            # guard against division by zero when there are no sources
            1 / source_count if source_count else float('inf'),
        )

    def urls(self) -> list[tuple[str, HttpUrl]]:
        """
        Return ``(website label, url)`` pairs for each of this anime's sources
        that is hosted on a site listed in ``ANIME_WEBSITES``, sorted by
        label.
        """

        labelled_sources = (
            (ANIME_WEBSITES.get(source.host), source)
            for source in self.sources
            if source.host is not None
        )

        return sorted(
            (
                (website, url)
                for website, url in labelled_sources
                # sources on sites we do not recognise have no label
                if website is not None
            ),
            key=lambda u: u[0],
        )

    def related_anime(self) -> list[str]:
        """
        Return the anime titles from ``Track.all_anime_titles()`` whose anime
        has at least one source URL listed in this anime's :attr:`relations`,
        ordered by :attr:`quarter`.
        """

        # imported here rather than at module level
        from .models import Track

        related = [
            (title, anime)
            for title, anime in (
                (anime_title, get_anime(anime_title))
                for anime_title in Track.all_anime_titles()
            )
            if anime is not None
            and any(source in self.relations for source in anime.sources)
        ]
        related.sort(key=lambda ta: ta[1].quarter)

        return [title for title, _ in related]
def normalise_title(title: str) -> str:
    """
    Return ``title`` with every apostrophe-like character (backticks, curly
    single quotes and similar) replaced by a plain ``'``, so that titles that
    differ only in that respect compare equal.
    """

    apostrophe_lookalikes = re.compile(r'[`‘’‛ߵߴ❛❜]')
    return apostrophe_lookalikes.sub("'", title)
# Every anime in the offline database, keyed by each of its normalised titles
# and synonyms. When several anime share a name, the one with the lowest
# inclusion_ranking() is kept.
by_normalised_title: dict[str, Anime] = {}

with open(
    os.path.join(
        os.path.dirname(__file__),
        'data',
        'mpaod',
        'anime-offline-database.json',
    ),
    'rt',
    # be explicit, so that decoding does not depend on the locale's default
    # encoding
    encoding='utf-8',
) as aodf:
    for d in ujson.load(aodf)['data']:
        a = Anime(
            **{
                # used when the entry has no sources of its own
                'sources': [],
                # popped so the renamed field does not also appear under its
                # original key below
                'relations': d.pop('relatedAnime'),
                **{camel_to_snake(k): v for k, v in d.items()},
            }
        )

        for title in (normalise_title(title) for title in chain([a.title], a.synonyms)):
            if (title not in by_normalised_title) or (
                by_normalised_title[title].inclusion_ranking() > a.inclusion_ranking()
            ):
                by_normalised_title[title] = a
def get_anime(title: str) -> Optional[Anime]:
    """
    Look up the anime that goes by ``title`` (after normalising it), or
    return ``None`` if there isn't one.

    >>> get_anime('Machikado Mazoku').title
    'Machikado Mazoku'
    >>> get_anime('The Demon Girl Next Door').title
    'Machikado Mazoku'
    >>> get_anime('shamiko')
    """

    lookup_key = normalise_title(title)
    return by_normalised_title.get(lookup_key)
def fuzzy_nkdsu_aliases() -> dict[str, str]:
    """
    Build a dict of ``{alias: nkdsu_title}``, in which every ``nkdsu_title``
    is an anime included in the nkd.su database and every ``alias`` is a
    lowercased alternative title for the ``nkdsu_title`` it points to.

    >>> from pprint import pprint
    >>> from nkdsu.apps.vote.models import Track
    >>> from django.utils.timezone import now
    >>> defaults = dict(
    ...     id3_artist='someone', hidden=False, inudesu=False, added=now(), revealed=now()
    ... )

    With some anime titles that don't have very many synonyms:

    >>> Track.objects.create(**defaults, id='1', id3_title='song (Eiji OP1)')
    <Track: ‘song’ (Eiji OP1) - someone>
    >>> Track.objects.create(**defaults, id='2', id3_title='ditty (◯ ED1)')
    <Track: ‘ditty’ (◯ ED1) - someone>
    >>> pprint(fuzzy_nkdsu_aliases())
    {'"eiji"': 'Eiji',
     'circle': '◯',
     'eiji': 'Eiji',
     'o (sawako kabuki)': '◯',
     '°': '◯',
     '○': '◯',
     '◯': '◯',
     '「エイジ」': 'Eiji',
     'エイジ': 'Eiji'}
    """

    from .models import Track

    aliases: dict[str, str] = {}

    for nkdsu_title in Track.all_anime_titles():
        anime = get_anime(nkdsu_title)

        # titles that don't match anything in the database have no aliases
        if anime is None:
            continue

        for alt_title in anime.titles():
            aliases[alt_title.lower()] = nkdsu_title

    return aliases
def suggest_anime(query: str) -> set[str]:
    """
    Return the nkd.su anime titles that have an alias containing ``query``,
    compared case-insensitively.

    If more than ``MAX_ANIME_SUGGESTIONS`` distinct titles match, nothing is
    suggested and an empty set is returned.
    """

    needle = query.lower()

    matches = {
        canonical_title
        for alias, canonical_title in fuzzy_nkdsu_aliases().items()
        if needle in alias
    }

    if len(matches) > MAX_ANIME_SUGGESTIONS:
        return set()

    return matches