feat: major architectural refactor

- Refactor memory system (episodic/STM/LTM with components)
- Implement complete subtitle domain (scanner, matcher, placer)
- Add YAML workflow infrastructure
- Externalize knowledge base (patterns, release groups)
- Add comprehensive testing suite
- Create manual testing CLIs
This commit is contained in:
2026-05-11 21:33:37 +02:00
parent 62b5d0b998
commit 249c5de76a
103 changed files with 8559 additions and 1346 deletions
+217
View File
@@ -0,0 +1,217 @@
"""Tests for SubtitleScanner and _classify helper."""
import pytest
from pathlib import Path
from alfred.domain.subtitles.scanner import (
SubtitleCandidate,
SubtitleScanner,
_classify,
)
# ---------------------------------------------------------------------------
# _classify — unit tests for the filename parser
# ---------------------------------------------------------------------------
class TestClassify:
    """Checks the ``(language, is_sdh, is_forced)`` triple produced by ``_classify``."""

    @staticmethod
    def _classify_file(directory, filename):
        """Create an empty file named *filename* in *directory* and classify it."""
        target = directory / filename
        target.write_text("")
        return _classify(target)

    def test_iso_lang_code(self, tmp_path):
        """A bare two-letter code gives that language with both flags off."""
        language, sdh, forced = self._classify_file(tmp_path, "fr.srt")
        assert language == "fr"
        assert not sdh
        assert not forced

    def test_english_keyword(self, tmp_path):
        """The word ``english`` maps to ``en``."""
        language, _, _ = self._classify_file(tmp_path, "english.srt")
        assert language == "en"

    def test_french_keyword(self, tmp_path):
        """The word ``French`` inside a release-style name maps to ``fr``."""
        language, _, _ = self._classify_file(tmp_path, "Show.S01E01.French.srt")
        assert language == "fr"

    def test_vostfr_is_french(self, tmp_path):
        """The ``VOSTFR`` tag is classified as ``fr``."""
        language, _, _ = self._classify_file(tmp_path, "Show.S01E01.VOSTFR.srt")
        assert language == "fr"

    def test_sdh_token(self, tmp_path):
        """An ``sdh`` token sets the SDH flag and keeps the language."""
        language, sdh, _ = self._classify_file(tmp_path, "fr.sdh.srt")
        assert language == "fr"
        assert sdh

    def test_hi_alias_for_sdh(self, tmp_path):
        """An ``hi`` token also sets the SDH flag."""
        _, sdh, _ = self._classify_file(tmp_path, "en.hi.srt")
        assert sdh

    def test_forced_token(self, tmp_path):
        """A ``forced`` token sets the forced flag."""
        _, _, forced = self._classify_file(tmp_path, "fr.forced.srt")
        assert forced

    def test_unknown_language_returns_none(self, tmp_path):
        """A name without any language token yields ``None`` as language."""
        language, _, _ = self._classify_file(tmp_path, "Show.S01E01.720p.srt")
        assert language is None

    def test_dot_separator(self, tmp_path):
        """Dot-separated tokens are each recognised.

        NOTE(review): uses the same file name and assertions as
        ``test_sdh_token``.
        """
        language, sdh, _ = self._classify_file(tmp_path, "fr.sdh.srt")
        assert language == "fr"
        assert sdh

    def test_hyphen_separator(self, tmp_path):
        """Hyphen-separated tokens are each recognised."""
        language, _, forced = self._classify_file(tmp_path, "fr-forced.srt")
        assert language == "fr"
        assert forced
# ---------------------------------------------------------------------------
# SubtitleCandidate.destination_name
# ---------------------------------------------------------------------------
class TestSubtitleCandidateDestinationName:
    """Checks ``SubtitleCandidate.destination_name`` across flags and extensions."""

    def _make(self, lang="fr", is_sdh=False, is_forced=False, ext=".srt", path=None):
        """Build a candidate; a falsy *path* falls back to ``/fake/fr.srt``."""
        if not path:
            path = Path("/fake/fr.srt")
        return SubtitleCandidate(
            source_path=path,
            language=lang,
            is_sdh=is_sdh,
            is_forced=is_forced,
            extension=ext,
        )

    def test_standard(self):
        """Defaults give ``<lang><ext>``."""
        candidate = self._make()
        assert candidate.destination_name == "fr.srt"

    def test_sdh(self):
        """The SDH flag inserts ``sdh`` before the extension."""
        candidate = self._make(is_sdh=True)
        assert candidate.destination_name == "fr.sdh.srt"

    def test_forced(self):
        """The forced flag inserts ``forced`` before the extension."""
        candidate = self._make(is_forced=True)
        assert candidate.destination_name == "fr.forced.srt"

    def test_ass_extension(self):
        """A non-default extension is carried through to the name."""
        candidate = self._make(ext=".ass")
        assert candidate.destination_name == "fr.ass"

    def test_english_standard(self):
        """A different language code replaces the prefix."""
        candidate = self._make(lang="en")
        assert candidate.destination_name == "en.srt"
# ---------------------------------------------------------------------------
# SubtitleScanner — integration with real filesystem
# ---------------------------------------------------------------------------
class TestSubtitleScanner:
    """Runs ``SubtitleScanner.scan`` against real files created under ``tmp_path``."""

    def _scanner(self, languages=None, min_size_kb=0, keep_sdh=True, keep_forced=True):
        """Build a scanner; a falsy *languages* falls back to ``["fr", "en"]``."""
        wanted = languages if languages else ["fr", "en"]
        return SubtitleScanner(
            languages=wanted,
            min_size_kb=min_size_kb,
            keep_sdh=keep_sdh,
            keep_forced=keep_forced,
        )

    def _video(self, tmp_path):
        """Write a placeholder ``Movie.mkv`` into *tmp_path* and return its path."""
        movie = tmp_path / "Movie.mkv"
        movie.write_bytes(b"video")
        return movie

    def test_finds_adjacent_subtitle(self, tmp_path):
        """A subtitle sitting next to the video is returned."""
        movie = self._video(tmp_path)
        subtitle = tmp_path / "fr.srt"
        subtitle.write_text("subtitle content")
        found = self._scanner().scan(movie)
        assert len(found) == 1
        assert found[0].language == "fr"

    def test_finds_multiple_languages(self, tmp_path):
        """One candidate language per wanted subtitle file is reported."""
        movie = self._video(tmp_path)
        for name, content in (("fr.srt", "fr subtitle"), ("en.srt", "en subtitle")):
            (tmp_path / name).write_text(content)
        found = self._scanner().scan(movie)
        assert {candidate.language for candidate in found} == {"fr", "en"}

    def test_scans_subs_subfolder(self, tmp_path):
        """Subtitles inside a ``Subs`` directory beside the video are picked up."""
        movie = self._video(tmp_path)
        subs_dir = tmp_path / "Subs"
        subs_dir.mkdir()
        (subs_dir / "fr.srt").write_text("subtitle")
        found = self._scanner().scan(movie)
        assert any(candidate.language == "fr" for candidate in found)

    def test_filters_unknown_language(self, tmp_path):
        """``unknown.srt`` produces no candidates with the default scanner."""
        movie = self._video(tmp_path)
        (tmp_path / "unknown.srt").write_text("subtitle")
        found = self._scanner().scan(movie)
        assert len(found) == 0

    def test_filters_wrong_language(self, tmp_path):
        """A ``de`` subtitle is dropped when only ``fr`` is requested."""
        movie = self._video(tmp_path)
        (tmp_path / "de.srt").write_text("german subtitle")
        french_only = self._scanner(languages=["fr"])
        found = french_only.scan(movie)
        assert len(found) == 0

    def test_filters_too_small_file(self, tmp_path):
        """A file below ``min_size_kb`` is dropped."""
        movie = self._video(tmp_path)
        tiny = tmp_path / "fr.srt"
        tiny.write_bytes(b"x")  # a single byte, far under the 10 KB threshold used below
        found = self._scanner(min_size_kb=10).scan(movie)
        assert len(found) == 0

    def test_filters_sdh_when_not_wanted(self, tmp_path):
        """SDH subtitles are dropped when ``keep_sdh`` is false."""
        movie = self._video(tmp_path)
        (tmp_path / "fr.sdh.srt").write_text("sdh subtitle")
        found = self._scanner(keep_sdh=False).scan(movie)
        assert len(found) == 0

    def test_filters_forced_when_not_wanted(self, tmp_path):
        """Forced subtitles are dropped when ``keep_forced`` is false."""
        movie = self._video(tmp_path)
        (tmp_path / "fr.forced.srt").write_text("forced subtitle")
        found = self._scanner(keep_forced=False).scan(movie)
        assert len(found) == 0

    def test_keeps_sdh_when_wanted(self, tmp_path):
        """SDH subtitles are kept, and flagged, when ``keep_sdh`` is true."""
        movie = self._video(tmp_path)
        (tmp_path / "fr.sdh.srt").write_text("sdh subtitle")
        found = self._scanner(keep_sdh=True).scan(movie)
        assert len(found) == 1
        assert found[0].is_sdh

    def test_ignores_non_subtitle_files(self, tmp_path):
        """``.nfo`` and ``.jpg`` files are not candidates even with a language name."""
        movie = self._video(tmp_path)
        (tmp_path / "fr.nfo").write_text("nfo file")
        (tmp_path / "fr.jpg").write_bytes(b"image")
        found = self._scanner().scan(movie)
        assert len(found) == 0

    def test_returns_empty_when_no_subtitles(self, tmp_path):
        """A directory holding only the video yields an empty list."""
        movie = self._video(tmp_path)
        found = self._scanner().scan(movie)
        assert found == []