chore: sprint cleanup — language unification, parser unification, fossils removal
Several weeks of work accumulated without being committed. Grouped here for clarity; see CHANGELOG.md [Unreleased] for the user-facing summary. Highlights ---------- P1 #2 — ISO 639-2/B canonical migration - New Language VO + LanguageRegistry (alfred/domain/shared/knowledge/). - iso_languages.yaml as single source of truth for language codes. - SubtitleKnowledgeBase now delegates lookup to LanguageRegistry; subtitles.yaml only declares subtitle-specific tokens (vostfr, vf, vff, …). - SubtitlePreferences default → ["fre", "eng"]; subtitle filenames written as {iso639_2b}.srt (legacy fr.srt still read via alias). - Scanner: dropped _LANG_KEYWORDS / _SDH_TOKENS / _FORCED_TOKENS / SUBTITLE_EXTENSIONS hardcoded dicts. - Fixed: 'hi' token no longer marks SDH (conflicted with Hindi alias). - Added settings.min_movie_size_bytes (was a module constant). P1 #3 — Release parser unification + data-driven tokenizer - parse_release() is now the single source of truth for release-name parsing. - alfred/knowledge/release/separators.yaml declares the token separators used by the tokenizer (., space, [, ], (, ), _). New conventions can be added without code changes. - Tokenizer now splits on any configured separator instead of name.split('.'). Releases like 'The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]' parse via the direct path without sanitization fallback. - Site-tag extraction always runs first; well-formedness only rejects truly forbidden chars. - _parse_season_episode() extended with NxNN / NxNNxNN alt forms. - Removed dead helpers: _sanitize, _normalize. Domain cleanup - Deleted fossil services with zero production callers: alfred/domain/movies/services.py alfred/domain/tv_shows/services.py alfred/domain/subtitles/services.py (replaced by subtitles/services/ package) alfred/domain/subtitles/repositories.py - Split monolithic subtitle services into a package (identifier, matcher, placer, pattern_detector, utils) + dedicated knowledge/ package. 
- MediaInfo split into dedicated package (alfred/domain/shared/media/: audio, video, subtitle, info, matching). Persistence cleanup - Removed dead JSON repositories (movie/subtitle/tvshow_repository.py). Tests - Major expansion of the test suite organized to mirror the source tree. - Removed obsolete *_edge_cases test files superseded by structured tests. - Suite: 990 passed, 8 skipped. Misc - .gitignore: exclude env_backup/ and *.bak. - Adjustments across agent/llm, app.py, application/filesystem, and infrastructure/filesystem to align with the new domain layout.
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
"""Tests for ``alfred.infrastructure.subtitle.rule_repository.RuleSetRepository``.

Loads/saves the SubtitleRuleSet inheritance chain from ``.alfred/`` YAML.

Coverage:

- ``TestLoad`` — no files → ``global_default``; rules.yaml override applied
  on top; release_groups/{NAME}.yaml override applied;
  SubtitlePreferences seeds the base when provided; full 3-level chain.
- ``TestFilterOverride`` — unknown keys discarded.
- ``TestSaveLocal`` — atomic write, merges with existing, creates .alfred/.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from alfred.infrastructure.persistence.memory.ltm.components.subtitle_preferences import (
|
||||
SubtitlePreferences,
|
||||
)
|
||||
from alfred.infrastructure.subtitle.rule_repository import (
|
||||
RuleSetRepository,
|
||||
_filter_override,
|
||||
)
|
||||
|
||||
|
||||
def _write(path: Path, data: dict) -> None:
    """Dump *data* as YAML into *path*, creating missing parent directories.

    Args:
        path: Destination file; its parent directories are created on demand.
        data: Mapping serialized with ``yaml.safe_dump``.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    serialized = yaml.safe_dump(data)
    path.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# _filter_override #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestFilterOverride:
    """Checks on which keys ``_filter_override`` lets through."""

    def test_keeps_only_valid_keys(self):
        # Five keys expected to survive the filter, plus two that are not.
        raw_override = {
            "languages": ["fra"],
            "formats": ["srt"],
            "types": ["standard"],
            "format_priority": ["srt"],
            "min_confidence": 0.8,
            "unknown_key": "ignored",
            "another": 42,
        }
        expected_keys = {
            "languages",
            "formats",
            "types",
            "format_priority",
            "min_confidence",
        }

        filtered = _filter_override(raw_override)

        assert set(filtered) == expected_keys
        assert "unknown_key" not in filtered

    def test_empty(self):
        # An empty override must come back as an empty mapping.
        filtered = _filter_override({})
        assert filtered == {}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# load #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestLoad:
    """Exercise ``RuleSetRepository.load`` against files under ``.alfred/``."""

    def test_no_files_returns_global_default(self, tmp_path):
        # Nothing on disk: the loaded rule set should still resolve cleanly
        # (presumably from the repository's built-in defaults).
        resolved = RuleSetRepository(tmp_path).load().resolve()
        assert resolved.preferred_languages  # non-empty
        assert resolved.min_confidence > 0

    def test_subtitle_preferences_override_base(self, tmp_path):
        preferences = SubtitlePreferences(
            languages=["jpn"],
            formats=["ass"],
            types=["standard"],
        )
        repository = RuleSetRepository(tmp_path)
        resolved = repository.load(subtitle_preferences=preferences).resolve()
        assert resolved.preferred_languages == ["jpn"]
        assert resolved.preferred_formats == ["ass"]
        assert resolved.allowed_types == ["standard"]

    def test_local_rules_yaml_applied(self, tmp_path):
        local_rules = tmp_path / ".alfred" / "rules.yaml"
        local_override = {"languages": ["spa"], "min_confidence": 0.95}
        _write(local_rules, {"override": local_override})

        resolved = RuleSetRepository(tmp_path).load().resolve()

        assert resolved.preferred_languages == ["spa"]
        assert resolved.min_confidence == 0.95

    def test_release_group_override_applied(self, tmp_path):
        group_file = tmp_path / ".alfred" / "release_groups" / "KONTRAST.yaml"
        _write(group_file, {"override": {"format_priority": ["ass", "srt"]}})

        repository = RuleSetRepository(tmp_path)
        resolved = repository.load(release_group="KONTRAST").resolve()

        assert resolved.format_priority == ["ass", "srt"]

    def test_full_three_level_chain(self, tmp_path):
        alfred_dir = tmp_path / ".alfred"

        # Level 1 (base): the preferences provide languages=["jpn"].
        preferences = SubtitlePreferences(languages=["jpn"])
        # Level 2 (release group): overrides format_priority.
        _write(
            alfred_dir / "release_groups" / "GRP.yaml",
            {"override": {"format_priority": ["ass"]}},
        )
        # Level 3 (local): overrides min_confidence.
        _write(
            alfred_dir / "rules.yaml",
            {"override": {"min_confidence": 0.99}},
        )

        repository = RuleSetRepository(tmp_path)
        rule_set = repository.load(
            release_group="GRP",
            subtitle_preferences=preferences,
        )
        resolved = rule_set.resolve()

        # Each level's contribution must be visible in the resolved rules.
        assert resolved.preferred_languages == ["jpn"]
        assert resolved.format_priority == ["ass"]
        assert resolved.min_confidence == 0.99

    def test_release_group_yaml_without_override_section_ignored(self, tmp_path):
        # The group file exists but carries no 'override' key.
        group_file = tmp_path / ".alfred" / "release_groups" / "GRP.yaml"
        _write(group_file, {"name": "GRP"})

        # Loading must not crash, and no intermediate rule set may be
        # inserted: the returned node is still scoped "global".
        rule_set = RuleSetRepository(tmp_path).load(release_group="GRP")
        assert rule_set.scope.level == "global"

    def test_missing_release_group_file_silently_ignored(self, tmp_path):
        # No file on disk for the requested group: the scope stays "global".
        rule_set = RuleSetRepository(tmp_path).load(release_group="DOES_NOT_EXIST")
        assert rule_set.scope.level == "global"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# save_local #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
|
||||
class TestSaveLocal:
    """Exercise ``RuleSetRepository.save_local`` writing ``.alfred/rules.yaml``."""

    @staticmethod
    def _read_rules(root: Path) -> dict:
        """Parse ``<root>/.alfred/rules.yaml`` and return its content.

        The file is decoded explicitly as UTF-8, like the module's ``_write``
        helper, so the assertions do not depend on the platform's locale
        encoding.
        """
        rules_file = root / ".alfred" / "rules.yaml"
        return yaml.safe_load(rules_file.read_text(encoding="utf-8"))

    def test_creates_file(self, tmp_path):
        repo = RuleSetRepository(tmp_path)
        repo.save_local({"languages": ["spa"]})

        # The file (and its .alfred/ parent) must exist after the first save.
        assert (tmp_path / ".alfred" / "rules.yaml").is_file()
        assert self._read_rules(tmp_path) == {"override": {"languages": ["spa"]}}

    def test_merges_with_existing(self, tmp_path):
        repo = RuleSetRepository(tmp_path)
        repo.save_local({"languages": ["spa"]})
        repo.save_local({"min_confidence": 0.8})

        # The second save adds a key without dropping the first one.
        override = self._read_rules(tmp_path)["override"]
        assert override["languages"] == ["spa"]
        assert override["min_confidence"] == 0.8

    def test_overwrites_existing_key(self, tmp_path):
        repo = RuleSetRepository(tmp_path)
        repo.save_local({"languages": ["spa"]})
        repo.save_local({"languages": ["jpn"]})

        # Saving the same key again replaces the earlier value.
        override = self._read_rules(tmp_path)["override"]
        assert override["languages"] == ["jpn"]

    def test_temp_file_cleaned_up(self, tmp_path):
        repo = RuleSetRepository(tmp_path)
        repo.save_local({"languages": ["spa"]})

        # No stale .tmp file may be left next to rules.yaml.
        assert not (tmp_path / ".alfred" / "rules.yaml.tmp").exists()
|
||||
Reference in New Issue
Block a user