e07c9ec77b
Several weeks of work accumulated without being committed. Grouped here for clarity; see CHANGELOG.md [Unreleased] for the user-facing summary. Highlights ---------- P1 #2 — ISO 639-2/B canonical migration - New Language VO + LanguageRegistry (alfred/domain/shared/knowledge/). - iso_languages.yaml as single source of truth for language codes. - SubtitleKnowledgeBase now delegates lookup to LanguageRegistry; subtitles.yaml only declares subtitle-specific tokens (vostfr, vf, vff, …). - SubtitlePreferences default → ["fre", "eng"]; subtitle filenames written as {iso639_2b}.srt (legacy fr.srt still read via alias). - Scanner: dropped _LANG_KEYWORDS / _SDH_TOKENS / _FORCED_TOKENS / SUBTITLE_EXTENSIONS hardcoded dicts. - Fixed: 'hi' token no longer marks SDH (conflicted with Hindi alias). - Added settings.min_movie_size_bytes (was a module constant). P1 #3 — Release parser unification + data-driven tokenizer - parse_release() is now the single source of truth for release-name parsing. - alfred/knowledge/release/separators.yaml declares the token separators used by the tokenizer (., space, [, ], (, ), _). New conventions can be added without code changes. - Tokenizer now splits on any configured separator instead of name.split('.'). Releases like 'The Father (2020) [1080p] [WEBRip] [5.1] [YTS.MX]' parse via the direct path without sanitization fallback. - Site-tag extraction always runs first; well-formedness only rejects truly forbidden chars. - _parse_season_episode() extended with NxNN / NxNNxNN alt forms. - Removed dead helpers: _sanitize, _normalize. Domain cleanup - Deleted fossil services with zero production callers: alfred/domain/movies/services.py alfred/domain/tv_shows/services.py alfred/domain/subtitles/services.py (replaced by subtitles/services/ package) alfred/domain/subtitles/repositories.py - Split monolithic subtitle services into a package (identifier, matcher, placer, pattern_detector, utils) + dedicated knowledge/ package. 
- MediaInfo split into dedicated package (alfred/domain/shared/media/: audio, video, subtitle, info, matching). Persistence cleanup - Removed dead JSON repositories (movie/subtitle/tvshow_repository.py). Tests - Major expansion of the test suite organized to mirror the source tree. - Removed obsolete *_edge_cases test files superseded by structured tests. - Suite: 990 passed, 8 skipped. Misc - .gitignore: exclude env_backup/ and *.bak. - Adjustments across agent/llm, app.py, application/filesystem, and infrastructure/filesystem to align with the new domain layout.
282 lines
11 KiB
Python
282 lines
11 KiB
Python
"""Tests for ``alfred.infrastructure.metadata.store.MetadataStore``.
|
|
|
|
The store manages ``<release_root>/.alfred/metadata.yaml`` — a per-release
|
|
sidecar with parse, probe, TMDB, pattern, and subtitle-history sections.
|
|
|
|
Coverage:
|
|
|
|
- ``TestIdentityAndExists`` — accessors + ``exists()``.
|
|
- ``TestLoad`` — empty/missing/corrupt YAML returns ``{}``.
|
|
- ``TestSave`` — atomic write creates ``.alfred/`` + temp file is gone.
|
|
- ``TestUpdateSection`` — replaces the section + adds ``_updated_at``.
|
|
- ``TestUpdateParseAndProbe`` / ``TestUpdateTmdb`` — strips ``status``
  from payload; TMDB promotes ``imdb_id`` / ``tmdb_id`` / ``media_type`` /
  ``title`` to the top level.
|
|
- ``TestPattern`` — ``confirmed_pattern`` returns the id only when flag
|
|
is set; ``mark_pattern_confirmed`` preserves pre-existing keys.
|
|
- ``TestSubtitleHistory`` — append + release-group dedup.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import yaml
|
|
|
|
from alfred.infrastructure.metadata.store import MetadataStore
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Identity / exists #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestIdentityAndExists:
    """Path accessors and ``exists()`` of a ``MetadataStore``."""

    def test_paths(self, tmp_path):
        """The store reports its root and the sidecar path under ``.alfred``."""
        store = MetadataStore(tmp_path)
        expected_sidecar = tmp_path / ".alfred" / "metadata.yaml"
        assert store.release_root == tmp_path
        assert store.metadata_path == expected_sidecar

    def test_exists_false_initially(self, tmp_path):
        """A store over a fresh directory reports that nothing exists yet."""
        store = MetadataStore(tmp_path)
        assert store.exists() is False

    def test_exists_after_save(self, tmp_path):
        """Once something has been saved, ``exists()`` flips to ``True``."""
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        assert store.exists() is True
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Load #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestLoad:
    """``load()`` returns ``{}`` for missing, empty, and corrupt sidecars."""

    def test_missing_file_returns_empty(self, tmp_path):
        """No sidecar on disk: loading yields an empty dict."""
        store = MetadataStore(tmp_path)
        assert store.load() == {}

    def test_empty_yaml_returns_empty(self, tmp_path):
        """A zero-length sidecar file loads as an empty dict."""
        store = MetadataStore(tmp_path)
        sidecar_dir = tmp_path / ".alfred"
        sidecar_dir.mkdir()
        (sidecar_dir / "metadata.yaml").write_text("")
        assert store.load() == {}

    def test_corrupt_yaml_returns_empty(self, tmp_path):
        """Unparseable YAML loads as an empty dict instead of raising."""
        store = MetadataStore(tmp_path)
        sidecar_dir = tmp_path / ".alfred"
        sidecar_dir.mkdir()
        (sidecar_dir / "metadata.yaml").write_text("not: : valid: yaml: [")
        # The store is expected to log a warning here, but it must not raise.
        assert store.load() == {}
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Save #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestSave:
    """``save()`` creates the sidecar, round-trips data, leaves no temp file."""

    def test_creates_alfred_dir(self, tmp_path):
        """Saving creates ``.alfred/`` and the ``metadata.yaml`` inside it."""
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        sidecar_dir = tmp_path / ".alfred"
        assert sidecar_dir.is_dir()
        assert (sidecar_dir / "metadata.yaml").is_file()

    def test_yaml_roundtrip(self, tmp_path):
        """Saved data is readable both as raw YAML and through ``load()``."""
        store = MetadataStore(tmp_path)
        payload = {"a": 1, "b": ["x", "y"], "c": {"nested": True}}
        store.save(payload)
        # First check the file itself, independently of the store.
        sidecar = tmp_path / ".alfred" / "metadata.yaml"
        on_disk = yaml.safe_load(sidecar.read_text())
        assert on_disk == payload
        # Then check the same data through the store API.
        assert store.load() == payload

    def test_temp_file_cleaned_up(self, tmp_path):
        """No ``metadata.yaml.tmp`` is left behind after a save."""
        store = MetadataStore(tmp_path)
        store.save({"a": 1})
        leftover = tmp_path / ".alfred" / "metadata.yaml.tmp"
        assert not leftover.exists()

    def test_unicode_preserved(self, tmp_path):
        """Non-ASCII text survives a save/load cycle unchanged."""
        store = MetadataStore(tmp_path)
        store.save({"title": "Amélie"})
        assert store.load() == {"title": "Amélie"}
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# update_section #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestUpdateSection:
    """``update_section()`` replaces one section and stamps ``_updated_at``."""

    def test_adds_section_with_timestamp(self, tmp_path):
        """A new section carries its payload plus an ``_updated_at`` stamp."""
        store = MetadataStore(tmp_path)
        store.update_section("parse", {"title": "X"})
        parse_section = store.load()["parse"]
        assert parse_section["title"] == "X"
        assert "_updated_at" in parse_section
        # Only the ISO-8601 date/time separator is checked here, not the
        # full timestamp format.
        assert "T" in parse_section["_updated_at"]

    def test_section_replaced_wholesale(self, tmp_path):
        """A second update drops keys from the first instead of merging."""
        store = MetadataStore(tmp_path)
        store.update_section("parse", {"a": 1, "b": 2})
        store.update_section("parse", {"c": 3})
        parse_section = store.load()["parse"]
        assert "a" not in parse_section
        assert parse_section["c"] == 3

    def test_preserves_other_sections(self, tmp_path):
        """Updating one section leaves a different section untouched."""
        store = MetadataStore(tmp_path)
        store.update_section("parse", {"a": 1})
        store.update_section("probe", {"b": 2})
        stored = store.load()
        assert stored["parse"]["a"] == 1
        assert stored["probe"]["b"] == 2
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# update_parse / update_probe #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestUpdateParseAndProbe:
    """``update_parse`` / ``update_probe`` drop ``status`` before storing."""

    def test_update_parse_strips_status(self, tmp_path):
        """``status`` is removed; the other parse fields are kept."""
        store = MetadataStore(tmp_path)
        store.update_parse({"status": "ok", "title": "X", "year": 2020})
        parse_section = store.load()["parse"]
        assert "status" not in parse_section
        assert parse_section["title"] == "X"
        assert parse_section["year"] == 2020

    def test_update_probe_strips_status(self, tmp_path):
        """``status`` is removed; the probe payload is kept."""
        store = MetadataStore(tmp_path)
        store.update_probe({"status": "ok", "resolution": "1080p"})
        assert store.load()["probe"]["resolution"] == "1080p"
        assert "status" not in store.load()["probe"]
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# update_tmdb #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestUpdateTmdb:
    """``update_tmdb()`` stores the block and promotes identity fields."""

    def test_promotes_identity_to_top_level(self, tmp_path):
        """Identity fields appear at the top level and under ``tmdb``."""
        store = MetadataStore(tmp_path)
        tmdb_payload = {
            "status": "ok",
            "imdb_id": "tt1375666",
            "tmdb_id": 27205,
            "media_type": "movie",
            "title": "Inception",
        }
        store.update_tmdb(tmdb_payload)
        stored = store.load()
        assert stored["imdb_id"] == "tt1375666"
        assert stored["tmdb_id"] == 27205
        assert stored["media_type"] == "movie"
        assert stored["title"] == "Inception"
        # The full block must also remain available under the tmdb section.
        assert stored["tmdb"]["imdb_id"] == "tt1375666"

    def test_does_not_overwrite_existing_title(self, tmp_path):
        """A title already at the top level is not replaced by TMDB's."""
        store = MetadataStore(tmp_path)
        # Seed a title as if it came from an earlier confirmation.
        store.save({"title": "Old Title"})
        store.update_tmdb({"title": "New Title", "imdb_id": "tt1"})
        stored = store.load()
        # The existing title wins; the previously absent imdb_id is added.
        assert stored["title"] == "Old Title"
        assert stored["imdb_id"] == "tt1"

    def test_none_values_not_promoted(self, tmp_path):
        """Fields whose value is ``None`` are not promoted to the top level."""
        store = MetadataStore(tmp_path)
        store.update_tmdb({"imdb_id": None, "tmdb_id": 27205, "media_type": None})
        stored = store.load()
        assert "imdb_id" not in stored
        assert stored["tmdb_id"] == 27205
        assert "media_type" not in stored
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Pattern #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestPattern:
    """``confirmed_pattern()`` and ``mark_pattern_confirmed()`` behaviour."""

    def test_confirmed_pattern_empty_when_missing(self, tmp_path):
        """With no sidecar at all there is no confirmed pattern."""
        store = MetadataStore(tmp_path)
        assert store.confirmed_pattern() is None

    def test_confirmed_pattern_only_when_flag_true(self, tmp_path):
        """The pattern id is returned only once ``pattern_confirmed`` is true."""
        store = MetadataStore(tmp_path)
        store.save({"detected_pattern": "adjacent", "pattern_confirmed": False})
        assert store.confirmed_pattern() is None
        store.save({"detected_pattern": "adjacent", "pattern_confirmed": True})
        assert store.confirmed_pattern() == "adjacent"

    def test_mark_pattern_confirmed_sets_flag(self, tmp_path):
        """Marking a pattern records both its id and the confirmed flag."""
        store = MetadataStore(tmp_path)
        store.mark_pattern_confirmed("subs_flat")
        stored = store.load()
        assert stored["detected_pattern"] == "subs_flat"
        assert stored["pattern_confirmed"] is True

    def test_mark_pattern_preserves_media_info(self, tmp_path):
        """``media_info`` passed along ends up at the top level."""
        store = MetadataStore(tmp_path)
        identity = {
            "media_type": "movie",
            "imdb_id": "tt1",
            "title": "Foo",
        }
        store.mark_pattern_confirmed("adjacent", media_info=identity)
        stored = store.load()
        assert stored["media_type"] == "movie"
        assert stored["imdb_id"] == "tt1"
        assert stored["title"] == "Foo"

    def test_mark_pattern_does_not_overwrite_existing_identity(self, tmp_path):
        """Identity keys already stored are not replaced by ``media_info``."""
        store = MetadataStore(tmp_path)
        store.save({"title": "Existing", "imdb_id": "tt_old"})
        newer_identity = {"imdb_id": "tt_new", "title": "New"}
        store.mark_pattern_confirmed("adjacent", media_info=newer_identity)
        stored = store.load()
        # Values already present on disk take precedence over the new ones.
        assert stored["title"] == "Existing"
        assert stored["imdb_id"] == "tt_old"
|
|
|
|
|
|
# --------------------------------------------------------------------------- #
|
|
# Subtitle history #
|
|
# --------------------------------------------------------------------------- #
|
|
|
|
|
|
class TestSubtitleHistory:
    """Appending subtitle-history entries and release-group dedup."""

    def test_initially_empty(self, tmp_path):
        """A fresh store has an empty subtitle history."""
        store = MetadataStore(tmp_path)
        assert store.subtitle_history() == []

    def test_append_one(self, tmp_path):
        """A single appended entry is returned with its fields intact."""
        store = MetadataStore(tmp_path)
        store.append_subtitle_history_entry({"tracks": 2, "release_group": "GRP"})
        history = store.subtitle_history()
        assert len(history) == 1
        assert history[0]["tracks"] == 2

    def test_release_group_recorded_once(self, tmp_path):
        """A repeated release group appears once, in first-seen order."""
        store = MetadataStore(tmp_path)
        for group in ("GRP", "GRP", "OTHER"):
            store.append_subtitle_history_entry({"release_group": group})
        recorded_groups = store.load()["release_groups"]
        assert recorded_groups == ["GRP", "OTHER"]

    def test_no_release_group_does_not_create_groups_list(self, tmp_path):
        """An entry without a release group adds no ``release_groups`` key."""
        store = MetadataStore(tmp_path)
        store.append_subtitle_history_entry({"tracks": 0})
        stored = store.load()
        assert "release_groups" not in stored

    def test_multiple_entries_preserved_in_order(self, tmp_path):
        """Entries come back in the order they were appended."""
        store = MetadataStore(tmp_path)
        for index in range(3):
            store.append_subtitle_history_entry({"i": index})
        seen = [entry["i"] for entry in store.subtitle_history()]
        assert seen == [0, 1, 2]
|