Add a "distributor" dimension to release parsing, separate from "source".

Summary of the hunks below:
  * parser/tokens.py: new TokenRole.DISTRIBUTOR member ("distributor").
  * ports/knowledge.py: the ReleaseKnowledge protocol gains
    `distributors: set[str]`.
  * value_objects.py: ParsedRelease gains `distributor: str | None = None`.
  * infrastructure/knowledge/release.py: new load_distributors(), which reads
    the "distributors" list from distributors.yaml and upper-cases each entry.
  * infrastructure/knowledge/release_kb.py: YamlReleaseKnowledge sets
    `self.distributors` from load_distributors().
  * knowledge/release/distributors.yaml: new file listing nine platform tags.
  * knowledge/release/sources.yaml: amzn / nf / dsnp / hmax / atvp are removed
    from the sources list.
  * test fixture: the expected parse now carries distributor "NF".

NOTE(review): load_distributors() upper-cases its entries, whereas
load_sources() returns its entries unchanged. The matching code is not part
of the hunks visible here; confirm it compares distributor tokens in upper
case.
NOTE(review): no hunk visible here assigns TokenRole.DISTRIBUTOR or fills
ParsedRelease.distributor; presumably the parser change lands separately.
Verify before relying on the updated fixture.

diff --git a/alfred/domain/release/parser/tokens.py b/alfred/domain/release/parser/tokens.py
index 8eb3b44..677740c 100644
--- a/alfred/domain/release/parser/tokens.py
+++ b/alfred/domain/release/parser/tokens.py
@@ -53,6 +53,7 @@ class TokenRole(str, Enum):
     HDR = "hdr"
     EDITION = "edition"
     LANGUAGE = "language"
+    DISTRIBUTOR = "distributor"
 
     # Meta
     SITE_TAG = "site_tag"
diff --git a/alfred/domain/release/ports/knowledge.py b/alfred/domain/release/ports/knowledge.py
index 52200bf..ff6982e 100644
--- a/alfred/domain/release/ports/knowledge.py
+++ b/alfred/domain/release/ports/knowledge.py
@@ -24,6 +24,7 @@ class ReleaseKnowledge(Protocol):
     resolutions: set[str]
     sources: set[str]
     codecs: set[str]
+    distributors: set[str]
     language_tokens: set[str]
     forbidden_chars: set[str]
     hdr_extra: set[str]
diff --git a/alfred/domain/release/value_objects.py b/alfred/domain/release/value_objects.py
index 87329aa..b3fa431 100644
--- a/alfred/domain/release/value_objects.py
+++ b/alfred/domain/release/value_objects.py
@@ -105,6 +105,7 @@ class ParsedRelease:
     bit_depth: str | None = None  # "10bit", "8bit", …
     hdr_format: str | None = None  # "DV", "HDR10", "DV.HDR10", …
     edition: str | None = None  # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
+    distributor: str | None = None  # "NF", "AMZN", "DSNP", … (streaming origin)
 
     def __post_init__(self) -> None:
         if not self.raw:
diff --git a/alfred/infrastructure/knowledge/release.py b/alfred/infrastructure/knowledge/release.py
index 4ea6375..60623e4 100644
--- a/alfred/infrastructure/knowledge/release.py
+++ b/alfred/infrastructure/knowledge/release.py
@@ -64,6 +64,15 @@ def load_sources() -> set[str]:
     return set(_load("sources.yaml").get("sources", []))
 
 
+def load_distributors() -> set[str]:
+    """Streaming distributor tokens (NF, AMZN, DSNP, …).
+
+    Distinct from ``load_sources()`` — distributors are uppercase scene
+    tags identifying the platform, not the capture origin.
+    """
+    return {t.upper() for t in _load("distributors.yaml").get("distributors", [])}
+
+
 def load_codecs() -> set[str]:
     return set(_load("codecs.yaml").get("codecs", []))
 
diff --git a/alfred/infrastructure/knowledge/release_kb.py b/alfred/infrastructure/knowledge/release_kb.py
index 980004f..c84df71 100644
--- a/alfred/infrastructure/knowledge/release_kb.py
+++ b/alfred/infrastructure/knowledge/release_kb.py
@@ -20,6 +20,7 @@ from alfred.domain.release.parser.tokens import TokenRole
 from .release import (
     load_audio,
     load_codecs,
+    load_distributors,
     load_editions,
     load_forbidden_chars,
     load_group_schemas,
@@ -72,6 +73,7 @@ class YamlReleaseKnowledge:
         self.resolutions: set[str] = load_resolutions()
         self.sources: set[str] = load_sources() | load_sources_extra()
         self.codecs: set[str] = load_codecs()
+        self.distributors: set[str] = load_distributors()
         self.language_tokens: set[str] = load_language_tokens()
         self.forbidden_chars: set[str] = load_forbidden_chars()
         self.hdr_extra: set[str] = load_hdr_extra()
diff --git a/alfred/knowledge/release/distributors.yaml b/alfred/knowledge/release/distributors.yaml
new file mode 100644
index 0000000..f4203af
--- /dev/null
+++ b/alfred/knowledge/release/distributors.yaml
@@ -0,0 +1,17 @@
+# Known streaming distributor tokens (case-insensitive match).
+#
+# These tags identify *which platform* the release was sourced from
+# (Netflix, Amazon, Disney+, …). Distinct from ``sources.yaml`` which
+# captures the encoding origin (WEB-DL, BluRay, …). A typical release
+# carries both: ``Show.S01E01.1080p.NF.WEB-DL.x264-GROUP`` →
+# source=WEB-DL, distributor=NF.
+distributors:
+  - NF    # Netflix
+  - AMZN  # Amazon Prime Video
+  - DSNP  # Disney+
+  - HMAX  # HBO Max
+  - ATVP  # Apple TV+
+  - HULU  # Hulu
+  - PCOK  # Peacock
+  - PMTP  # Paramount+
+  - CR    # Crunchyroll
diff --git a/alfred/knowledge/release/sources.yaml b/alfred/knowledge/release/sources.yaml
index 3c7b8eb..3daed04 100644
--- a/alfred/knowledge/release/sources.yaml
+++ b/alfred/knowledge/release/sources.yaml
@@ -1,4 +1,9 @@
-# Known release source tokens (case-insensitive match)
+# Known release source tokens (case-insensitive match).
+#
+# "Source" here means the capture/encoding origin (disc, broadcast, web
+# stream) — NOT the streaming distributor (Netflix, Disney+, …). Those
+# live in ``distributors.yaml`` because they're a separate dimension:
+# a release is typically "WEB-DL from NF" — both should be captured.
 sources:
   - bluray
   - blu-ray
@@ -14,8 +19,3 @@ sources:
   - dvdrip
   - dvd
   - vodrip
-  - amzn
-  - nf
-  - dsnp
-  - hmax
-  - atvp
diff --git a/tests/fixtures/releases/shitty/notre_planete_lowercase_e/expected.yaml b/tests/fixtures/releases/shitty/notre_planete_lowercase_e/expected.yaml
index e54ecfe..f902b08 100644
--- a/tests/fixtures/releases/shitty/notre_planete_lowercase_e/expected.yaml
+++ b/tests/fixtures/releases/shitty/notre_planete_lowercase_e/expected.yaml
@@ -1,7 +1,8 @@
 release_name: "Notre.planete.s01e01.1080p.NF.WEB-DL.DDP5.1.x264-NTb"
 
 # Lowercase 's01e01' and lowercased title word ('planete') correctly parsed.
-# NF (Netflix) source tag is not in the source KB — drops; WEB-DL wins.
+# NF is the Netflix streaming distributor (separate dimension from source);
+# WEB-DL is the encoding source.
 parsed:
   title: "Notre.planete"
   year: null
@@ -11,6 +12,7 @@ parsed:
   source: "WEB-DL"
   codec: "x264"
   group: "NTb"
+  distributor: "NF"
   tech_string: "1080p.WEB-DL.x264"
   media_type: "tv_show"
   parse_path: "direct"