feat(release): distinguish streaming distributors from sources

Introduce a separate dimension for streaming-platform tags (NF, AMZN,
DSNP, HMAX, ATVP, …) so they stop polluting the encoding-source field.
WEB-DL is the source; the platform that released it is the distributor.

- new distributors.yaml knowledge file
- ReleaseKnowledge port exposes distributors set
- TokenRole.DISTRIBUTOR + ParsedRelease.distributor field
- removed NF/AMZN/DSNP/HMAX/ATVP from sources.yaml
- notre_planete fixture now records distributor: NF
This commit is contained in:
2026-05-20 01:03:11 +02:00
parent 7dc7f0c241
commit fd3bd1ad8c
8 changed files with 40 additions and 7 deletions
+1
View File
@@ -53,6 +53,7 @@ class TokenRole(str, Enum):
HDR = "hdr" HDR = "hdr"
EDITION = "edition" EDITION = "edition"
LANGUAGE = "language" LANGUAGE = "language"
DISTRIBUTOR = "distributor"
# Meta # Meta
SITE_TAG = "site_tag" SITE_TAG = "site_tag"
+1
View File
@@ -24,6 +24,7 @@ class ReleaseKnowledge(Protocol):
resolutions: set[str] resolutions: set[str]
sources: set[str] sources: set[str]
codecs: set[str] codecs: set[str]
distributors: set[str]
language_tokens: set[str] language_tokens: set[str]
forbidden_chars: set[str] forbidden_chars: set[str]
hdr_extra: set[str] hdr_extra: set[str]
+1
View File
@@ -105,6 +105,7 @@ class ParsedRelease:
bit_depth: str | None = None # "10bit", "8bit", … bit_depth: str | None = None # "10bit", "8bit", …
hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", … hdr_format: str | None = None # "DV", "HDR10", "DV.HDR10", …
edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", … edition: str | None = None # "UNRATED", "EXTENDED", "DIRECTORS.CUT", …
distributor: str | None = None # "NF", "AMZN", "DSNP", … (streaming origin)
def __post_init__(self) -> None: def __post_init__(self) -> None:
if not self.raw: if not self.raw:
@@ -64,6 +64,15 @@ def load_sources() -> set[str]:
return set(_load("sources.yaml").get("sources", [])) return set(_load("sources.yaml").get("sources", []))
def load_distributors() -> set[str]:
    """Return the known streaming-distributor tokens, normalised to uppercase.

    Reads the ``distributors`` list from ``distributors.yaml`` (NF, AMZN,
    DSNP, …). Distinct from ``load_sources()``: a distributor tag names the
    streaming platform, not the capture origin.

    Returns:
        The uppercased tokens. An empty set when the ``distributors`` key is
        missing or present without any entries.
    """
    # ``.get(key, [])`` only covers a *missing* key. A key that is present
    # but left empty in the YAML comes back as ``None``, which would raise
    # ``TypeError`` on iteration — so fall back to an empty list explicitly.
    # NOTE(review): assumes ``_load`` returns the parsed YAML mapping — confirm.
    tokens = _load("distributors.yaml").get("distributors") or []
    return {token.upper() for token in tokens}
def load_codecs() -> set[str]:
    """Return the entries listed under ``codecs`` in ``codecs.yaml`` as a set."""
    codec_entries = _load("codecs.yaml").get("codecs", [])
    return set(codec_entries)
@@ -20,6 +20,7 @@ from alfred.domain.release.parser.tokens import TokenRole
from .release import ( from .release import (
load_audio, load_audio,
load_codecs, load_codecs,
load_distributors,
load_editions, load_editions,
load_forbidden_chars, load_forbidden_chars,
load_group_schemas, load_group_schemas,
@@ -72,6 +73,7 @@ class YamlReleaseKnowledge:
self.resolutions: set[str] = load_resolutions() self.resolutions: set[str] = load_resolutions()
self.sources: set[str] = load_sources() | load_sources_extra() self.sources: set[str] = load_sources() | load_sources_extra()
self.codecs: set[str] = load_codecs() self.codecs: set[str] = load_codecs()
self.distributors: set[str] = load_distributors()
self.language_tokens: set[str] = load_language_tokens() self.language_tokens: set[str] = load_language_tokens()
self.forbidden_chars: set[str] = load_forbidden_chars() self.forbidden_chars: set[str] = load_forbidden_chars()
self.hdr_extra: set[str] = load_hdr_extra() self.hdr_extra: set[str] = load_hdr_extra()
@@ -0,0 +1,17 @@
# Known streaming distributor tokens (case-insensitive match; the loader
# normalises every entry to uppercase).
#
# These tags identify *which platform* the release was sourced from
# (Netflix, Amazon, Disney+, …). Distinct from ``sources.yaml`` which
# captures the encoding origin (WEB-DL, BluRay, …). A typical release
# carries both: ``Show.S01E01.1080p.NF.WEB-DL.x264-GROUP`` →
# source=WEB-DL, distributor=NF.
distributors:
- NF # Netflix
- AMZN # Amazon Prime Video
- DSNP # Disney+
- HMAX # HBO Max
- ATVP # Apple TV+
- HULU # Hulu
- PCOK # Peacock
- PMTP # Paramount+
- CR # Crunchyroll
+6 -6
View File
@@ -1,4 +1,9 @@
# Known release source tokens (case-insensitive match) # Known release source tokens (case-insensitive match).
#
# "Source" here means the capture/encoding origin (disc, broadcast, web
# stream) — NOT the streaming distributor (Netflix, Disney+, …). Those
# live in ``distributors.yaml`` because they're a separate dimension:
# a release is typically "WEB-DL from NF" — both should be captured.
sources: sources:
- bluray - bluray
- blu-ray - blu-ray
@@ -14,8 +19,3 @@ sources:
- dvdrip - dvdrip
- dvd - dvd
- vodrip - vodrip
- amzn
- nf
- dsnp
- hmax
- atvp
@@ -1,7 +1,8 @@
release_name: "Notre.planete.s01e01.1080p.NF.WEB-DL.DDP5.1.x264-NTb" release_name: "Notre.planete.s01e01.1080p.NF.WEB-DL.DDP5.1.x264-NTb"
# Lowercase 's01e01' and lowercased title word ('planete') correctly parsed. # Lowercase 's01e01' and lowercased title word ('planete') correctly parsed.
# NF (Netflix) source tag is not in the source KB — drops; WEB-DL wins. # NF is the Netflix streaming distributor (separate dimension from source);
# WEB-DL is the encoding source.
parsed: parsed:
title: "Notre.planete" title: "Notre.planete"
year: null year: null
@@ -11,6 +12,7 @@ parsed:
source: "WEB-DL" source: "WEB-DL"
codec: "x264" codec: "x264"
group: "NTb" group: "NTb"
distributor: "NF"
tech_string: "1080p.WEB-DL.x264" tech_string: "1080p.WEB-DL.x264"
media_type: "tv_show" media_type: "tv_show"
parse_path: "direct" parse_path: "direct"