diff --git a/alfred/domain/shared/value_objects.py b/alfred/domain/shared/value_objects.py index 30c74ee..b468a2e 100644 --- a/alfred/domain/shared/value_objects.py +++ b/alfred/domain/shared/value_objects.py @@ -1,5 +1,7 @@ """Shared value objects used across multiple domains.""" +from __future__ import annotations + import re from dataclasses import dataclass from pathlib import Path @@ -142,19 +144,49 @@ class Language: raise ValidationError( f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}" ) - # Normalize iso to lowercase - object.__setattr__(self, "iso", self.iso.lower()) - # Normalize aliases to a tuple of lowercase strings (dedup, preserve order) + if self.iso != self.iso.lower(): + raise ValidationError( + f"Language.iso must be lowercase, got {self.iso!r} — " + f"use Language.from_raw() to construct from arbitrary input" + ) + for alias in self.aliases: + if not isinstance(alias, str) or alias != alias.lower().strip() or not alias: + raise ValidationError( + f"Language.aliases must be lowercase non-empty strings, " + f"got {alias!r} — use Language.from_raw() to normalize" + ) + + @classmethod + def from_raw( + cls, + iso: str, + english_name: str, + native_name: str, + aliases: tuple[str, ...] | list[str] = (), + ) -> Language: + """ + Construct a Language from arbitrary (possibly un-normalized) input. + + Use this factory when loading from external sources (YAML, user input, + third-party APIs) — it lowercases the iso code and normalizes/dedups + the alias tuple. The direct constructor is strict and rejects + un-normalized input. + """ seen: set[str] = set() normalized: list[str] = [] - for alias in self.aliases: + for alias in aliases: if not isinstance(alias, str): continue a = alias.lower().strip() if a and a not in seen: seen.add(a) normalized.append(a) - object.__setattr__(self, "aliases", tuple(normalized)) + return cls( + iso=iso.lower(), + english_name=english_name, + native_name=native_name, + aliases=tuple(normalized), + ) def matches(self, raw: str) -> bool: """ diff --git a/alfred/infrastructure/knowledge/language_registry.py b/alfred/infrastructure/knowledge/language_registry.py index e3734b3..7ae3f45 100644 --- a/alfred/infrastructure/knowledge/language_registry.py +++ b/alfred/infrastructure/knowledge/language_registry.py @@ -87,7 +87,7 @@ class LanguageRegistry: merged = _merge_language_entries(builtin, learned) for iso, entry in merged.items(): - language = Language( + language = Language.from_raw( iso=iso, english_name=entry.get("english_name", iso), native_name=entry.get("native_name", iso),