refactor(shared): Language VO is strict; from_raw() factory for un-normalized input

object.__setattr__ inside __post_init__ on a frozen dataclass is a
code smell — it bypasses the immutability guarantee to mutate fields
mid-construction. Split the responsibilities:

* Direct constructor is strict — rejects un-normalized input (uppercase
  iso, whitespace in aliases, etc.) so once a Language exists in the
  system, its fields are guaranteed canonical.
* Language.from_raw() factory handles arbitrary YAML/user input — it
  lowercases the iso, lowercases and strips each alias, drops empty or
  non-string aliases, dedups them (preserving order), then constructs.

The only caller that built from raw data (LanguageRegistry loading YAML)
moves to from_raw(). Test fixtures already pass normalized data, so
they keep using the direct constructor.
This commit is contained in:
2026-05-20 23:48:30 +02:00
parent cfa9f54d9f
commit 5bcf22b408
2 changed files with 38 additions and 6 deletions
+37 -5
View File
@@ -1,5 +1,7 @@
"""Shared value objects used across multiple domains.""" """Shared value objects used across multiple domains."""
from __future__ import annotations
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@@ -142,19 +144,49 @@ class Language:
raise ValidationError( raise ValidationError(
f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}" f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
) )
# Normalize iso to lowercase if self.iso != self.iso.lower():
object.__setattr__(self, "iso", self.iso.lower()) raise ValidationError(
# Normalize aliases to a tuple of lowercase strings (dedup, preserve order) f"Language.iso must be lowercase, got {self.iso!r}"
f"use Language.from_raw() to construct from arbitrary input"
)
for alias in self.aliases:
if not isinstance(alias, str) or alias != alias.lower().strip() or not alias:
raise ValidationError(
f"Language.aliases must be lowercase non-empty strings, "
f"got {alias!r} — use Language.from_raw() to normalize"
)
@classmethod
def from_raw(
cls,
iso: str,
english_name: str,
native_name: str,
aliases: tuple[str, ...] | list[str] = (),
) -> Language:
"""
Construct a Language from arbitrary (possibly un-normalized) input.
Use this factory when loading from external sources (YAML, user input,
third-party APIs) — it lowercases the iso code and normalizes/dedups
the alias tuple. The direct constructor is strict and rejects
un-normalized input.
"""
seen: set[str] = set() seen: set[str] = set()
normalized: list[str] = [] normalized: list[str] = []
for alias in self.aliases: for alias in aliases:
if not isinstance(alias, str): if not isinstance(alias, str):
continue continue
a = alias.lower().strip() a = alias.lower().strip()
if a and a not in seen: if a and a not in seen:
seen.add(a) seen.add(a)
normalized.append(a) normalized.append(a)
object.__setattr__(self, "aliases", tuple(normalized)) return cls(
iso=iso.lower(),
english_name=english_name,
native_name=native_name,
aliases=tuple(normalized),
)
def matches(self, raw: str) -> bool: def matches(self, raw: str) -> bool:
""" """
@@ -87,7 +87,7 @@ class LanguageRegistry:
merged = _merge_language_entries(builtin, learned) merged = _merge_language_entries(builtin, learned)
for iso, entry in merged.items(): for iso, entry in merged.items():
language = Language( language = Language.from_raw(
iso=iso, iso=iso,
english_name=entry.get("english_name", iso), english_name=entry.get("english_name", iso),
native_name=entry.get("native_name", iso), native_name=entry.get("native_name", iso),