refactor(shared): Language VO is strict; from_raw() factory for un-normalized input
Calling object.__setattr__ inside __post_init__ on a frozen dataclass is a code smell — it bypasses the immutability guarantee to mutate fields mid-construction. Split the responsibilities:
* The direct constructor is strict — it rejects un-normalized input (uppercase iso, whitespace in aliases, etc.), so once a Language exists in the system, its fields are guaranteed canonical.
* The Language.from_raw() factory handles arbitrary YAML/user input — it lowercases the iso, normalizes and dedups the aliases, then constructs.
The only caller that built from raw data (LanguageRegistry loading YAML) moves to from_raw(). Test fixtures already pass normalized data, so they keep using the direct constructor.
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
"""Shared value objects used across multiple domains."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
@@ -142,19 +144,49 @@ class Language:
|
||||
raise ValidationError(
|
||||
f"Language.iso must be a 3-letter ISO 639-2/B code, got {self.iso!r}"
|
||||
)
|
||||
# Normalize iso to lowercase
|
||||
object.__setattr__(self, "iso", self.iso.lower())
|
||||
# Normalize aliases to a tuple of lowercase strings (dedup, preserve order)
|
||||
if self.iso != self.iso.lower():
|
||||
raise ValidationError(
|
||||
f"Language.iso must be lowercase, got {self.iso!r} — "
|
||||
f"use Language.from_raw() to construct from arbitrary input"
|
||||
)
|
||||
for alias in self.aliases:
|
||||
if not isinstance(alias, str) or alias != alias.lower().strip() or not alias:
|
||||
raise ValidationError(
|
||||
f"Language.aliases must be lowercase non-empty strings, "
|
||||
f"got {alias!r} — use Language.from_raw() to normalize"
|
||||
)
|
||||
|
||||
@classmethod
def from_raw(
    cls,
    iso: str,
    english_name: str,
    native_name: str,
    aliases: tuple[str, ...] | list[str] = (),
) -> Language:
    """
    Construct a Language from arbitrary (possibly un-normalized) input.

    Use this factory when loading from external sources (YAML, user input,
    third-party APIs) — it lowercases the iso code and normalizes/dedups
    the alias tuple. The direct constructor is strict and rejects
    un-normalized input.

    Args:
        iso: Language code in any letter case; lowercased before construction.
        english_name: Passed through to the constructor unchanged.
        native_name: Passed through to the constructor unchanged.
        aliases: Raw alias values. Non-string entries are skipped; string
            entries are lowercased and stripped; empty results and
            duplicates are dropped, keeping first-seen order. ``None`` is
            treated as "no aliases".

    Returns:
        The instance produced by calling ``cls`` with the normalized values.
        Any validation error raised by the constructor propagates unchanged.
    """
    # ``aliases or ()`` tolerates None, e.g. an alias key left empty in YAML.
    cleaned = (
        alias.lower().strip()
        for alias in (aliases or ())
        if isinstance(alias, str)
    )
    # dict keys keep insertion order, so this dedups while preserving the
    # order in which each alias was first seen.
    normalized = tuple(dict.fromkeys(alias for alias in cleaned if alias))
    return cls(
        iso=iso.lower(),
        english_name=english_name,
        native_name=native_name,
        aliases=normalized,
    )
|
||||
|
||||
def matches(self, raw: str) -> bool:
|
||||
"""
|
||||
|
||||
@@ -87,7 +87,7 @@ class LanguageRegistry:
|
||||
merged = _merge_language_entries(builtin, learned)
|
||||
|
||||
for iso, entry in merged.items():
|
||||
language = Language(
|
||||
language = Language.from_raw(
|
||||
iso=iso,
|
||||
english_name=entry.get("english_name", iso),
|
||||
native_name=entry.get("native_name", iso),
|
||||
|
||||
Reference in New Issue
Block a user