249c5de76a
- Refactor memory system (episodic/STM/LTM with components) - Implement complete subtitle domain (scanner, matcher, placer) - Add YAML workflow infrastructure - Externalize knowledge base (patterns, release groups) - Add comprehensive testing suite - Create manual testing CLIs
90 lines
2.4 KiB
YAML
90 lines
2.4 KiB
YAML
# Identity of this rule set.
name: subtitles
# Kept as a quoted string so it is never loaded as the float 1.0.
version: '1.0'
description: 'Subtitle classification rules — formats, types, languages and their tokens'

# Default selections. Every id listed here is a key of the top-level
# `languages`, `formats` or `types` mapping in this file.
defaults:
  languages: ['fra', 'eng']
  formats: ['srt']
  types: ['standard', 'forced', 'sdh']
  format_priority: ['srt', 'ass']
  # NOTE(review): presumably a 0-1 score threshold — confirm against the consumer.
  min_confidence: 0.7

# Subtitle file formats, keyed by format id. Each entry lists the file
# extensions (leading dot included) associated with that format.
formats:
  srt:
    extensions: ['.srt']
    description: 'SubRip — plain text, universal'
  ass:
    extensions: ['.ass', '.ssa']
    description: 'Advanced SubStation Alpha — with styles and positioning'

# Subtitle track types, keyed by type id. `tokens` holds the lowercase
# markers associated with each type; `standard` deliberately has none.
types:
  standard:
    tokens: []
    description: 'Normal subtitle track'
  sdh:
    tokens: ['sdh', 'hi', 'cc', 'hearing']
    description: 'Hearing-impaired — includes sound effects and speaker labels'
  forced:
    tokens: ['forced', 'foreign']
    description: 'Foreign lines only — e.g. alien speech in an otherwise English film'

# Languages, keyed by three-letter code (ISO 639-2/T style). `tokens` lists
# the lowercase markers associated with each language: two-letter code,
# three-letter code(s), English name and native name where available.
#
# Every token must stay quoted: a bare `no` (Norwegian) would be loaded as
# the boolean false by YAML 1.1 parsers.
languages:
  fra:
    # 'fre' (ISO 639-2/B) and the accented 'français' are listed for parity
    # with the other entries, which carry their /B code and accented name.
    tokens: ['fr', 'fra', 'fre', 'french', 'francais', 'français', 'vf', 'vff', 'vostfr']
  eng:
    tokens: ['en', 'eng', 'english']
  spa:
    tokens: ['es', 'spa', 'spanish', 'espanol', 'español']
  deu:
    tokens: ['de', 'deu', 'ger', 'german', 'deutsch']
  ita:
    tokens: ['it', 'ita', 'italian', 'italiano']
  por:
    tokens: ['pt', 'por', 'portuguese', 'portugues', 'português']
  nld:
    tokens: ['nl', 'nld', 'dut', 'dutch', 'nederlands']
  nor:
    tokens: ['no', 'nor', 'norwegian', 'norsk']
  swe:
    tokens: ['sv', 'swe', 'swedish', 'svenska']
  dan:
    tokens: ['da', 'dan', 'danish', 'dansk']
  fin:
    tokens: ['fi', 'fin', 'finnish', 'suomi']
  pol:
    tokens: ['pl', 'pol', 'polish', 'polski']
  ces:
    tokens: ['cs', 'ces', 'cze', 'czech']
  slk:
    tokens: ['sk', 'slk', 'slo', 'slovak']
  hun:
    tokens: ['hu', 'hun', 'hungarian', 'magyar']
  ron:
    tokens: ['ro', 'ron', 'rum', 'romanian', 'romana', 'română']
  bul:
    tokens: ['bg', 'bul', 'bulgarian']
  hrv:
    tokens: ['hr', 'hrv', 'croatian', 'hrvatski']
  srp:
    tokens: ['sr', 'srp', 'serbian', 'srpski']
  slv:
    tokens: ['sl', 'slv', 'slovenian', 'slovensko']
  est:
    tokens: ['et', 'est', 'estonian', 'eesti']
  lav:
    tokens: ['lv', 'lav', 'latvian', 'latviesu']
  lit:
    tokens: ['lt', 'lit', 'lithuanian', 'lietuviu']
  mkd:
    tokens: ['mk', 'mkd', 'mac', 'macedonian']
  jpn:
    tokens: ['ja', 'jpn', 'japanese']
  zho:
    tokens: ['zh', 'zho', 'chi', 'chinese']
  kor:
    tokens: ['ko', 'kor', 'korean']
  ara:
    tokens: ['ar', 'ara', 'arabic']
  tur:
    tokens: ['tr', 'tur', 'turkish']