From 56213ebc0363d296928897e330642b3de1b6ff3a Mon Sep 17 00:00:00 2001
From: sa6anw
Date: Fri, 18 Jul 2025 20:04:25 +0000
Subject: [PATCH] i18n

---
Review notes (text in this section is not part of the commit message):
* remove_other_language_inline: the lookahead contained a third capture
  group, so "lang, text = match.groups()" raised ValueError on every match,
  and "$" only matched at the end of the file. The terminator is now
  non-capturing and the substitution uses re.MULTILINE. A "[:]" closer is
  consumed instead of being left in the output.
* remove_other_language_blocks: openers written ":::sv" (the form the
  removed code handled) left a stray "::" behind; one to three opening
  colons are now accepted.
* Comments on added lines are in English and the new functions have
  docstrings. Removed and context lines are unchanged.
* The blob id after ".." on the index line predates these fixes.
* NOTE(review): the two loops no longer check os.path.isfile(); a directory
  whose name ends in ".md" would make clean_file() raise. Confirm that this
  cannot occur under docs/ or restore the check.

 clean_language_blocks.py | 92 ++++++++++++++++++++++++++++------------
 1 file changed, 64 insertions(+), 28 deletions(-)

diff --git a/clean_language_blocks.py b/clean_language_blocks.py
index 838eee4..3b3ce9f 100644
--- a/clean_language_blocks.py
+++ b/clean_language_blocks.py
@@ -3,47 +3,83 @@ import re
 import shutil
 
 SOURCE_DIR = "docs"
-ENGLISH_DIR = os.path.join(SOURCE_DIR, "en")
+ENGLISH_DIR = "docs/en"
 
-# Kopiera alla filer från docs/ till docs/en/
 if os.path.exists(ENGLISH_DIR):
     shutil.rmtree(ENGLISH_DIR)
 shutil.copytree(SOURCE_DIR, ENGLISH_DIR)
 
+# Handle blocks of the form:
+# :sv
+# text...
+# :::
+def remove_other_language_blocks(content, keep_lang):
+    """Return content with fenced language blocks resolved for keep_lang.
+
+    Blocks tagged keep_lang are replaced by their stripped body followed by
+    a blank line; blocks tagged with the other language are removed. Text
+    outside the blocks is copied through unchanged.
+    """
+    # Accept one to three opening colons: the ":::sv" openers that the
+    # removed code handled would otherwise leave a stray "::" behind.
+    block_pattern = re.compile(
+        r":{1,3}[ \t]*(sv|en)[ \t]*\n(.*?)\n:::", re.DOTALL
+    )
+
+    result = []
+    pos = 0
+    for match in block_pattern.finditer(content):
+        start, end = match.span()
+        lang, block = match.groups()
+
+        # Keep language-neutral text between blocks
+        result.append(content[pos:start])
+        if lang == keep_lang:
+            result.append(block.strip() + "\n\n")
+        pos = end
+    result.append(content[pos:])
+    return ''.join(result)
+
+# Inline tags, e.g. [:sv]text
+def remove_other_language_inline(content, keep_lang):
+    """Return content with inline language tags resolved for keep_lang.
+
+    A tagged run starts at "[:sv]" or "[:en]" and ends at the next tag or
+    at the end of the line. Runs tagged keep_lang are replaced by their
+    text; runs tagged with the other language are removed.
+    """
+    def replacer(match):
+        lang, text = match.groups()
+        return text if lang == keep_lang else ""
+
+    # The terminator must not add a capture group (replacer unpacks exactly
+    # two), and re.MULTILINE lets "$" end a run at every line end instead
+    # of only at the end of the file. A "[:]" closer, which the removed
+    # code required, is consumed together with the run.
+    return re.sub(
+        r"\[:(sv|en)\](.*?)(?:\[:\]|(?=\[:)|$)",
+        replacer,
+        content,
+        flags=re.MULTILINE,
+    )
+
 def clean_file(path, keep_lang):
+    """Rewrite the file at path in place, keeping only keep_lang content."""
     with open(path, "r", encoding="utf-8") as f:
         content = f.read()
 
-    # Ta bort språkblock som inte ska behållas
-    if keep_lang == "sv":
-        content = re.sub(r":::\s*en\s*\n(.*?)\n:::", "", content, flags=re.DOTALL)
-    elif keep_lang == "en":
-        content = re.sub(r":::\s*sv\s*\n(.*?)\n:::", "", content, flags=re.DOTALL)
-
-    # Ta bort språkblock-taggar för det språk vi behåller
-    content = re.sub(r":::\s*(sv|en)", "", content)
-    content = re.sub(r":::", "", content)
-
-    # Hantera inline-översättningar: [:sv]Text[:], [:en]Text[:]
-    def inline_replacer(match):
-        lang = match.group(1)
-        text = match.group(2)
-        return text if lang == keep_lang else ""
-
-    # Använd re.DOTALL för att hantera radbrytningar i inline-taggar
-    content = re.sub(r"\[:(sv|en)](.*?)\[:\]", inline_replacer, content, flags=re.DOTALL)
+    content = remove_other_language_blocks(content, keep_lang)
+    content = remove_other_language_inline(content, keep_lang)
 
     with open(path, "w", encoding="utf-8") as f:
         f.write(content.strip() + "\n")
 
-# Rensa svenska filer i docs/
+# Clean the Swedish files
 for filename in os.listdir(SOURCE_DIR):
-    path = os.path.join(SOURCE_DIR, filename)
-    if os.path.isfile(path) and filename.endswith(".md"):
-        clean_file(path, keep_lang="sv")
+    if filename.endswith(".md"):
+        clean_file(os.path.join(SOURCE_DIR, filename), keep_lang="sv")
 
-# Rensa engelska filer i docs/en/
+# Clean the English files
 for filename in os.listdir(ENGLISH_DIR):
-    path = os.path.join(ENGLISH_DIR, filename)
-    if os.path.isfile(path) and filename.endswith(".md"):
-        clean_file(path, keep_lang="en")
+    if filename.endswith(".md"):
+        clean_file(os.path.join(ENGLISH_DIR, filename), keep_lang="en")