"""Split mixed-language Markdown files into one copy per language.

Every ``.md`` file directly inside ``SOURCE_DIR`` is scanned for fenced
language blocks of the form::

    ::: sv
    Svensk text
    :::

For each language configured in ``OUTPUT_DIRS`` a copy of the file is written
to that language's directory.  The copy contains all text outside the fenced
blocks plus the contents of the blocks tagged with that language; blocks
tagged with any other configured language are dropped.
"""

import os
import re

SOURCE_DIR = "docs"
OUTPUT_DIRS = {
    "sv": "docs_sv",
    "en": "docs_en",
}

# Matches ":::<lang>\n<body>\n:::" where <lang> is one of the OUTPUT_DIRS keys.
# The alternation is derived from OUTPUT_DIRS so that adding a language only
# requires touching that one mapping.  DOTALL lets the lazy body span lines.
lang_block_re = re.compile(
    r":::\s*(" + "|".join(re.escape(lang) for lang in OUTPUT_DIRS) + r")\n(.*?)\n:::",
    re.DOTALL,
)


def split_content_by_language(content: str) -> dict:
    """Return the per-language variants of *content*.

    Args:
        content: Full text of a document that may contain ``::: <lang>``
            fenced blocks.

    Returns:
        A dict with one key per language in ``OUTPUT_DIRS``.  Each value is
        the text outside the fenced blocks, unchanged, interleaved with the
        stripped body of every block tagged with that language (each body is
        followed by a blank line).  Text with no fenced blocks is returned
        unchanged for every language.
    """
    # Collect pieces in lists and join once at the end instead of growing
    # strings with "+=" inside the loop.
    pieces = {lang: [] for lang in OUTPUT_DIRS}
    pos = 0
    for match in lang_block_re.finditer(content):
        start, end = match.span()
        lang, text = match.groups()
        # Text between the previous block and this one belongs to everyone.
        neutral = content[pos:start]
        for chunks in pieces.values():
            chunks.append(neutral)
        pieces[lang].append(text.strip() + "\n\n")
        pos = end
    # Whatever follows the last block is language-neutral as well.
    tail = content[pos:]
    for chunks in pieces.values():
        chunks.append(tail)
    return {lang: "".join(chunks) for lang, chunks in pieces.items()}


def main() -> None:
    """Split every ``.md`` file in ``SOURCE_DIR`` into the output directories."""
    filenames = [
        name for name in os.listdir(SOURCE_DIR) if name.endswith(".md")
    ]
    if not filenames:
        # Nothing to split, so no output directories are created.
        return

    # Create the output directories once rather than once per written file.
    for out_dir in OUTPUT_DIRS.values():
        os.makedirs(out_dir, exist_ok=True)

    for filename in filenames:
        filepath = os.path.join(SOURCE_DIR, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        blocks = split_content_by_language(content)

        for lang, text in blocks.items():
            out_path = os.path.join(OUTPUT_DIRS[lang], filename)
            with open(out_path, "w", encoding="utf-8") as f:
                f.write(text.strip())


# Guard the file-system work so the module can be imported (and
# split_content_by_language reused or tested) without touching disk.
if __name__ == "__main__":
    main()