From 9a553f212d76c488e658a04470e1eaffaf632ee2 Mon Sep 17 00:00:00 2001
From: sa6anw
Date: Fri, 18 Jul 2025 08:13:42 +0000
Subject: [PATCH] i18n

---
 split_lang.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/split_lang.py b/split_lang.py
index 13cc34e..a1c90b9 100644
--- a/split_lang.py
+++ b/split_lang.py
@@ -12,7 +12,9 @@ lang_block_re = re.compile(r":::\s*(sv|en)\n(.*?)\n:::", re.DOTALL)
 def split_content_by_language(content):
     blocks = {"sv": "", "en": ""}
     pos = 0
-    for match in lang_block_re.finditer(content):
+    matches = list(lang_block_re.finditer(content))
+
+    for match in matches:
         start, end = match.span()
         lang, text = match.groups()
         neutral = content[pos:start]
@@ -20,9 +22,12 @@ def split_content_by_language(content):
             blocks[l] += neutral
         blocks[lang] += text.strip() + "\n\n"
         pos = end
+
+    # Tail inkluderas i båda språkversionerna
     tail = content[pos:]
     for l in blocks:
         blocks[l] += tail
+
     return blocks
 
 for filename in os.listdir(SOURCE_DIR):
@@ -34,7 +39,8 @@ for filename in os.listdir(SOURCE_DIR):
     blocks = split_content_by_language(content)
 
     for lang, text in blocks.items():
-        os.makedirs(OUTPUT_DIRS[lang], exist_ok=True)
-        out_path = os.path.join(OUTPUT_DIRS[lang], filename)
-        with open(out_path, "w", encoding="utf-8") as f:
-            f.write(text.strip())
+        if text.strip():
+            os.makedirs(OUTPUT_DIRS[lang], exist_ok=True)
+            out_path = os.path.join(OUTPUT_DIRS[lang], filename)
+            with open(out_path, "w", encoding="utf-8") as f:
+                f.write(text.strip())