import os
import re

SOURCE_DIR = "docs"
OUTPUT_DIRS = {"sv": "docs_sv", "en": "docs_en"}

# A language block opens with ":::" followed by a language tag ("sv" or "en",
# matched case-insensitively) and is closed by a line starting with ":::".
# DOTALL lets the block body span several lines.
lang_block_re = re.compile(r":::\s*(sv|en)\n(.*?)\n:::", re.DOTALL | re.IGNORECASE)


def split_content_by_language(content):
    """Split *content* into one text per language.

    Text inside a ``::: sv`` / ``::: en`` block goes only to that language;
    everything outside such blocks (neutral text) is copied to both
    languages. Segments are emitted in the order they appear in *content*,
    so shared headings stay next to the language-specific text they
    belong to.

    Args:
        content: The full text of one source document.

    Returns:
        A dict with the keys ``"sv"`` and ``"en"``. If *content* contains no
        language block at all, the whole text is returned under ``"sv"`` and
        ``"en"`` is the empty string.
    """
    matches = list(lang_block_re.finditer(content))
    if not matches:
        # NOTE(review): untagged documents end up in "sv" only -- presumably
        # Swedish is the default language; confirm against the caller.
        return {"sv": content, "en": ""}

    # Collect pieces per language and join once at the end instead of
    # growing strings with += inside the loop.
    parts = {"sv": [], "en": []}
    pos = 0
    for match in matches:
        start, end = match.span()
        lang, text = match.groups()

        # Neutral text preceding this block belongs to every language and
        # must stay in place relative to the block that follows it.
        neutral = content[pos:start]
        for chunks in parts.values():
            chunks.append(neutral)

        # The tag may be written in any case (re.IGNORECASE), so normalise
        # it before using it as a key.
        parts[lang.lower()].append(text.strip() + "\n\n")
        pos = end

    # Content after the last language block is included in both versions.
    tail = content[pos:]
    return {lang: "".join(chunks) + tail for lang, chunks in parts.items()}


# NOTE(review): the per-file loop that follows in the original script
# (`for filename in os.listdir(SOURCE_DIR): ...`) is only partially visible
# in this view and is therefore not reproduced here.