diff --git a/split_lang.py b/split_lang.py index e421581..493a175 100644 --- a/split_lang.py +++ b/split_lang.py @@ -5,16 +5,27 @@ SOURCE_DIR = "docs" OUTPUT_DIRS = {"sv": "docs_sv", "en": "docs_en"} lang_block_re = re.compile(r":::\s*(sv|en)\n(.*?)\n:::", re.DOTALL | re.IGNORECASE) +frontmatter_re = re.compile(r"(?s)^---\n(.*?)\n---\n") def split_content_by_language(content): blocks = {"sv": "", "en": ""} neutral_parts = [] - pos = 0 + # Ta ut frontmatter först (t.ex. title) + frontmatter_match = frontmatter_re.match(content) + frontmatter = "" + if frontmatter_match: + frontmatter = f"---\n{frontmatter_match.group(1)}\n---\n" + content = content[frontmatter_match.end():] + + pos = 0 matches = list(lang_block_re.finditer(content)) if not matches: - return {"sv": content, "en": ""} + # Inga språkblock alls → tolka som endast svenska + blocks["sv"] = frontmatter + content + blocks["en"] = "" + return blocks for match in matches: start, end = match.span() @@ -24,13 +35,13 @@ def split_content_by_language(content): blocks[lang.lower()] += text.strip() + "\n\n" pos = end - # Innehåll efter sista språkblock - neutral_parts.append(content[pos:]) + tail = content[pos:] + neutral_parts.append(tail) + neutral = ''.join(neutral_parts) - # Lägg neutral text i båda - tail = ''.join(neutral_parts) + # Montera ihop språkfiler: frontmatter + språktext + neutral for l in blocks: - blocks[l] = tail + blocks[l] + blocks[l] = frontmatter + blocks[l].strip() + "\n\n" + neutral.strip() return blocks @@ -47,7 +58,7 @@ for filename in os.listdir(SOURCE_DIR): os.makedirs(OUTPUT_DIRS[lang], exist_ok=True) out_path = os.path.join(OUTPUT_DIRS[lang], filename) with open(out_path, "w", encoding="utf-8") as f: - f.write(text.strip()) + f.write(text.strip() + "\n") print(f"✔ Skrev {lang}/{filename}") else: print(f"⚠️ Skippade {lang}/{filename} (tom)")