"""Split bilingual Markdown files into one output tree per language.

Each ``.md`` file directly inside ``SOURCE_DIR`` may contain fenced language
blocks of the form::

    ::: sv
    Svensk text
    :::

    ::: en
    English text
    :::

For every language a file is written to the matching directory in
``OUTPUT_DIRS`` containing the frontmatter, that language's blocks, and then
all text that was outside any language block.
"""

import os
import re

SOURCE_DIR = "docs"
OUTPUT_DIRS = {"sv": "docs_sv", "en": "docs_en"}

# ``::: <lang>`` opener, lazily captured body, ``:::`` closer at line start.
lang_block_re = re.compile(r":::\s*(sv|en)\n(.*?)\n:::", re.DOTALL | re.IGNORECASE)
# Leading ``---`` delimited frontmatter at the very start of the document.
frontmatter_re = re.compile(r"(?s)^---\n(.*?)\n---\n")


def split_content_by_language(content):
    """Split a Markdown document into per-language variants.

    Args:
        content: Full text of the Markdown document.

    Returns:
        A dict with the keys ``"sv"`` and ``"en"``. When the document has no
        language blocks at all, ``"sv"`` holds the whole document (frontmatter
        included) and ``"en"`` is the empty string. Otherwise each value is
        ``frontmatter + language text + "\\n\\n" + neutral text``, where the
        neutral text is everything outside the language blocks.
    """
    blocks = {"sv": "", "en": ""}

    # Pull the frontmatter (e.g. the title) off first so that it can be
    # repeated at the top of every language variant.
    frontmatter = ""
    frontmatter_match = frontmatter_re.match(content)
    if frontmatter_match:
        frontmatter = f"---\n{frontmatter_match.group(1)}\n---\n"
        content = content[frontmatter_match.end():]

    matches = list(lang_block_re.finditer(content))
    if not matches:
        # No language blocks at all -> treat the document as Swedish only.
        blocks["sv"] = frontmatter + content
        return blocks

    neutral_parts = []
    pos = 0
    for match in matches:
        start, end = match.span()
        lang, text = match.groups()
        # Text between the previous block and this one belongs to no language.
        neutral_parts.append(content[pos:start])
        # The tag is matched case-insensitively; normalise it to the dict key.
        blocks[lang.lower()] += text.strip() + "\n\n"
        pos = end
    neutral_parts.append(content[pos:])
    neutral = "".join(neutral_parts).strip()

    # Assemble each language file: frontmatter + language text + neutral text.
    for lang_code in blocks:
        blocks[lang_code] = frontmatter + blocks[lang_code].strip() + "\n\n" + neutral
    return blocks


def main():
    """Split every ``.md`` file in ``SOURCE_DIR`` into the output directories.

    A language variant that is empty after stripping is skipped, with a
    message printed instead of a file being written.
    """
    for filename in os.listdir(SOURCE_DIR):
        if not filename.endswith(".md"):
            continue

        filepath = os.path.join(SOURCE_DIR, filename)
        with open(filepath, "r", encoding="utf-8") as src:
            content = src.read()

        for lang, text in split_content_by_language(content).items():
            if text.strip():
                os.makedirs(OUTPUT_DIRS[lang], exist_ok=True)
                out_path = os.path.join(OUTPUT_DIRS[lang], filename)
                with open(out_path, "w", encoding="utf-8") as dst:
                    dst.write(text.strip() + "\n")
                print(f"✔ Skrev {lang}/{filename}")
            else:
                print(f"⚠️ Skippade {lang}/{filename} (tom)")


# Guarded so that importing this module (e.g. to reuse or test
# split_content_by_language) does not touch the filesystem.
if __name__ == "__main__":
    main()