sa6anw.se/clean_language_blocks.py

59 lines
1.6 KiB
Python

import os
import re
import shutil
# Directory holding the original documentation, and the directory inside it
# that receives the copy to be reduced to English.
SOURCE_DIR = "docs"
ENGLISH_DIR = "docs/en"

# Rebuild the English tree from scratch on every run: drop any existing copy
# first (copytree expects a destination that does not exist yet), then copy
# the whole source tree into it.
if os.path.exists(ENGLISH_DIR):
    shutil.rmtree(ENGLISH_DIR)

shutil.copytree(SOURCE_DIR, ENGLISH_DIR)
# Handle blocks written in the format:
# :sv
# text...
# :::
# Both markers must start a line. Anchoring them with ^ (MULTILINE) keeps a
# stray ":sv" at the end of an ordinary line from opening a block, and lets an
# empty block (":sv" directly followed by ":::") close at its own marker
# instead of running on to the closing marker of the next block.
_LANGUAGE_BLOCK_PATTERN = re.compile(
    r"^:(sv|en)[ \t]*\n(.*?)^:::", re.DOTALL | re.MULTILINE
)


def remove_other_language_blocks(content: str, keep_lang: str) -> str:
    """Return *content* with only the ``keep_lang`` language blocks kept.

    Every ``:sv`` / ``:en`` block is replaced: a block in ``keep_lang`` becomes
    its stripped text followed by a blank line, a block in the other language
    is dropped entirely. Text outside the blocks is passed through unchanged.

    Args:
        content: The full text to process.
        keep_lang: Language code of the blocks to keep (``"sv"`` or ``"en"``).

    Returns:
        The processed text.
    """
    result = []
    pos = 0
    for match in _LANGUAGE_BLOCK_PATTERN.finditer(content):
        start, end = match.span()
        lang, block = match.groups()
        # Keep the neutral text that sits between two blocks.
        result.append(content[pos:start])
        if lang == keep_lang:
            # The markers are dropped; only the block's own text survives.
            result.append(block.strip() + "\n\n")
        pos = end
    # Whatever follows the last block is neutral text as well.
    result.append(content[pos:])
    return "".join(result)
# Inline tags, e.g. [:sv]text
# The text of a tag runs until the next "[:" or the end of its line. The
# lookahead group is non-capturing so a match exposes exactly two groups
# (language, text), and MULTILINE lets "$" end a tag on every line rather
# than only at the very end of the file.
_INLINE_TAG_PATTERN = re.compile(r"\[:(sv|en)\](.*?)(?=\[:|$)", re.MULTILINE)


def remove_other_language_inline(content: str, keep_lang: str) -> str:
    """Return *content* with inline language tags resolved for ``keep_lang``.

    A ``[:sv]`` / ``[:en]`` tag in ``keep_lang`` is replaced by its text (the
    tag itself is removed); a tag in the other language is removed together
    with its text. Text outside any tag is left untouched.

    Args:
        content: The full text to process.
        keep_lang: Language code of the tags to keep (``"sv"`` or ``"en"``).

    Returns:
        The processed text.
    """
    def replacer(match):
        lang, text = match.groups()
        return text if lang == keep_lang else ""

    return _INLINE_TAG_PATTERN.sub(replacer, content)
def clean_file(path, keep_lang):
    """Rewrite the file at *path* so that only ``keep_lang`` content remains.

    The file is read as UTF-8, passed first through the block filter and then
    through the inline-tag filter, and written back in place with surrounding
    whitespace stripped and a single trailing newline.
    """
    with open(path, "r", encoding="utf-8") as source:
        text = source.read()

    # Order matters: whole blocks are resolved before inline tags.
    for strip_language in (remove_other_language_blocks, remove_other_language_inline):
        text = strip_language(text, keep_lang)

    cleaned = text.strip() + "\n"
    with open(path, "w", encoding="utf-8") as target:
        target.write(cleaned)
# Clean the Swedish files first (rewritten in place in SOURCE_DIR), then the
# English copies. Only Markdown files directly inside each directory are
# processed; os.listdir does not descend into subdirectories.
for directory, language in ((SOURCE_DIR, "sv"), (ENGLISH_DIR, "en")):
    for filename in os.listdir(directory):
        if not filename.endswith(".md"):
            continue
        clean_file(os.path.join(directory, filename), keep_lang=language)