66 lines
2.7 KiB
Python
66 lines
2.7 KiB
Python
import os
|
|
from markdownify import markdownify as md
|
|
|
|
def _read_html_text(html_path):
    """Return the decoded text of *html_path*, or ``None`` if unreadable.

    UTF-8 is tried first. If the bytes are not valid UTF-8, the file is
    read again as Latin-1, which accepts any byte sequence (the fallback
    exists for legacy files containing umlauts). I/O errors are printed
    and reported as ``None`` so one bad file does not abort the batch.
    """
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(html_path, "r", encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            # Not valid in this encoding; try the next candidate.
            continue
        except OSError as e:
            print(f"Kritischer Fehler bei {html_path}: {e}")
            return None
    return None


def convert_all_html_in_folder(source_folder, output_folder):
    """Convert every HTML file below *source_folder* to Markdown.

    The directory tree of *source_folder* is mirrored inside
    *output_folder*; each ``.html``/``.htm`` file (matched
    case-insensitively) becomes a ``.md`` file at the same relative
    path. Afterwards ``create_readme`` is called with the relative paths
    of all files that were written successfully.

    Args:
        source_folder: Root directory that is searched recursively.
        output_folder: Directory receiving the Markdown files and the
            README; created if it does not exist.

    Returns:
        None. Progress and per-file errors are printed to stdout; a file
        that cannot be read or converted is skipped, not raised.
    """
    os.makedirs(output_folder, exist_ok=True)

    markdown_files_list = []

    for root, _dirs, files in os.walk(source_folder):
        for file in files:
            # str.endswith accepts a tuple; lower() also catches ".HTML".
            if not file.lower().endswith((".html", ".htm")):
                continue

            html_path = os.path.join(root, file)
            relative_path = os.path.relpath(html_path, source_folder)
            md_filename_rel = os.path.splitext(relative_path)[0] + ".md"
            md_path = os.path.join(output_folder, md_filename_rel)

            html_content = _read_html_text(html_path)
            if not html_content:
                # Unreadable (already reported) or empty: nothing to convert.
                continue

            try:
                markdown_text = md(html_content, heading_style="ATX")
                # Create the mirrored sub-directory only when there is
                # something to write into it.
                os.makedirs(os.path.dirname(md_path), exist_ok=True)
                with open(md_path, "w", encoding="utf-8") as f:
                    f.write(markdown_text)
            except Exception as e:
                # Deliberately broad: this is the per-file boundary, and a
                # converter or I/O failure must not stop the other files.
                print(f"Konvertierungsfehler bei {html_path}: {e}")
                continue

            markdown_files_list.append(md_filename_rel)
            print(f"Erfolgreich: {html_path}")

    create_readme(output_folder, markdown_files_list)
|
|
|
|
def create_readme(folder, file_list):
    """Write ``README.md`` in *folder* as an index of the converted files.

    Args:
        folder: Directory in which ``README.md`` is created (overwritten
            if it already exists).
        file_list: Paths of the Markdown files relative to *folder*. The
            list is not modified; entries are written in sorted order.

    Each entry becomes a Markdown bullet whose label is the file's base
    name and whose target is the relative path, written with ``/``
    separators and percent-encoded so that paths containing spaces or
    non-ASCII characters still form valid links.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    readme_path = os.path.join(folder, "README.md")

    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write("# Inhaltsverzeichnis der konvertierten Dateien\n\n")
        f.write("Hier sind alle konvertierten Markdown-Dateien aufgelistet:\n\n")
        # sorted() instead of list.sort() so the caller's list is untouched.
        for file_rel_path in sorted(file_list):
            display_name = os.path.basename(file_rel_path)
            # Link targets are URLs: use forward slashes and escape
            # spaces/umlauts; quote() leaves "/" unescaped by default.
            link_target = quote(file_rel_path.replace(os.sep, "/"))
            f.write(f"* [{display_name}]({link_target})\n")

    print(f"\nREADME.md wurde erstellt unter: {readme_path}")
|
|
|
|
# --- Your settings ---
# Default folders. Either one can be overridden on the command line:
#   python <script> [input_dir [output_dir]]
input_dir = "/Users/calvin/Downloads/ak-21.de/Jahrbuch Umfragen"
output_dir = "/Users/calvin/Downloads/ak-21.de/markdown"

if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) >= 1:
        input_dir = cli_args[0]
    if len(cli_args) >= 2:
        output_dir = cli_args[1]

    # os.walk() yields nothing for a missing folder, which would produce
    # an empty README and a misleading success message; fail loudly.
    if not os.path.isdir(input_dir):
        sys.exit(f"Eingabeordner nicht gefunden: {input_dir}")

    convert_all_html_in_folder(input_dir, output_dir)
    print("\nFertig! Die Umlaute sollten jetzt passen. Gruß, Schnitzel.")