initial commit

2025-10-03 20:15:49 +03:00
commit 51fdc352ef
4 changed files with 89 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+typst-docs/*
+!typst-docs/.gitk
--- a/typst-docs/.gitk
+++ b/typst-docs/.gitk
--- a/typst-documentation-fetcher.py
+++ b/typst-documentation-fetcher.py
@@ -0,0 +1,67 @@
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+#     "bs4",
+#     "requests",
+# ]
+# ///
+
+import xml.etree.ElementTree as ET
+from concurrent.futures import ThreadPoolExecutor
+
+import requests
+from bs4 import BeautifulSoup
+
+
+def main() -> None:
+    sitemap = requests.get("https://typst.app/sitemap-0.xml")
+    root = ET.fromstring(sitemap.content)
+    page_urls = [
+        el[0].text
+        for el in root
+        if isinstance(el[0].text, str) and "/reference/" in el[0].text
+    ]
+
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        results = list(executor.map(fetch_page, page_urls))
+        results = list(executor.map(strip_page, results))
+        results = list(executor.map(write_files, results))
+
+
+def fetch_page(url: str) -> tuple[str, str]:
+    print(f"downloading {url}...")
+    try:
+        response = requests.get(url, timeout=10)
+        print(f"{url} successfully fetched!")
+        return (url, response.text)
+    except Exception as e:
+        raise RuntimeError(e)
+
+
+def strip_page(url_page: tuple[str, str]) -> tuple[str, str]:
+    url, page = url_page
+    print(f"stripping trash from {url}")
+    soup = BeautifulSoup(page, "html.parser")
+    for tag in soup(["header", "nav", "footer", "head", "script"]):
+        tag.decompose()
+    for tag in soup.find_all(True):
+        tag.attrs = {}
+    return (url, str(soup))
+
+
+def write_files(url_page: tuple[str, str]):
+    url, page = url_page
+    url = url[url.rfind("/reference/") + len("/reference/") : -1]
+    if url == "":
+        url = "index"
+
+    filename = url.replace("/", "_") + ".html"
+    filename = "typst-docs/" + filename
+    with open(filename, "w", encoding="utf-8") as f:
+        print(f"writing to {filename}")
+        f.write(page)
+        print(f"success with {filename}!")
+
+
+# Run the fetcher only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/typst-documentation.fish
+++ b/typst-documentation.fish
@@ -0,0 +1,20 @@
+#!/usr/bin/env fish
+
+function html_to_md
+    # Convert one HTML file to Markdown with pandoc. The result is written
+    # to the current directory under the same base name with a .md suffix.
+    set -l filename $argv[1]
+    echo "processing $filename..."
+    set -l output (basename $filename .html).md
+    pandoc -f html -t markdown $filename -o $output
+end
+
+# funcsave html_to_md
+
+function main
+    # Rebuild the documentation bundle: clear old output, download the HTML
+    # pages, convert each one to Markdown and join them into one file.
+    rm typst-docs/*
+    # Stop here if the download step failed; there is nothing to convert.
+    uv run typst-documentation-fetcher.py; or return 1
+    # Bail out rather than convert and write files in the wrong directory.
+    cd typst-docs; or return 1
+    # NOTE(review): parallel presumably runs each job in a fresh shell, so
+    # html_to_md must be resolvable there (see the commented-out funcsave
+    # above) - confirm.
+    parallel -j 8 html_to_md ::: (eza -1)
+    # Expand the glob before the output file exists so it can never be one
+    # of its own inputs.
+    set -l pages *.md
+    if test (count $pages) -gt 0
+        # The original had no ">" here, so DOCUMENTATION.md was treated as
+        # one more input file and the result only went to stdout.
+        cat $pages >DOCUMENTATION.md
+    end
+    cd ../
+end
+
+# Entry point: run the whole pipeline when the script is executed.
+main