initial commit

This commit is contained in:
2025-10-03 20:15:49 +03:00
commit 51fdc352ef
4 changed files with 89 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
typst-docs/*
!typst-docs/.gitk

0
typst-docs/.gitk Normal file
View File

View File

@@ -0,0 +1,67 @@
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "bs4",
# "requests",
# ]
# ///
import xml.etree.ElementTree as ET
from concurrent.futures import ThreadPoolExecutor
import requests
from bs4 import BeautifulSoup
def main() -> None:
    """Mirror the Typst reference documentation into ``typst-docs/``.

    Downloads the site's sitemap, keeps every URL containing
    ``/reference/``, then fetches, cleans and writes each page using a
    pool of eight worker threads.

    Raises:
        requests.RequestException: If the sitemap cannot be downloaded or
            the server answers with an error status.
        xml.etree.ElementTree.ParseError: If the sitemap is not valid XML.
        RuntimeError: Propagated from ``fetch_page`` when a page download
            fails.
    """
    # Same timeout as the per-page requests so a stalled server cannot hang
    # the script forever.
    sitemap = requests.get("https://typst.app/sitemap-0.xml", timeout=10)
    # Fail early on 4xx/5xx instead of feeding an error page to the parser.
    sitemap.raise_for_status()
    root = ET.fromstring(sitemap.content)

    page_urls: list[str] = []
    for el in root:
        # Skip childless entries; indexing them would raise IndexError.
        if len(el) == 0:
            continue
        # The first child of each sitemap entry is treated as the page URL.
        loc = el[0].text
        if isinstance(loc, str) and "/reference/" in loc:
            page_urls.append(loc)

    with ThreadPoolExecutor(max_workers=8) as executor:
        fetched = list(executor.map(fetch_page, page_urls))
        stripped = list(executor.map(strip_page, fetched))
        # Consuming the iterator is what surfaces exceptions raised inside
        # the worker threads; the return values themselves are not needed.
        list(executor.map(write_files, stripped))
def fetch_page(url: str) -> tuple[str, str]:
    """Download a single page and return it together with its URL.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        A ``(url, html_text)`` tuple.

    Raises:
        RuntimeError: If the request fails or the server responds with an
            HTTP error status. The original ``requests`` exception is
            attached as ``__cause__``.
    """
    print(f"downloading {url}...")
    try:
        response = requests.get(url, timeout=10)
        # Without this check a 404/500 error page would be saved as if it
        # were real documentation.
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"failed to fetch {url}: {e}") from e
    print(f"{url} successfully fetched!")
    return (url, response.text)
def strip_page(url_page: tuple[str, str]) -> tuple[str, str]:
    """Remove page chrome and all tag attributes from a downloaded page.

    Args:
        url_page: A ``(url, html_text)`` tuple.

    Returns:
        A ``(url, cleaned_html)`` tuple with the same URL.
    """
    url = url_page[0]
    print(f"stripping trash from {url}")
    document = BeautifulSoup(url_page[1], "html.parser")

    # Drop the elements that carry navigation, metadata or scripts rather
    # than documentation content.
    unwanted = document.find_all(["header", "nav", "footer", "head", "script"])
    for element in unwanted:
        element.decompose()

    # Blank out the attributes (classes, ids, hrefs, ...) of every tag left.
    for element in document.find_all(True):
        element.attrs = {}

    cleaned = str(document)
    return (url, cleaned)
def write_files(url_page: tuple[str, str]) -> None:
    """Write one cleaned page to ``typst-docs/<name>.html``.

    The file name is the part of the URL after the last ``/reference/``,
    with any trailing slash removed and the remaining slashes replaced by
    underscores. The reference root itself is stored as ``index.html``.

    Args:
        url_page: A ``(url, html_text)`` tuple.

    Raises:
        ValueError: If the URL does not contain ``/reference/``.
        OSError: If the output directory or file cannot be written.
    """
    import os

    url, page = url_page
    _, marker, tail = url.rpartition("/reference/")
    if not marker:
        # Slicing relative to a failed search would produce a garbage name.
        raise ValueError(f"not a reference URL: {url}")

    # Strip trailing slashes explicitly instead of always dropping the last
    # character, which truncated URLs that have no trailing slash.
    name = tail.rstrip("/").replace("/", "_") or "index"
    filename = "typst-docs/" + name + ".html"

    # exist_ok keeps this safe when several worker threads get here at once.
    os.makedirs("typst-docs", exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        print(f"writing to {filename}")
        f.write(page)
    print(f"success with {filename}!")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
main()

20
typst-documentation.fish Executable file
View File

@@ -0,0 +1,20 @@
#!/usr/bin/env fish
function html_to_md
    # Convert one HTML file to Markdown with pandoc. The result is written to
    # the current directory as <name-without-.html>.md.
    set -l filename $argv[1]
    echo "processing $filename..."
    set -l stem (basename $filename .html)
    pandoc --from html --to markdown --output $stem.md $filename
end
# funcsave html_to_md
function main
    # Rebuild typst-docs/: wipe old output, download the reference pages,
    # convert each page to Markdown and concatenate the results into
    # typst-docs/DOCUMENTATION.md.

    # Capture the glob with `set` first: fish refuses to run a command whose
    # wildcard matches nothing, so a bare `rm typst-docs/*` errors on an
    # empty directory.
    set -l stale typst-docs/*
    if test (count $stale) -gt 0
        rm $stale
    end

    # Stop if the download fails; converting a half-filled directory would
    # produce misleading output.
    uv run typst-documentation-fetcher.py
    or return 1

    # Stop if the directory is missing so the steps below never run in the
    # wrong place.
    cd typst-docs
    or return 1

    # pandoc is called directly because parallel starts fresh shells that
    # cannot see a fish function defined only in this script.
    # {} is the input file and {.} is the input file without its extension.
    set -l html_files *.html
    if test (count $html_files) -gt 0
        parallel -j 8 'echo processing {}...; pandoc -f html -t markdown {} -o {.}.md' ::: $html_files
    end

    # Expand the list before creating the output file so DOCUMENTATION.md is
    # never one of its own inputs.
    set -l md_files *.md
    if test (count $md_files) -gt 0
        cat $md_files > DOCUMENTATION.md
    end

    cd ../
end
# Entry point: run the whole pipeline when the script is executed.
main