initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
typst-docs/*
|
||||
!typst-docs/.gitk
|
||||
0
typst-docs/.gitk
Normal file
0
typst-docs/.gitk
Normal file
67
typst-documentation-fetcher.py
Normal file
67
typst-documentation-fetcher.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# /// script
|
||||
# requires-python = ">=3.13"
|
||||
# dependencies = [
|
||||
# "bs4",
|
||||
# "requests",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def main() -> None:
    """Download the Typst reference pages and store cleaned copies locally.

    Reads the sitemap at https://typst.app/sitemap-0.xml, keeps every entry
    whose first child holds a URL containing "/reference/", and then runs
    three stages on a pool of eight worker threads: ``fetch_page``,
    ``strip_page`` and ``write_files``. Each stage is completed for all pages
    before the next one starts.

    Raises:
        requests.RequestException: If the sitemap cannot be downloaded or the
            server answers with an HTTP error status.
    """
    # The timeout keeps the script from hanging on a stalled connection, and
    # the status check stops an error page from being parsed as a sitemap.
    sitemap = requests.get("https://typst.app/sitemap-0.xml", timeout=10)
    sitemap.raise_for_status()
    root = ET.fromstring(sitemap.content)

    page_urls = []
    for el in root:
        # Skip entries without children instead of failing on el[0].
        if len(el) == 0:
            continue
        loc = el[0].text
        if isinstance(loc, str) and "/reference/" in loc:
            page_urls.append(loc)

    with ThreadPoolExecutor(max_workers=8) as executor:
        pages = list(executor.map(fetch_page, page_urls))
        stripped = list(executor.map(strip_page, pages))
        # write_files returns nothing; list() only drains the iterator so
        # that an exception raised in a worker is re-raised here.
        list(executor.map(write_files, stripped))
|
||||
|
||||
|
||||
def fetch_page(url: str) -> tuple[str, str]:
    """Download one page and return it together with its URL.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``(url, html)`` tuple where ``html`` is the decoded response body.

    Raises:
        RuntimeError: If the request fails, exceeds the 10-second timeout,
            or the server responds with an HTTP error status.
    """
    print(f"downloading {url}...")
    try:
        response = requests.get(url, timeout=10)
        # Without this check a 404/500 error page would be reported as a
        # success and later written to disk as documentation.
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(e) from e
    print(f"{url} successfully fetched!")
    return (url, response.text)
|
||||
|
||||
|
||||
def strip_page(url_page: tuple[str, str]) -> tuple[str, str]:
    """Remove page chrome and all tag attributes from a downloaded page.

    Args:
        url_page: ``(url, html)`` pair as returned by ``fetch_page``.

    Returns:
        A ``(url, html)`` tuple with the same URL and the cleaned markup.
    """
    address, markup = url_page
    print(f"stripping trash from {address}")
    document = BeautifulSoup(markup, "html.parser")

    # Drop the elements that carry no documentation content.
    unwanted = document.find_all(["header", "nav", "footer", "head", "script"])
    for element in unwanted:
        element.decompose()

    # Every remaining tag loses its attributes (classes, ids, links, ...).
    for element in document.find_all(True):
        element.attrs = {}

    return address, str(document)
|
||||
|
||||
|
||||
def write_files(url_page: tuple[str, str]) -> None:
    """Write one cleaned page into the ``typst-docs/`` directory.

    The file name is built from the part of the URL after the last
    "/reference/": surrounding slashes are dropped, inner slashes become
    underscores and ".html" is appended. The reference root itself is stored
    as "index.html".

    Args:
        url_page: ``(url, html)`` pair. The URL is expected to contain
            "/reference/".

    Raises:
        OSError: If the target file cannot be opened for writing, e.g.
            because the ``typst-docs`` directory does not exist.
    """
    url, page = url_page
    # Stripping slashes handles URLs with and without a trailing "/". The
    # previous fixed slice ``[...:-1]`` cut off the last character of the
    # page name whenever the trailing slash was missing.
    slug = url.rpartition("/reference/")[2].strip("/")
    if slug == "":
        slug = "index"

    filename = "typst-docs/" + slug.replace("/", "_") + ".html"
    with open(filename, "w", encoding="utf-8") as f:
        print(f"writing to {filename}")
        f.write(page)
        print(f"success with {filename}!")
|
||||
|
||||
|
||||
# Run the fetcher only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
20
typst-documentation.fish
Executable file
20
typst-documentation.fish
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env fish
|
||||
|
||||
# Convert one HTML file to Markdown with pandoc.
# The result is written to the current directory under the input's base name
# with ".html" replaced by ".md".
function html_to_md
    set -l filename $argv[1]
    set -l target (basename $filename .html).md
    echo "processing $filename..."
    pandoc --from=html --to=markdown --output=$target $filename
end
|
||||
|
||||
# funcsave html_to_md
|
||||
|
||||
# Rebuild the local documentation: clear typst-docs, download the pages,
# convert each one to Markdown and join the results into
# typst-docs/DOCUMENTATION.md.
function main
    # Remove the output of any previous run before fetching again.
    rm typst-docs/*
    uv run typst-documentation-fetcher.py
    # Stop if the directory is missing; otherwise the conversion and the
    # final `cd ../` would run relative to the wrong directory.
    cd typst-docs; or return 1
    # NOTE(review): parallel starts html_to_md in a separate shell, so the
    # function presumably has to be available there (see the commented-out
    # funcsave above) - confirm.
    parallel -j 8 html_to_md ::: (eza -1)
    # Collect the inputs first so the output file can never be its own input.
    # The original `cat *.md DOCUMENTATION.md` lacked the redirect and only
    # printed to stdout.
    set -l md_files *.md
    if set -q md_files[1]
        cat $md_files > DOCUMENTATION.md
    end
    cd ../
end

main
|
||||
Reference in New Issue
Block a user