Files
2025-10-05 12:42:16 +03:00

67 lines
1.8 KiB
TypeScript

import { XMLParser } from "fast-xml-parser";
import * as cheerio from 'cheerio';
/** A downloaded documentation page: where it came from and its HTML text. */
type Page = {
  /** URL the page was downloaded from. */
  url: string;
  /** Page body as text; overwritten with the cleaned HTML once the page is stripped. */
  contents: string;
};
/**
 * Mirrors the Typst reference documentation to disk.
 *
 * Downloads the sitemap, keeps every URL that contains `/reference/`, then
 * fetches, strips and saves each of those pages. Each stage runs over all
 * pages in parallel and finishes before the next stage starts.
 *
 * @throws Error if the sitemap request does not succeed or the sitemap
 *   contains no `<url>` entries.
 */
const main = async () => {
  const sitemapLink = "https://typst.app/sitemap-0.xml";
  const sitemapResp = await fetch(sitemapLink);
  if (!sitemapResp.ok) {
    throw new Error(
      `failed to download ${sitemapLink}: ${sitemapResp.status} ${sitemapResp.statusText}`,
    );
  }
  const sitemap = await sitemapResp.text();

  const parser = new XMLParser();
  const result = parser.parse(sitemap);

  // fast-xml-parser yields a bare object (not an array) when a tag occurs
  // exactly once, and nothing at all when it is absent, so normalise before
  // iterating instead of failing with an opaque TypeError.
  const entries: { loc: string } | { loc: string }[] | null | undefined =
    result?.urlset?.url;
  if (entries == null) {
    throw new Error(`no <url> entries found in ${sitemapLink}`);
  }
  const urlArr = Array.isArray(entries) ? entries : [entries];

  const urls = urlArr
    .map((entry) => entry.loc)
    .filter((loc) => loc.includes("/reference/"));

  const pages = await Promise.all(urls.map((url) => fetchPage(url)));
  const cleanPages = await Promise.all(pages.map((page) => stripPage(page)));
  await Promise.all(cleanPages.map((page) => savePage(page)));
};
/**
 * Downloads a single page.
 *
 * @param url - URL of the page to download.
 * @returns The URL paired with the response body text.
 * @throws Error if the response status is not in the 2xx range, so that
 *   error pages are never passed on as page content.
 */
async function fetchPage(url: string): Promise<Page> {
  console.log(`downloading ${url}...`);
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `failed to download ${url}: ${response.status} ${response.statusText}`,
    );
  }
  const contents = await response.text();
  return { url, contents };
}
/** Selectors (tag names) of the elements that are removed from every downloaded page. */
const unneededElements = [
  "header",
  "nav",
  "footer",
  "head",
  "script",
];
/**
 * Removes every element matched by a selector in `unneededElements` from a
 * page's HTML.
 *
 * @param page - Page whose `contents` hold the HTML to clean. It is not
 *   modified.
 * @returns A new page with the same URL and the cleaned HTML as serialised
 *   by cheerio.
 */
async function stripPage(page: Page): Promise<Page> {
  console.log(`cleaning page ${page.url}`);
  const $ = cheerio.load(page.contents);
  for (const tag of unneededElements) {
    $(tag).remove();
  }
  // Return a copy instead of overwriting `page.contents`, so the caller's
  // object is not changed behind its back.
  return { ...page, contents: $.html() };
}
/**
 * Writes a page to `typst-docs/<name>.html`.
 *
 * The file name is the part of the URL after `/reference/`, with one
 * trailing slash removed (if present) and every remaining `/` replaced by
 * `_`. When nothing remains, the name is `index`.
 *
 * @param page - Page to write; `page.url` must contain `/reference/`.
 * @throws Error if `page.url` does not contain `/reference/`, since no
 *   meaningful file name can be derived from it.
 */
async function savePage(page: Page): Promise<void> {
  const marker = "/reference/";
  const refIdx = page.url.indexOf(marker);
  if (refIdx === -1) {
    throw new Error(
      `cannot derive a file name from ${page.url}: missing ${marker}`,
    );
  }

  const relativePath = page.url.substring(refIdx + marker.length);
  // Drop the trailing slash only when there is one; chopping the last
  // character unconditionally truncates names of URLs that lack it.
  const trimmed = relativePath.endsWith("/")
    ? relativePath.slice(0, -1)
    : relativePath;
  // split/join instead of replaceAll so the code does not need the ES2021 lib.
  const baseName = trimmed === "" ? "index" : trimmed.split("/").join("_");
  const fileName = `${baseName}.html`;

  console.log(`writing ${fileName}`);
  await Bun.write(`typst-docs/${fileName}`, page.contents);
  console.log(`wrote ${fileName} to disk!`);
}
// Entry point: run the scraper and report any failure with context and a
// non-zero exit code instead of leaving the promise rejection unhandled.
main().catch((err: unknown) => {
  console.error("failed to mirror the Typst reference docs:", err);
  process.exitCode = 1;
});