refactor, but leave old code working (I forgot to commit it)

This commit is contained in:
2025-10-05 12:41:30 +03:00
parent 4ffb8bea59
commit e0252a23b5
4 changed files with 139 additions and 1 deletions
+2
View File
@@ -173,3 +173,5 @@ dist
# Finder (MacOS) folder config
.DS_Store
typst-docs/
BIN
View File
Binary file not shown.
+133 -1
View File
@@ -1 +1,133 @@
// NOTE(review): the hunk header reads "+133 -1", so this is presumably the one
// removed line of the diff rather than part of the new file — confirm.
console.log("Hello via Bun!");
import { XMLParser } from "fast-xml-parser";
import * as cheerio from 'cheerio';
/** A downloaded document together with the address it was fetched from. */
interface Page {
  /** URL the document was requested from. */
  url: string;
  /** Document body as text (HTML in this script). */
  contents: string;
}
/**
 * Mirrors the Typst reference documentation to disk.
 *
 * Downloads the sitemap, keeps the URLs containing "/reference/", then
 * fetches, strips and saves every matching page. Each stage processes its
 * pages through `Promise.all`, so the first rejection aborts the whole run.
 *
 * @throws Error if the sitemap request fails or contains no `<url>` entries.
 */
const main = async (): Promise<void> => {
  const sitemapLink = "https://typst.app/sitemap-0.xml";
  const sitemapResp = await fetch(sitemapLink);
  if (!sitemapResp.ok) {
    throw new Error(
      `failed to fetch ${sitemapLink}: ${sitemapResp.status} ${sitemapResp.statusText}`,
    );
  }
  const sitemap = await sitemapResp.text();

  const parser = new XMLParser();
  const result = parser.parse(sitemap);
  // console.log(result.urlset);
  const rawEntries = result?.urlset?.url;
  if (rawEntries == null) {
    throw new Error(`no <url> entries found in ${sitemapLink}`);
  }
  // The parser returns a bare object instead of an array when a tag occurs
  // only once, so normalise to an array before mapping.
  const urlArr: { loc: string }[] = Array.isArray(rawEntries) ? rawEntries : [rawEntries];
  const urls: string[] = urlArr
    .map((entry) => entry.loc)
    .filter((loc) => loc.includes("/reference/"));

  const pages = await Promise.all(urls.map((url) => fetchPage(url)));
  // const pages = await fetchPages(urls);
  const cleanPages = await Promise.all(pages.map((page) => stripPage(page)));
  // const cleanPages = await stripPages(pages);
  await Promise.all(cleanPages.map((page) => savePage(page)));
  // await savePages(cleanPages);
};
/**
 * Downloads a single page and pairs its body with the URL it came from.
 *
 * @param url - Absolute URL of the page to download.
 * @returns The URL together with the response body read as text.
 * @throws Error if the server answers with a non-2xx status, so that an
 *   error page is never passed along as page content.
 */
async function fetchPage(url: string): Promise<Page> {
  console.log(`downloading ${url}...`);
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`failed to download ${url}: ${response.status} ${response.statusText}`);
  }
  const contents = await response.text();
  return { url, contents };
}
// async function fetchPages(urls: string[]): Promise<Page[]> {
// const promises = urls.map(url => {
// console.log(`downloading ${url}...`);
// return fetch(url);
// });
//
// const responses = (await Promise.allSettled(promises))
// .filter(fulfilledOnly);
// if (responses.length !== urls.length) {
// throw new Error("responses.length !== urls.length");
// }
//
// const pages = (await Promise.allSettled(responses.map(resp => resp.value.text())))
// .filter(fulfilledOnly)
// .map(res => res.value);
// if (pages.length !== urls.length) {
// throw new Error("pages.length !== urls.length");
// }
//
// return pages.map((page, idx) => ({ url: urls[idx], contents: page }));
// }
/** Tag names whose elements are removed from every page by `stripPage`. */
const unneededElements: readonly string[] = ["header", "nav", "footer", "head", "script"];

/**
 * Removes the elements listed in `unneededElements` from a page's HTML.
 *
 * @param page - Page to clean; the object passed in is not modified.
 * @returns A new page with the same URL and the stripped, re-serialized HTML.
 */
async function stripPage(page: Page): Promise<Page> {
  console.log(`cleaning page ${page.url}`);
  const $ = cheerio.load(page.contents);
  for (const tag of unneededElements) {
    $(tag).remove();
  }
  // Return a copy rather than writing back into the caller's object.
  return { ...page, contents: $.html() };
}
// async function stripPages(pages: Page[]): Promise<Page[]> {
// const unneededElements = ["header", "nav", "footer", "head", "script"];
//
// return (await Promise.allSettled(pages.map(async (page) => {
// console.log(`cleaning page ${page.url}`);
// const $ = cheerio.load(page.contents);
// unneededElements.forEach(tag => $(tag).remove());
// page.contents = $.html();
//
// return page;
// })))
// .filter(fulfilledOnly)
// .map(res => res.value);
//
// }
// function fulfilledOnly<T>(res: PromiseSettledResult<T>) {
// return res.status === "fulfilled";
// }
/**
 * Writes a page into `typst-docs/` under a flat file name derived from its URL.
 *
 * The part of the URL after "/reference/" becomes the file name, with every
 * "/" replaced by "_" and ".html" appended; an empty remainder is stored as
 * `index.html`.
 *
 * @param page - Page to store; its `url` must contain "/reference/".
 * @throws Error if the URL does not contain "/reference/".
 */
async function savePage(page: Page) {
  const marker = "/reference/";
  const refIdx = page.url.indexOf(marker);
  if (refIdx === -1) {
    throw new Error(`cannot derive a file name from ${page.url}: missing "${marker}"`);
  }

  let relativePath = page.url.substring(refIdx + marker.length);
  // Strip the trailing slash only when there is one; slicing unconditionally
  // would cut the last character off URLs that do not end in "/".
  if (relativePath.endsWith("/")) {
    relativePath = relativePath.slice(0, -1);
  }

  const baseName = relativePath === "" ? "index" : relativePath.split("/").join("_");
  const fileName = baseName + ".html";

  console.log(`writing ${fileName}`);
  await Bun.write(`typst-docs/${fileName}`, page.contents);
  console.log(`wrote ${fileName} to disk!`);
}
// async function savePages(pages: Page[]) {
// await Promise.allSettled(pages.map(page => {
// const refIdx = page.url.indexOf("/reference/");
// let fileName = page.url
// .substring(refIdx + "/reference/".length)
// .replaceAll("/", "_");
//
// if (fileName === "") {
// fileName = "index";
// } else {
// fileName = fileName.slice(0, -1);
// }
// fileName = fileName + ".html";
//
// console.log(`writing ${fileName}`)
//
// Bun.write(`typst-docs/${fileName}`, page.contents)
// }))
// }
// Entry point: attach a rejection handler so a failed run is logged with
// context and reported through a non-zero exit code instead of being left
// as a floating promise.
main().catch((error: unknown) => {
  console.error("typst docs download failed:", error);
  process.exitCode = 1;
});
+4
View File
@@ -7,5 +7,9 @@
},
"peerDependencies": {
"typescript": "^5.0.0"
},
"dependencies": {
"cheerio": "^1.1.2",
"fast-xml-parser": "^5.3.0"
}
}