summaryrefslogtreecommitdiffhomepage
path: root/vhosts/blog/nuldoc-src/djot/to_html.ts
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-04-09 22:24:50 +0900
committernsfisis <nsfisis@gmail.com>2025-04-09 22:24:50 +0900
commitd1b4bc44170196a4dcef5254d092fc387e73792e (patch)
treea6bbe0e61b73c99a815d730ae7956a5124200066 /vhosts/blog/nuldoc-src/djot/to_html.ts
parentbba1212ab46ed85c2ed3b646f2362bdbb1f45b63 (diff)
parent4f46d262e6967c9c638b40f3b0246d21b7a9b9dc (diff)
downloadnsfisis.dev-d1b4bc44170196a4dcef5254d092fc387e73792e.tar.gz
nsfisis.dev-d1b4bc44170196a4dcef5254d092fc387e73792e.tar.zst
nsfisis.dev-d1b4bc44170196a4dcef5254d092fc387e73792e.zip
Merge branch 'nuldoc-djot'
Diffstat (limited to 'vhosts/blog/nuldoc-src/djot/to_html.ts')
-rw-r--r--vhosts/blog/nuldoc-src/djot/to_html.ts320
1 files changed, 320 insertions, 0 deletions
diff --git a/vhosts/blog/nuldoc-src/djot/to_html.ts b/vhosts/blog/nuldoc-src/djot/to_html.ts
new file mode 100644
index 00000000..5ee76023
--- /dev/null
+++ b/vhosts/blog/nuldoc-src/djot/to_html.ts
@@ -0,0 +1,320 @@
+import { BundledLanguage, bundledLanguages, codeToHtml } from "shiki";
+import { Document } from "./document.ts";
+import { NuldocError } from "../errors.ts";
+import {
+ addClass,
+ Element,
+ forEachChild,
+ forEachChildRecursively,
+ forEachChildRecursivelyAsync,
+ Node,
+ RawHTML,
+ Text,
+} from "../dom.ts";
+
+/**
+ * Rewrites the parsed document tree in place into its HTML-ready form and
+ * returns the same `doc` object.
+ *
+ * The passes run strictly in the listed order. Some of them rely on it:
+ * section ids are rewritten before heading anchors read them, and heading
+ * anchors are created while headings are still named `h`, i.e. before they
+ * are renamed to `h2`, `h3`, ...
+ */
+export default async function toHtml(doc: Document): Promise<Document> {
+  const synchronousPasses: Array<(doc: Document) => void> = [
+    removeUnnecessaryTextNode,
+    transformLinkLikeToAnchorElement,
+    transformSectionIdAttribute,
+    setSectionTitleAnchor,
+    transformSectionTitleElement,
+    transformNoteElement,
+    addAttributesToExternalLinkElement,
+    setDefaultLangAttribute,
+    traverseFootnotes,
+    removeUnnecessaryParagraphNode,
+  ];
+  for (const runPass of synchronousPasses) {
+    runPass(doc);
+  }
+
+  // Syntax highlighting is the only asynchronous pass and runs last.
+  await transformAndHighlightCodeBlockElement(doc);
+  return doc;
+}
+
+/**
+ * Strips whitespace-only text nodes from the beginning and the end of every
+ * element's child list. Whitespace-only text nodes between other children are
+ * left alone.
+ */
+function removeUnnecessaryTextNode(doc: Document) {
+  // True for a text node whose content is empty or consists only of whitespace.
+  const isBlankText = (node: Node | undefined): boolean =>
+    node !== undefined && node.kind === "text" && node.content.trim() === "";
+
+  forEachChildRecursively(doc.root, (element) => {
+    if (element.kind !== "element") {
+      return;
+    }
+
+    // Trim the existing array in place with shift/pop instead of building a
+    // new one.
+    const kids = element.children;
+    while (isBlankText(kids[0])) {
+      kids.shift();
+    }
+    while (isBlankText(kids[kids.length - 1])) {
+      kids.pop();
+    }
+  });
+}
+
+/**
+ * Turns bare `http://` / `https://` URLs that appear inside text nodes into
+ * `<a href="URL">URL</a>` elements.
+ *
+ * A URL extends from the scheme up to (not including) the next space or
+ * newline. Children of `<a>`, `<code>` and `<codeblock>` elements are not
+ * rewritten by this pass. Text around the URLs is kept as separate text
+ * nodes; empty text fragments are not emitted.
+ */
+function transformLinkLikeToAnchorElement(doc: Document) {
+  // Built once; `matchAll` works on a copy, so sharing it between nodes is safe.
+  const urlPattern = /https?:\/\/[^ \n]+/g;
+
+  forEachChildRecursively(doc.root, (n) => {
+    if (
+      n.kind !== "element" || n.name === "a" || n.name === "code" ||
+      n.name === "codeblock"
+    ) {
+      return;
+    }
+
+    const newChildren: Node[] = [];
+    for (const child of n.children) {
+      // Text flagged as raw is passed through untouched: splitting it and
+      // re-emitting the pieces with `raw: false` would change how it is output.
+      // NOTE(review): assumes `raw: true` marks pre-rendered HTML - confirm in dom.ts.
+      if (child.kind !== "text" || child.raw) {
+        newChildren.push(child);
+        continue;
+      }
+
+      const content = child.content;
+      let cursor = 0; // index of the first character not yet emitted
+      for (const match of content.matchAll(urlPattern)) {
+        const url = match[0];
+        const start = match.index ?? 0;
+        if (start > cursor) {
+          newChildren.push({
+            kind: "text",
+            content: content.slice(cursor, start),
+            raw: false,
+          });
+        }
+        newChildren.push({
+          kind: "element",
+          name: "a",
+          attributes: new Map([["href", url]]),
+          children: [{ kind: "text", content: url, raw: false }],
+        });
+        cursor = start + url.length;
+      }
+      // Whatever follows the last URL (or the whole text when there was none).
+      if (cursor < content.length) {
+        newChildren.push({
+          kind: "text",
+          content: content.slice(cursor),
+          raw: false,
+        });
+      }
+    }
+    n.children = newChildren;
+  });
+}
+
+/**
+ * Rewrites the `id` of every `<section>` into a path-like id built from the
+ * original ids of its enclosing sections:
+ * `section--<outer id>--...--<own id>`.
+ *
+ * Sections without an `id` (or with an empty one) are left unchanged and do
+ * not contribute to the path, but their descendants are still processed.
+ *
+ * @throws NuldocError when two sections end up with the same rewritten id.
+ */
+function transformSectionIdAttribute(doc: Document) {
+  // Original (un-prefixed) ids of the sections currently being descended into.
+  const sectionStack: string[] = [];
+  const usedIds = new Set<string>();
+
+  const processNode = (n: Node) => {
+    if (n.kind !== "element") {
+      return;
+    }
+
+    const idAttr = n.name === "section" ? n.attributes.get("id") : undefined;
+    if (!idAttr) {
+      // Not a section, or a section with nothing to rewrite: keep walking so
+      // that nested sections are not silently skipped.
+      forEachChild(n, processNode);
+      return;
+    }
+
+    const newId = `section--${[...sectionStack, idAttr].join("--")}`;
+    if (usedIds.has(newId)) {
+      throw new NuldocError(
+        `[nuldoc.tohtml] Duplicate section ID: ${newId}`,
+      );
+    }
+
+    usedIds.add(newId);
+    n.attributes.set("id", newId);
+
+    sectionStack.push(idAttr);
+    forEachChild(n, processNode);
+    sectionStack.pop();
+  };
+
+  forEachChild(doc.root, processNode);
+}
+
+/**
+ * Wraps the children of every `<h>` element in an `<a>` whose `href` points
+ * at the `id` of the innermost enclosing `<section>`.
+ *
+ * A heading whose section has no `id` (or an empty one) is left unwrapped,
+ * because there is nothing valid to link to.
+ *
+ * @throws NuldocError when an `<h>` element is not inside any `<section>`.
+ */
+function setSectionTitleAnchor(doc: Document) {
+  // Sections currently being descended into, innermost last.
+  const sectionStack: Element[] = [];
+  const g = (c: Node) => {
+    if (c.kind !== "element") {
+      return;
+    }
+
+    if (c.name === "section") {
+      sectionStack.push(c);
+    }
+    forEachChild(c, g);
+    if (c.name === "section") {
+      sectionStack.pop();
+    }
+    if (c.name === "h") {
+      const currentSection = sectionStack[sectionStack.length - 1];
+      if (!currentSection) {
+        throw new NuldocError(
+          "[nuldoc.tohtml] <h> element must be inside <section>",
+        );
+      }
+      const sectionId = currentSection.attributes.get("id");
+      if (!sectionId) {
+        // Interpolating a missing id would produce the link "#undefined".
+        return;
+      }
+      const aElement: Element = {
+        kind: "element",
+        name: "a",
+        attributes: new Map([["href", `#${sectionId}`]]),
+        children: c.children,
+      };
+      c.children = [aElement];
+    }
+  };
+  forEachChild(doc.root, g);
+}
+
+/**
+ * Records the nesting level of every `<section>` in its `--section-level`
+ * attribute and renames each `<h>` element to `h<level>`.
+ *
+ * The level counter starts at 1 outside any section, so top-level sections
+ * get level 2 and their headings become `<h2>`.
+ */
+function transformSectionTitleElement(doc: Document) {
+  let depth = 1;
+
+  const visit = (node: Node) => {
+    if (node.kind !== "element") {
+      return;
+    }
+
+    const isSection = node.name === "section";
+    if (isSection) {
+      depth += 1;
+      node.attributes.set("--section-level", String(depth));
+    }
+
+    forEachChild(node, visit);
+
+    if (isSection) {
+      depth -= 1;
+    } else if (node.name === "h") {
+      // Renamed after its children were visited, using the level of the
+      // section that contains the heading.
+      node.name = `h${depth}`;
+    }
+  };
+
+  forEachChild(doc.root, visit);
+}
+
+/**
+ * Converts every `<note>` element into an admonition:
+ *
+ *   <div class="... admonition">
+ *     <div class="admonition-label">NOTE</div>
+ *     <div class="admonition-content">...original children...</div>
+ *   </div>
+ *
+ * The outer element is the original node, renamed and given the extra class
+ * through `addClass`.
+ */
+function transformNoteElement(doc: Document) {
+  // Creates a <div> carrying a single class and the given children.
+  const makeDiv = (className: string, children: Node[]): Element => ({
+    kind: "element",
+    name: "div",
+    attributes: new Map([["class", className]]),
+    children,
+  });
+
+  forEachChildRecursively(doc.root, (note) => {
+    if (note.kind === "element" && note.name === "note") {
+      const label = makeDiv("admonition-label", [
+        { kind: "text", content: "NOTE", raw: false },
+      ]);
+      const body = makeDiv("admonition-content", note.children);
+
+      note.name = "div";
+      addClass(note, "admonition");
+      note.children = [label, body];
+    }
+  });
+}
+
+/**
+ * Marks external links: every `<a>` whose `href` starts with `http://` or
+ * `https://` gets `target="_blank"` and `rel="noreferrer"`.
+ *
+ * Other links, including relative ones whose path merely begins with the
+ * letters "http" (e.g. `http-guide.html`), are left untouched.
+ */
+function addAttributesToExternalLinkElement(doc: Document) {
+  forEachChildRecursively(doc.root, (n) => {
+    if (n.kind !== "element" || n.name !== "a") {
+      return;
+    }
+
+    const href = n.attributes.get("href") ?? "";
+    // Require the full scheme separator, not just the "http" prefix.
+    if (!/^https?:\/\//.test(href)) {
+      return;
+    }
+    n.attributes
+      .set("target", "_blank")
+      .set("rel", "noreferrer");
+  });
+}
+
+function setDefaultLangAttribute(_doc: Document) { // Placeholder pass: currently a no-op that never touches `_doc`.
+ // TODO: implement. Sketch kept for reference: give an element `e` that has no "lang" attribute the default "ja-JP".
+ // if (!e.attributes.has("lang")) {
+ // e.attributes.set("lang", "ja-JP");
+ // }
+}
+
+/**
+ * Placeholder handling of footnotes: every `<footnote>` element is turned
+ * into an empty `<span>`, i.e. its content is dropped.
+ *
+ * TODO: real footnote rendering. Intended shape, for `<footnote>x</footnote>`:
+ *
+ *   <sup class="footnote">[<a id="_footnoteref_1" class="footnote" href="#_footnotedef_1">1</a>]</sup>
+ *
+ *   <div class="footnote" id="_footnotedef_1">
+ *     <a href="#_footnoteref_1">1</a>. RAS syndrome
+ *   </div>
+ */
+function traverseFootnotes(doc: Document) {
+  forEachChildRecursively(doc.root, (node) => {
+    if (node.kind === "element" && node.name === "footnote") {
+      node.name = "span";
+      node.children = [];
+    }
+  });
+}
+
+/**
+ * Unwraps paragraphs in tight lists: inside a `<ul>` / `<ol>` whose `--tight`
+ * attribute is "true", every `<li>` whose only child is a `<p>` gets that
+ * paragraph's children as its own children.
+ */
+function removeUnnecessaryParagraphNode(doc: Document) {
+  forEachChildRecursively(doc.root, (list) => {
+    if (list.kind !== "element") {
+      return;
+    }
+    const isList = list.name === "ul" || list.name === "ol";
+    if (!isList || list.attributes.get("--tight") !== "true") {
+      return;
+    }
+
+    for (const item of list.children) {
+      if (item.kind !== "element" || item.name !== "li") {
+        continue;
+      }
+      // Only items consisting of exactly one <p> are unwrapped.
+      const soleChild = item.children.length === 1
+        ? item.children[0]
+        : undefined;
+      if (soleChild?.kind === "element" && soleChild.name === "p") {
+        item.children = soleChild.children;
+      }
+    }
+  });
+}
+
+/**
+ * Syntax-highlights every `<codeblock>` element with shiki and turns it into
+ * `<div class="codeblock">`.
+ *
+ * The source is taken from the block's first child (trailing whitespace
+ * removed). That node's content is replaced by the generated HTML and it is
+ * flagged as raw. The language comes from the `language` attribute; a missing,
+ * empty or unbundled language is highlighted as "text".
+ *
+ * @throws NuldocError when a `<codeblock>` has no children or its first child
+ *   is an element instead of a text node.
+ */
+async function transformAndHighlightCodeBlockElement(doc: Document) {
+  await forEachChildRecursivelyAsync(doc.root, async (n) => {
+    if (n.kind !== "element" || n.name !== "codeblock") {
+      return;
+    }
+
+    const firstChild = n.children[0];
+    if (firstChild === undefined || firstChild.kind === "element") {
+      // Fail with a descriptive error instead of a TypeError further down.
+      throw new NuldocError(
+        "[nuldoc.tohtml] <codeblock> element must start with a text node",
+      );
+    }
+    const sourceCodeNode = firstChild as Text | RawHTML;
+    const sourceCode = sourceCodeNode.content.trimEnd();
+
+    const language = n.attributes.get("language") || "text";
+    // Own-property check: the `in` operator would also accept inherited keys
+    // such as "constructor" or "toString", which are not languages.
+    const isBundled = Object.prototype.hasOwnProperty.call(
+      bundledLanguages,
+      language,
+    );
+
+    const highlighted = await codeToHtml(sourceCode, {
+      lang: isBundled ? language as BundledLanguage : "text",
+      theme: "github-light",
+      colorReplacements: {
+        "#fff": "#f5f5f5",
+      },
+    });
+
+    sourceCodeNode.content = highlighted;
+    sourceCodeNode.raw = true;
+    n.name = "div";
+    n.attributes.set("class", "codeblock");
+  });
+}