about | summary | refs | log | tree | commit | diff | homepage
path: root/services/nuldoc/nuldoc-src/markdown
diff options
context:
space:
mode:
Diffstat (limited to 'services/nuldoc/nuldoc-src/markdown')
-rw-r--r-- services/nuldoc/nuldoc-src/markdown/document.ts 75
-rw-r--r-- services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts 575
-rw-r--r-- services/nuldoc/nuldoc-src/markdown/parse.ts 47
-rw-r--r-- services/nuldoc/nuldoc-src/markdown/to_html.ts 499
4 files changed, 1196 insertions, 0 deletions
diff --git a/services/nuldoc/nuldoc-src/markdown/document.ts b/services/nuldoc/nuldoc-src/markdown/document.ts
new file mode 100644
index 0000000..1aee87b
--- /dev/null
+++ b/services/nuldoc/nuldoc-src/markdown/document.ts
@@ -0,0 +1,75 @@
+import type { Root as MdastRoot } from "mdast";
+import { join } from "@std/path";
+import { z } from "zod/mod.ts";
+import { Config } from "../config.ts";
+import { Element } from "../dom.ts";
+import { Revision, stringToDate } from "../revision.ts";
+import { mdast2ndoc } from "./mdast2ndoc.ts";
+
+// Zod schema for a post's metadata: a single `article` object holding the
+// identifying fields, the tags, an optional `toc` flag and the revision list.
+export const PostMetadataSchema = z.object({
+ article: z.object({
+ uuid: z.string(),
+ title: z.string(),
+ description: z.string(),
+ tags: z.array(z.string()),
+ // Optional; createNewDocumentFromMdast treats anything but `false` as enabled.
+ toc: z.boolean().optional(),
+ revisions: z.array(z.object({
+ // Kept as a string here; converted with stringToDate when the document is built.
+ date: z.string(),
+ remark: z.string(),
+ isInternal: z.boolean().optional(),
+ })),
+ }),
+});
+
+// Metadata object type inferred from PostMetadataSchema.
+export type PostMetadata = z.infer<typeof PostMetadataSchema>;
+
+// One table-of-contents entry; nested entries hang off `children`.
+export type TocEntry = {
+ id: string;
+ text: string;
+ level: number;
+ children: TocEntry[];
+};
+
+// Top of the table of contents: the list of outermost entries.
+export type TocRoot = {
+ entries: TocEntry[];
+};
+
+// A post: its converted element tree plus the metadata describing it.
+export type Document = {
+ root: Element;
+ sourceFilePath: string;
+ uuid: string;
+ link: string;
+ title: string;
+ description: string;
+ tags: string[];
+ revisions: Revision[];
+ // Optional: createNewDocumentFromMdast does not set it.
+ toc?: TocRoot;
+ isTocEnabled: boolean;
+};
+
+/**
+ * Builds a Document from a parsed Markdown tree and its validated metadata.
+ *
+ * @param root Mdast tree of the post body; converted with mdast2ndoc.
+ * @param meta Metadata already validated against PostMetadataSchema.
+ * @param sourceFilePath Path of the file the post was read from.
+ * @param config Site configuration; only `locations.contentDir` is read here.
+ * @returns The document; revisions are numbered by their index in the metadata.
+ */
+export function createNewDocumentFromMdast(
+  root: MdastRoot,
+  meta: PostMetadata,
+  sourceFilePath: string,
+  config: Config,
+): Document {
+  const contentDir = join(Deno.cwd(), config.locations.contentDir);
+  // Drop the content directory prefix and turn the trailing file extension
+  // into "/". Markdown sources end in ".md"; ".xml" is still accepted so paths
+  // that worked with the previous behaviour keep producing the same link.
+  const link = sourceFilePath
+    .replace(contentDir, "")
+    .replace(/\.(?:md|xml)$/, "/");
+
+  const { article } = meta;
+  return {
+    root: mdast2ndoc(root),
+    sourceFilePath,
+    uuid: article.uuid,
+    link,
+    title: article.title,
+    description: article.description,
+    tags: article.tags,
+    revisions: article.revisions.map((r, i) => ({
+      number: i,
+      date: stringToDate(r.date),
+      remark: r.remark,
+      isInternal: r.isInternal ?? false,
+    })),
+    // The table of contents is on unless the metadata explicitly disables it.
+    isTocEnabled: article.toc !== false,
+  };
+}
diff --git a/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts b/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts
new file mode 100644
index 0000000..367627c
--- /dev/null
+++ b/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts
@@ -0,0 +1,575 @@
+import type {
+ Blockquote,
+ Code,
+ Definition,
+ Delete,
+ Emphasis,
+ FootnoteDefinition,
+ FootnoteReference,
+ Heading,
+ Html,
+ Image,
+ InlineCode,
+ Link,
+ List,
+ ListItem,
+ Paragraph,
+ PhrasingContent,
+ Root,
+ RootContent,
+ Strong,
+ Table,
+ TableCell,
+ TableRow,
+ Text as MdastText,
+ ThematicBreak,
+} from "mdast";
+import type {
+ ContainerDirective,
+ LeafDirective,
+ TextDirective,
+} from "mdast-util-directive";
+import { elem, Element, Node, rawHTML, text } from "../dom.ts";
+
+type DirectiveNode = ContainerDirective | LeafDirective | TextDirective;
+
+// Type guard: true when the node is a container, leaf or text directive.
+function isDirective(node: RootContent): node is DirectiveNode {
+  switch (node.type) {
+    case "containerDirective":
+    case "leafDirective":
+    case "textDirective":
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Pulls a trailing `{#id}` / `{#id attr="value"}` marker off a heading.
+// Returns the id, the attributes parsed from the marker, and the heading
+// children with the marker text removed. Without a marker the id is null and
+// the children are returned as they are.
+function extractSectionId(
+  node: Heading,
+): {
+  id: string | null;
+  attributes: Record<string, string>;
+  children: Heading["children"];
+} {
+  const tail = node.children[node.children.length - 1];
+  if (!tail || tail.type !== "text") {
+    return { id: null, attributes: {}, children: node.children };
+  }
+
+  const marker = /\s*\{#([^\s}]+)([^}]*)\}\s*$/.exec(tail.value);
+  if (!marker) {
+    return { id: null, attributes: {}, children: node.children };
+  }
+
+  const [, id, rawAttributes] = marker;
+
+  // Values may be wrapped in straight quotes (U+0022) or in the curly quotes
+  // U+201C / U+201D.
+  const attributePattern =
+    /(\w+)=["\u201c\u201d]([^"\u201c\u201d]*)["\u201c\u201d]/g;
+  const attributes: Record<string, string> = {};
+  for (const [, key, value] of rawAttributes.trim().matchAll(attributePattern)) {
+    attributes[key] = value;
+  }
+
+  const precedingChildren = node.children.slice(0, -1);
+  const remainingText = tail.value.replace(/\s*\{#[^}]+\}\s*$/, "");
+  if (remainingText === "") {
+    // The marker was the whole text node, so the node disappears.
+    return { id, attributes, children: precedingChildren };
+  }
+  return {
+    id,
+    attributes,
+    children: [...precedingChildren, { ...tail, value: remainingText }],
+  };
+}
+
+// Converts one block-level mdast node. Returns null for node types that
+// produce no output here: headings (turned into sections by
+// buildSectionHierarchy), link definitions and unrecognised nodes.
+function processBlock(node: RootContent): Element | Element[] | null {
+  if (isDirective(node)) {
+    return processDirective(node);
+  }
+
+  switch (node.type) {
+    case "paragraph":
+      return processParagraph(node);
+    case "thematicBreak":
+      return processThematicBreak(node);
+    case "blockquote":
+      return processBlockquote(node);
+    case "code":
+      return processCode(node);
+    case "list":
+      return processList(node);
+    case "table":
+      return processTable(node);
+    case "html":
+      return processHtmlBlock(node);
+    case "definition":
+      return processDefinition(node);
+    case "footnoteDefinition":
+      return processFootnoteDefinition(node);
+    case "heading":
+    default:
+      return null;
+  }
+}
+
+// Paragraph -> <p> holding the converted inline children.
+function processParagraph(node: Paragraph): Element {
+  const inlines = node.children.map((child) => processInline(child));
+  return elem("p", {}, ...inlines);
+}
+
+// Thematic break -> <hr>. The node carries no data, so it is ignored.
+function processThematicBreak(_node: ThematicBreak): Element {
+  const noAttributes = {};
+  return elem("hr", noAttributes);
+}
+
+// Blockquote -> <blockquote> holding every child block that converts to
+// something; children that convert to null are skipped.
+function processBlockquote(node: Blockquote): Element {
+  const children: Node[] = node.children.flatMap((child) =>
+    processBlock(child) ?? []
+  );
+  return elem("blockquote", {}, ...children);
+}
+
+// Fenced code block -> <codeblock> with optional `language`, `filename` and
+// `numbered` attributes. The language comes from the fence info string; the
+// other two come from the meta string that follows it.
+function processCode(node: Code): Element {
+  const attributes: Record<string, string> = {};
+
+  if (node.lang) {
+    attributes.language = node.lang;
+  }
+
+  if (node.meta) {
+    const filenameMatch = /filename="([^"]+)"/.exec(node.meta);
+    if (filenameMatch) {
+      attributes.filename = filenameMatch[1];
+    }
+
+    // Look for the `numbered` flag only outside quoted values, and only as a
+    // whole word, so that e.g. filename="numbered.txt" does not switch line
+    // numbering on by accident.
+    const metaWithoutQuotedValues = node.meta.replace(/"[^"]*"/g, '""');
+    if (/\bnumbered\b/.test(metaWithoutQuotedValues)) {
+      attributes.numbered = "true";
+    }
+  }
+
+  return elem("codeblock", attributes, text(node.value));
+}
+
+// List -> <ol> or <ul>. Records whether the list is tight (`__tight`), marks
+// task lists with type="task", and carries over a non-default start number
+// for ordered lists.
+function processList(node: List): Element {
+  const attributes: Record<string, string> = {};
+  attributes.__tight = node.spread === false ? "true" : "false";
+
+  // A list is a task list as soon as one item has a checked state.
+  const isTaskList = node.children.some(
+    (item) => item.checked !== null && item.checked !== undefined,
+  );
+
+  if (isTaskList) {
+    attributes.type = "task";
+  }
+
+  // `start` may be null or missing altogether; `!= null` covers both, so the
+  // toString() call below can never hit undefined.
+  if (node.ordered && node.start != null && node.start !== 1) {
+    attributes.start = node.start.toString();
+  }
+
+  const children = node.children.map((item) =>
+    processListItem(item, isTaskList)
+  );
+
+  return elem(node.ordered ? "ol" : "ul", attributes, ...children);
+}
+
+// List item -> <li>. Inside a task list every item gets a `checked`
+// attribute of "true" or "false".
+function processListItem(node: ListItem, isTaskList: boolean): Element {
+  const attributes: Record<string, string> = isTaskList
+    ? { checked: node.checked ? "true" : "false" }
+    : {};
+
+  const children: Node[] = node.children.flatMap((child) =>
+    processBlock(child) ?? []
+  );
+
+  return elem("li", attributes, ...children);
+}
+
+// Table -> <table>. The first row goes into <thead>, all later rows into
+// <tbody>; either wrapper is left out when it would be empty.
+function processTable(node: Table): Element {
+  const [headerRow, ...bodyRows] = node.children.map((row, rowIndex) =>
+    processTableRow(row, rowIndex === 0, node.align)
+  );
+
+  const table = elem("table", {});
+  if (headerRow) {
+    table.children.push(elem("thead", undefined, headerRow));
+  }
+  if (bodyRows.length > 0) {
+    table.children.push(elem("tbody", undefined, ...bodyRows));
+  }
+  return table;
+}
+
+// Table row -> <tr>. Each cell receives the alignment of its column, if the
+// table defines any.
+function processTableRow(
+  node: TableRow,
+  isHeader: boolean,
+  alignments: (string | null)[] | null | undefined,
+): Element {
+  const cells: Element[] = [];
+  node.children.forEach((cell, column) => {
+    const alignment = alignments ? alignments[column] : undefined;
+    cells.push(processTableCell(cell, isHeader, alignment));
+  });
+  return elem("tr", {}, ...cells);
+}
+
+// Table cell -> <th> for header rows, <td> otherwise. An `align` attribute is
+// set only for a real alignment (anything but empty or "none").
+function processTableCell(
+  node: TableCell,
+  isHeader: boolean,
+  alignment: string | null | undefined,
+): Element {
+  const tagName = isHeader ? "th" : "td";
+  const hasAlignment = Boolean(alignment) && alignment !== "none";
+  const attributes: Record<string, string> = hasAlignment
+    ? { align: alignment as string }
+    : {};
+  const content = node.children.map((child) => processInline(child));
+  return elem(tagName, attributes, ...content);
+}
+
+// Raw HTML block -> <div class="raw-html"> wrapping the markup untouched.
+function processHtmlBlock(node: Html): Element {
+  const markup = rawHTML(node.value);
+  return elem("div", { class: "raw-html" }, markup);
+}
+
+// Link definitions emit no element of their own, so this always yields null.
+function processDefinition(_node: Definition): null {
+  return null;
+}
+
+// Footnote definition -> <footnote id="..."> holding the converted blocks.
+// The id is the footnote's mdast identifier.
+function processFootnoteDefinition(node: FootnoteDefinition): Element {
+  const body: Node[] = node.children.flatMap((child) =>
+    processBlock(child) ?? []
+  );
+  return elem("footnote", { id: node.identifier }, ...body);
+}
+
+// Directive -> <note> for the `note` and `edit` directives, <div> for any
+// other name. Attributes are copied as strings and children are converted as
+// blocks, in the same way for both cases.
+function processDirective(node: DirectiveNode): Element | null {
+  // Keep only attributes that actually carry a value; a null or undefined
+  // entry must not end up in the element's attribute map.
+  const attributes: Record<string, string> = {};
+  for (const [key, value] of Object.entries(node.attributes ?? {})) {
+    if (value !== undefined && value !== null) {
+      attributes[key] = String(value);
+    }
+  }
+
+  const children: Node[] = [];
+  if ("children" in node && node.children) {
+    for (const child of node.children as RootContent[]) {
+      const result = processBlock(child);
+      if (!result) {
+        continue;
+      }
+      if (Array.isArray(result)) {
+        children.push(...result);
+      } else {
+        children.push(result);
+      }
+    }
+  }
+
+  const isNote = node.name === "note" || node.name === "edit";
+  return elem(isNote ? "note" : "div", attributes, ...children);
+}
+
+// Converts one inline (phrasing) mdast node into a DOM node. Unknown node
+// types fall back to their `value` as text, then to a <span> around their
+// children, then to an empty text node.
+function processInline(node: PhrasingContent): Node {
+  if (node.type === "text") return processText(node);
+  if (node.type === "emphasis") return processEmphasis(node);
+  if (node.type === "strong") return processStrong(node);
+  if (node.type === "inlineCode") return processInlineCode(node);
+  if (node.type === "link") return processLink(node);
+  if (node.type === "image") return processImage(node);
+  if (node.type === "delete") return processDelete(node);
+  if (node.type === "break") return elem("br");
+  if (node.type === "html") return rawHTML(node.value);
+  if (node.type === "footnoteReference") return processFootnoteReference(node);
+
+  // Anything else: keep whatever content can be recovered.
+  if ("value" in node) {
+    return text(String(node.value));
+  }
+  if ("children" in node && Array.isArray(node.children)) {
+    const inner = node.children.map((c: PhrasingContent) => processInline(c));
+    return elem("span", {}, ...inner);
+  }
+  return text("");
+}
+
+// Text node -> DOM text node with the same content.
+function processText(node: MdastText): Node {
+  const { value } = node;
+  return text(value);
+}
+
+// Emphasis -> <em> around the converted inline children.
+function processEmphasis(node: Emphasis): Element {
+  const inner = node.children.map((child) => processInline(child));
+  return elem("em", {}, ...inner);
+}
+
+// Strong emphasis -> <strong> around the converted inline children.
+function processStrong(node: Strong): Element {
+  const inner = node.children.map((child) => processInline(child));
+  return elem("strong", {}, ...inner);
+}
+
+// Inline code -> <code> holding the code as a single text node.
+function processInlineCode(node: InlineCode): Element {
+  const code = text(node.value);
+  return elem("code", {}, code);
+}
+
+// Link -> <a> with `href` and `title` when present. A link whose only child
+// is a text node equal to its URL is treated as an autolink and gets
+// class="url".
+function processLink(node: Link): Element {
+  const [onlyChild] = node.children;
+  const isAutolink = node.children.length === 1 &&
+    onlyChild.type === "text" &&
+    onlyChild.value === node.url;
+
+  const attributes: Record<string, string> = {};
+  const candidates: [string, string | null | undefined][] = [
+    ["href", node.url],
+    ["title", node.title],
+    ["class", isAutolink ? "url" : undefined],
+  ];
+  for (const [key, value] of candidates) {
+    if (value) {
+      attributes[key] = value;
+    }
+  }
+
+  const content = node.children.map((child) => processInline(child));
+  return elem("a", attributes, ...content);
+}
+
+// Image -> <img> with `src`, `alt` and `title`, each set only when the
+// source node has a non-empty value for it.
+function processImage(node: Image): Element {
+  const attributes: Record<string, string> = {};
+  const candidates: [string, string | null | undefined][] = [
+    ["src", node.url],
+    ["alt", node.alt],
+    ["title", node.title],
+  ];
+  for (const [key, value] of candidates) {
+    if (value) {
+      attributes[key] = value;
+    }
+  }
+  return elem("img", attributes);
+}
+
+// Strikethrough -> <del> around the converted inline children.
+function processDelete(node: Delete): Element {
+  const inner = node.children.map((child) => processInline(child));
+  return elem("del", {}, ...inner);
+}
+
+// Footnote reference -> <footnoteref> pointing at the footnote's identifier.
+function processFootnoteReference(node: FootnoteReference): Element {
+  const { identifier } = node;
+  return elem("footnoteref", { reference: identifier });
+}
+
+// Converts a flat mdast root into `__root__ > article`. Headings open nested
+// <section> elements; top-level footnote definitions are collected and, if
+// there are any, appended as a trailing <section class="footnotes">.
+export function mdast2ndoc(root: Root): Element {
+  const footnotes: Element[] = [];
+  const body: RootContent[] = [];
+
+  root.children.forEach((child) => {
+    if (child.type === "footnoteDefinition") {
+      footnotes.push(processFootnoteDefinition(child));
+      return;
+    }
+    body.push(child);
+  });
+
+  const articleContent = buildSectionHierarchy(body);
+
+  if (footnotes.length > 0) {
+    articleContent.push(
+      elem("section", { class: "footnotes" }, ...footnotes),
+    );
+  }
+
+  const article = elem("article", undefined, ...articleContent);
+  return elem("__root__", undefined, article);
+}
+
+// A section that is still open while buildSectionHierarchy walks the nodes.
+type SectionInfo = {
+ // Id taken from the heading's `{#id}` marker, or null when there is none.
+ id: string | null;
+ // Extra attributes parsed from the same marker.
+ attributes: Record<string, string>;
+ // Depth of the heading that opened the section.
+ level: number;
+ heading: Element;
+ // Content collected so far, including already closed subsections.
+ children: Node[];
+};
+
+// Groups a flat node list into nested sections. Each heading closes every
+// open section of the same or a deeper level and then opens a new one; all
+// other content is added to the innermost open section, or to the top level
+// when it appears before the first heading.
+function buildSectionHierarchy(nodes: RootContent[]): Node[] {
+  const topLevel: Node[] = [];
+  const openSections: SectionInfo[] = [];
+
+  // Where new content currently belongs.
+  const currentTarget = (): Node[] =>
+    openSections.length > 0
+      ? openSections[openSections.length - 1].children
+      : topLevel;
+
+  // Turns the innermost open section into an element and hands it to its
+  // parent (or to the top level when it has none).
+  const closeInnermost = (): void => {
+    const finished = openSections.pop()!;
+    currentTarget().push(createSectionElement(finished));
+  };
+
+  for (const node of nodes) {
+    if (node.type !== "heading") {
+      const converted = processBlock(node);
+      if (converted) {
+        const items = Array.isArray(converted) ? converted : [converted];
+        currentTarget().push(...items);
+      }
+      continue;
+    }
+
+    const level = node.depth;
+    const { id, attributes, children } = extractSectionId(node);
+    const heading = elem(
+      "h",
+      {},
+      ...children.map((child) => processInline(child)),
+    );
+
+    while (
+      openSections.length > 0 &&
+      openSections[openSections.length - 1].level >= level
+    ) {
+      closeInnermost();
+    }
+
+    openSections.push({ id, attributes, level, heading, children: [] });
+  }
+
+  while (openSections.length > 0) {
+    closeInnermost();
+  }
+
+  return topLevel;
+}
+
+// Builds the <section> element for a finished section: its attributes (plus
+// `id` when it has one), then the heading, then the collected children.
+function createSectionElement(sectionInfo: SectionInfo): Element {
+  const { id, heading, children } = sectionInfo;
+  const attributes: Record<string, string> = id
+    ? { ...sectionInfo.attributes, id }
+    : { ...sectionInfo.attributes };
+
+  return elem("section", attributes, heading, ...children);
+}
diff --git a/services/nuldoc/nuldoc-src/markdown/parse.ts b/services/nuldoc/nuldoc-src/markdown/parse.ts
new file mode 100644
index 0000000..c0875a2
--- /dev/null
+++ b/services/nuldoc/nuldoc-src/markdown/parse.ts
@@ -0,0 +1,47 @@
+import type { Root as MdastRoot } from "mdast";
+import { unified } from "unified";
+import remarkParse from "remark-parse";
+import remarkGfm from "remark-gfm";
+import remarkDirective from "remark-directive";
+import remarkSmartypants from "remark-smartypants";
+import { parse as parseToml } from "@std/toml";
+import { Config } from "../config.ts";
+import {
+ createNewDocumentFromMdast,
+ Document,
+ PostMetadata,
+ PostMetadataSchema,
+} from "./document.ts";
+import toHtml from "./to_html.ts";
+
+/**
+ * Reads a Markdown post, parses its front matter and body, and returns the
+ * document after the HTML transformation passes.
+ *
+ * The file must start with a front matter block delimited by lines that
+ * consist of `---`; the block is parsed as TOML.
+ *
+ * @param filePath Path of the Markdown file to read.
+ * @param config Site configuration, passed on to document creation.
+ * @returns The transformed document.
+ * @throws Any error raised while reading, parsing or transforming; Error
+ *   instances get " in <filePath>" appended to their message.
+ */
+export async function parseMarkdownFile(
+  filePath: string,
+  config: Config,
+): Promise<Document> {
+  try {
+    const fileContent = await Deno.readTextFile(filePath);
+    const [, frontmatter, ...rest] = fileContent.split(/^---$/m);
+    if (frontmatter === undefined) {
+      // Without this check `undefined` would reach the TOML parser and fail
+      // with an error that says nothing about the real cause.
+      throw new Error("missing front matter delimited by `---` lines");
+    }
+    const meta = parseMetadata(frontmatter);
+    // The split also cut the body at every later `---` line; put them back.
+    const content = rest.join("---");
+
+    const processor = unified()
+      .use(remarkParse)
+      .use(remarkGfm)
+      .use(remarkDirective)
+      .use(remarkSmartypants);
+
+    const root = await processor.run(processor.parse(content)) as MdastRoot;
+
+    const doc = createNewDocumentFromMdast(root, meta, filePath, config);
+    return await toHtml(doc);
+  } catch (e) {
+    if (e instanceof Error) {
+      e.message = `${e.message} in ${filePath}`;
+    }
+    throw e;
+  }
+}
+
+// Parses the front matter text as TOML and validates it against
+// PostMetadataSchema.
+function parseMetadata(s: string): PostMetadata {
+  const raw = parseToml(s);
+  return PostMetadataSchema.parse(raw);
+}
diff --git a/services/nuldoc/nuldoc-src/markdown/to_html.ts b/services/nuldoc/nuldoc-src/markdown/to_html.ts
new file mode 100644
index 0000000..8219b74
--- /dev/null
+++ b/services/nuldoc/nuldoc-src/markdown/to_html.ts
@@ -0,0 +1,499 @@
+import { BundledLanguage, bundledLanguages, codeToHtml } from "shiki";
+import { Document, TocEntry } from "./document.ts";
+import { NuldocError } from "../errors.ts";
+import {
+ addClass,
+ elem,
+ Element,
+ forEachChild,
+ forEachChildRecursively,
+ forEachChildRecursivelyAsync,
+ forEachElementOfType,
+ innerText,
+ Node,
+ processTextNodesInElement,
+ RawHTML,
+ rawHTML,
+ Text,
+ text,
+} from "../dom.ts";
+
+// Runs the transformation passes over the document tree in a fixed order and
+// returns the same document object that was passed in.
+export default async function toHtml(doc: Document): Promise<Document> {
+ mergeConsecutiveTextNodes(doc);
+ removeUnnecessaryTextNode(doc);
+ transformLinkLikeToAnchorElement(doc);
+ // Section ids are rewritten first because setSectionTitleAnchor reads them.
+ transformSectionIdAttribute(doc);
+ setSectionTitleAnchor(doc);
+ // Renames <h> to <hN>; generateTableOfContents later matches on that name.
+ transformSectionTitleElement(doc);
+ transformNoteElement(doc);
+ addAttributesToExternalLinkElement(doc);
+ traverseFootnotes(doc);
+ removeUnnecessaryParagraphNode(doc);
+ await transformAndHighlightCodeBlockElement(doc);
+ mergeConsecutiveTextNodes(doc);
+ generateTableOfContents(doc);
+ // The `toc` section attribute is dropped only after the TOC pass has read it.
+ removeTocAttributes(doc);
+ return doc;
+}
+
+// Joins every run of adjacent text children into one text node, for all
+// elements in the tree. A run whose combined content is empty is dropped.
+function mergeConsecutiveTextNodes(doc: Document) {
+  forEachChildRecursively(doc.root, (n) => {
+    if (n.kind !== "element") {
+      return;
+    }
+
+    const merged: Node[] = [];
+    let pending = "";
+    const flushPending = () => {
+      if (pending !== "") {
+        merged.push(text(pending));
+        pending = "";
+      }
+    };
+
+    for (const child of n.children) {
+      if (child.kind === "text") {
+        pending += child.content;
+        continue;
+      }
+      flushPending();
+      merged.push(child);
+    }
+    flushPending();
+
+    n.children = merged;
+  });
+}
+
+// Removes whitespace-only text nodes from the start and the end of every
+// element's child list. Text nodes between other children are kept.
+function removeUnnecessaryTextNode(doc: Document) {
+  const isBlankText = (c: Node | undefined): boolean =>
+    c !== undefined && c.kind === "text" && c.content.trim() === "";
+
+  forEachChildRecursively(doc.root, (n) => {
+    if (n.kind !== "element") {
+      return;
+    }
+
+    while (isBlankText(n.children[0])) {
+      n.children.shift();
+    }
+    while (isBlankText(n.children[n.children.length - 1])) {
+      n.children.pop();
+    }
+  });
+}
+
+// Turns bare http(s) URLs found in text into <a class="url"> elements.
+// The <a>, <code> and <codeblock> elements themselves are not processed.
+function transformLinkLikeToAnchorElement(doc: Document) {
+  forEachChildRecursively(doc.root, (n) => {
+    if (n.kind !== "element") {
+      return;
+    }
+    if (n.name === "a" || n.name === "code" || n.name === "codeblock") {
+      return;
+    }
+
+    processTextNodesInElement(n, (content) => {
+      const pieces: Node[] = [];
+      let cursor = 0;
+      // A URL runs up to the next space or newline.
+      for (const found of content.matchAll(/https?:\/\/[^ \n]+/g)) {
+        const url = found[0];
+        const start = found.index ?? 0;
+        pieces.push(text(content.slice(cursor, start)));
+        pieces.push(elem("a", { href: url, class: "url" }, text(url)));
+        cursor = start + url.length;
+      }
+      if (cursor < content.length) {
+        pieces.push(text(content.slice(cursor)));
+      }
+      return pieces;
+    });
+  });
+}
+
+// Rewrites every section id to `section--<ancestor ids>--<own id>`, joining
+// the ids of the enclosing sections with "--". Throws NuldocError when two
+// sections end up with the same rewritten id.
+function transformSectionIdAttribute(doc: Document) {
+  const sectionStack: string[] = [];
+  const usedIds = new Set<string>();
+
+  const processNode = (n: Node) => {
+    if (n.kind !== "element") {
+      return;
+    }
+
+    const idAttr = n.name === "section" ? n.attributes.id : undefined;
+    if (!idAttr) {
+      // Other elements, and sections without an id, add nothing to the id
+      // path. Their descendants are still visited, so that sections nested
+      // under an id-less section are not skipped.
+      forEachChild(n, processNode);
+      return;
+    }
+
+    const newId = `section--${[...sectionStack, idAttr].join("--")}`;
+    if (usedIds.has(newId)) {
+      throw new NuldocError(
+        `[nuldoc.tohtml] Duplicate section ID: ${newId}`,
+      );
+    }
+
+    usedIds.add(newId);
+    n.attributes.id = newId;
+
+    sectionStack.push(idAttr);
+    forEachChild(n, processNode);
+    sectionStack.pop();
+  };
+
+  forEachChild(doc.root, processNode);
+}
+
+// Wraps the content of every <h> in an <a> that links to the id of the
+// enclosing section. Throws NuldocError for an <h> outside any section.
+function setSectionTitleAnchor(doc: Document) {
+  const sectionStack: Element[] = [];
+  const g = (c: Node) => {
+    if (c.kind !== "element") {
+      return;
+    }
+
+    if (c.name === "section") {
+      sectionStack.push(c);
+    }
+    forEachChild(c, g);
+    if (c.name === "section") {
+      sectionStack.pop();
+    }
+    if (c.name === "h") {
+      const currentSection = sectionStack[sectionStack.length - 1];
+      if (!currentSection) {
+        throw new NuldocError(
+          "[nuldoc.tohtml] <h> element must be inside <section>",
+        );
+      }
+      const sectionId = currentSection.attributes.id;
+      const aElement = elem("a", undefined, ...c.children);
+      // A section without an id has nothing to link to; leave the href off
+      // rather than emitting "#undefined".
+      if (sectionId) {
+        aElement.attributes.href = `#${sectionId}`;
+      }
+      c.children = [aElement];
+    }
+  };
+  forEachChild(doc.root, g);
+}
+
+// Renames each <h> to <hN> according to section nesting: a heading directly
+// inside a top-level section becomes <h2>, one level deeper <h3>, and so on.
+// Every section also gets its level stored in `__sectionLevel`.
+function transformSectionTitleElement(doc: Document) {
+  const visit = (c: Node, level: number) => {
+    if (c.kind !== "element") {
+      return;
+    }
+
+    const isSection = c.name === "section";
+    const innerLevel = isSection ? level + 1 : level;
+    if (isSection) {
+      c.attributes.__sectionLevel = innerLevel.toString();
+    }
+
+    forEachChild(c, (child) => visit(child, innerLevel));
+
+    if (c.name === "h") {
+      c.name = `h${level}`;
+    }
+  };
+  forEachChild(doc.root, (c) => visit(c, 1));
+}
+
+// Turns each <note> into <div class="admonition"> with a label and a content
+// wrapper. The label reads "<editat> <operation>" when both attributes are
+// set, and "NOTE" otherwise.
+function transformNoteElement(doc: Document) {
+  forEachElementOfType(doc.root, "note", (n) => {
+    const editat = n.attributes?.editat;
+    const operation = n.attributes?.operation;
+
+    let label = "NOTE";
+    if (editat && operation) {
+      label = `${editat} ${operation}`;
+    }
+
+    const labelElement = elem(
+      "div",
+      { class: "admonition-label" },
+      text(label),
+    );
+    const contentElement = elem(
+      "div",
+      { class: "admonition-content" },
+      ...n.children,
+    );
+
+    n.name = "div";
+    addClass(n, "admonition");
+    n.children = [labelElement, contentElement];
+  });
+}
+
+// Marks links to external sites: every <a> whose href is an absolute http or
+// https URL gets target="_blank" and rel="noreferrer".
+function addAttributesToExternalLinkElement(doc: Document) {
+  forEachElementOfType(doc.root, "a", (n) => {
+    const href = n.attributes.href ?? "";
+    // Require the full scheme prefix; a plain startsWith("http") would also
+    // catch relative links such as "httpd/setup/".
+    if (!/^https?:\/\//i.test(href)) {
+      return;
+    }
+    n.attributes.target = "_blank";
+    n.attributes.rel = "noreferrer";
+  });
+}
+
+// Numbers footnotes in order of first reference and rewrites both sides:
+// each <footnoteref> becomes <sup class="footnote"> holding a link to the
+// footnote, and each referenced <footnote> becomes <div class="footnote">
+// that starts with a link back to the reference. A footnote that is never
+// referenced (or has no id) is turned into an empty <span>.
+function traverseFootnotes(doc: Document) {
+  const numbers = new Map<string, number>();
+
+  forEachElementOfType(doc.root, "footnoteref", (n) => {
+    const reference = n.attributes.reference;
+    if (!reference) {
+      return;
+    }
+
+    let footnoteNumber = numbers.get(reference);
+    if (footnoteNumber === undefined) {
+      footnoteNumber = numbers.size + 1;
+      numbers.set(reference, footnoteNumber);
+    }
+
+    const link = elem(
+      "a",
+      {
+        id: `footnoteref--${reference}`,
+        class: "footnote",
+        href: `#footnote--${reference}`,
+      },
+      text(`[${footnoteNumber}]`),
+    );
+
+    n.name = "sup";
+    delete n.attributes.reference;
+    n.attributes.class = "footnote";
+    n.children = [link];
+  });
+
+  forEachElementOfType(doc.root, "footnote", (n) => {
+    const id = n.attributes.id;
+    const footnoteNumber = id ? numbers.get(id) : undefined;
+    if (footnoteNumber === undefined) {
+      n.name = "span";
+      n.children = [];
+      return;
+    }
+
+    const backLink = elem(
+      "a",
+      { href: `#footnoteref--${id}` },
+      text(`${footnoteNumber}. `),
+    );
+
+    n.name = "div";
+    delete n.attributes.id;
+    n.attributes.class = "footnote";
+    n.attributes.id = `footnote--${id}`;
+    n.children = [backLink, ...n.children];
+  });
+}
+
+// In tight lists (`__tight` is "true"), replaces each <p> directly inside an
+// <li> with the paragraph's own children. Other lists are left alone.
+function removeUnnecessaryParagraphNode(doc: Document) {
+  forEachChildRecursively(doc.root, (n) => {
+    if (n.kind !== "element") {
+      return;
+    }
+    const isList = n.name === "ul" || n.name === "ol";
+    if (!isList || n.attributes.__tight !== "true") {
+      return;
+    }
+
+    for (const item of n.children) {
+      if (item.kind !== "element" || item.name !== "li") {
+        continue;
+      }
+      item.children = item.children.flatMap((inner): Node[] =>
+        inner.kind === "element" && inner.name === "p"
+          ? inner.children
+          : [inner]
+      );
+    }
+  });
+}
+
+// Replaces every <codeblock> with <div class="codeblock"> holding the code
+// highlighted by shiki. `numbered="true"` becomes the extra class "numbered";
+// a `filename` becomes a <div class="filename"> placed before the code.
+async function transformAndHighlightCodeBlockElement(doc: Document) {
+  await forEachChildRecursivelyAsync(doc.root, async (n) => {
+    if (n.kind !== "element" || n.name !== "codeblock") {
+      return;
+    }
+
+    const language = n.attributes.language || "text";
+    const filename = n.attributes.filename;
+    const numbered = n.attributes.numbered;
+
+    // An empty or whitespace-only code block has no child left by this
+    // point, because the earlier text-node passes drop such nodes. Treat it
+    // as empty source instead of dereferencing undefined.
+    const sourceCodeNode = n.children[0] as Text | RawHTML | undefined;
+    let sourceCode = "";
+    if (sourceCodeNode !== undefined) {
+      sourceCode = sourceCodeNode.kind === "text"
+        ? sourceCodeNode.content.trimEnd()
+        : sourceCodeNode.html.trimEnd();
+    }
+
+    // Own-property check: `in` would also accept inherited keys such as
+    // "constructor" and pass them to shiki as a language.
+    const isBundled = Object.prototype.hasOwnProperty.call(
+      bundledLanguages,
+      language,
+    );
+    const highlighted = await codeToHtml(sourceCode, {
+      lang: isBundled ? language as BundledLanguage : "text",
+      theme: "github-light",
+      colorReplacements: {
+        "#fff": "#f5f5f5",
+      },
+    });
+
+    n.name = "div";
+    n.attributes.class = "codeblock";
+    delete n.attributes.language;
+
+    if (numbered === "true") {
+      delete n.attributes.numbered;
+      addClass(n, "numbered");
+    }
+    if (filename) {
+      delete n.attributes.filename;
+
+      n.children = [
+        elem("div", { class: "filename" }, text(filename)),
+        rawHTML(highlighted),
+      ];
+    } else if (sourceCodeNode === undefined || sourceCodeNode.kind === "text") {
+      n.children[0] = rawHTML(highlighted);
+    } else {
+      sourceCodeNode.html = highlighted;
+    }
+  });
+}
+
+// Builds doc.toc from the <hN> headings in the tree, nesting entries by
+// heading level. Does nothing when the TOC is disabled for the document, and
+// leaves doc.toc unset when the result would be a single entry without
+// children. A section with toc="false" is left out together with the deeper
+// headings that follow it.
+function generateTableOfContents(doc: Document) {
+ if (!doc.isTocEnabled) {
+ return;
+ }
+ const tocEntries: TocEntry[] = [];
+ // Chain of entries from the outermost one down to the most recent one.
+ const stack: TocEntry[] = [];
+ const excludedLevels: number[] = []; // Track levels to exclude
+
+ const processNode = (node: Node) => {
+ if (node.kind !== "element") {
+ return;
+ }
+
+ const match = node.name.match(/^h(\d+)$/);
+ if (match) {
+ const level = parseInt(match[1]);
+
+ let parentSection: Element | null = null;
+ // Searches the tree below `n` for the <section> that has `target` as a
+ // direct child.
+ const findParentSection = (n: Node, target: Node): Element | null => {
+ if (n.kind !== "element") return null;
+
+ for (const child of n.children) {
+ if (child === target && n.name === "section") {
+ return n;
+ }
+ const result = findParentSection(child, target);
+ if (result) return result;
+ }
+ return null;
+ };
+
+ parentSection = findParentSection(doc.root, node);
+ if (!parentSection) return;
+
+ // Check if this section has toc=false attribute
+ const tocAttribute = parentSection.attributes.toc;
+ if (tocAttribute === "false") {
+ // Replace any earlier exclusion with this heading's level
+ excludedLevels.length = 0;
+ excludedLevels.push(level);
+ return;
+ }
+
+ // Check if this header should be excluded based on parent exclusion
+ const shouldExclude = excludedLevels.some((excludedLevel) =>
+ level > excludedLevel
+ );
+ if (shouldExclude) {
+ return;
+ }
+
+ // Clean up excluded levels that are now at same or deeper level
+ while (
+ excludedLevels.length > 0 &&
+ excludedLevels[excludedLevels.length - 1] >= level
+ ) {
+ excludedLevels.pop();
+ }
+
+ // Sections without an id get no TOC entry.
+ const sectionId = parentSection.attributes.id;
+ if (!sectionId) return;
+
+ // The entry text is the inner text of the heading's <a> child; if there
+ // are several, the last one wins.
+ let headingText = "";
+ for (const child of node.children) {
+ if (child.kind === "element" && child.name === "a") {
+ headingText = innerText(child);
+ }
+ }
+
+ const entry: TocEntry = {
+ id: sectionId,
+ text: headingText,
+ level: level,
+ children: [],
+ };
+
+ // Drop entries of the same or a deeper level; what remains on top is the
+ // parent of the new entry.
+ while (stack.length > 0 && stack[stack.length - 1].level >= level) {
+ stack.pop();
+ }
+
+ if (stack.length === 0) {
+ tocEntries.push(entry);
+ } else {
+ stack[stack.length - 1].children.push(entry);
+ }
+
+ stack.push(entry);
+ }
+
+ forEachChild(node, processNode);
+ };
+
+ forEachChild(doc.root, processNode);
+
+ // Don't generate TOC if there's only one top-level section with no children
+ if (tocEntries.length === 1 && tocEntries[0].children.length === 0) {
+ return;
+ }
+
+ doc.toc = {
+ entries: tocEntries,
+ };
+}
+
+// Deletes the `toc` attribute from every <section> in the tree.
+function removeTocAttributes(doc: Document) {
+  forEachChildRecursively(doc.root, (node) => {
+    if (node.kind !== "element") {
+      return;
+    }
+    if (node.name !== "section") {
+      return;
+    }
+    delete node.attributes.toc;
+  });
+}