From c754d24b162ecd504f3c4bdd8632045dd0398768 Mon Sep 17 00:00:00 2001 From: nsfisis Date: Thu, 27 Nov 2025 05:05:04 +0900 Subject: feat(nuldoc): Djot to Markdown --- services/nuldoc/nuldoc-src/commands/build.ts | 14 +- .../nuldoc-src/components/TableOfContents.ts | 2 +- services/nuldoc/nuldoc-src/djot/djot2ndoc.ts | 604 --------------------- services/nuldoc/nuldoc-src/djot/document.ts | 75 --- services/nuldoc/nuldoc-src/djot/parse.ts | 33 -- services/nuldoc/nuldoc-src/djot/to_html.ts | 499 ----------------- services/nuldoc/nuldoc-src/generators/post.ts | 4 +- services/nuldoc/nuldoc-src/markdown/document.ts | 75 +++ services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts | 575 ++++++++++++++++++++ services/nuldoc/nuldoc-src/markdown/parse.ts | 47 ++ services/nuldoc/nuldoc-src/markdown/to_html.ts | 499 +++++++++++++++++ services/nuldoc/nuldoc-src/pages/PostPage.ts | 2 +- 12 files changed, 1207 insertions(+), 1222 deletions(-) delete mode 100644 services/nuldoc/nuldoc-src/djot/djot2ndoc.ts delete mode 100644 services/nuldoc/nuldoc-src/djot/document.ts delete mode 100644 services/nuldoc/nuldoc-src/djot/parse.ts delete mode 100644 services/nuldoc/nuldoc-src/djot/to_html.ts create mode 100644 services/nuldoc/nuldoc-src/markdown/document.ts create mode 100644 services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts create mode 100644 services/nuldoc/nuldoc-src/markdown/parse.ts create mode 100644 services/nuldoc/nuldoc-src/markdown/to_html.ts (limited to 'services/nuldoc/nuldoc-src') diff --git a/services/nuldoc/nuldoc-src/commands/build.ts b/services/nuldoc/nuldoc-src/commands/build.ts index 5efff55..2b67d2b 100644 --- a/services/nuldoc/nuldoc-src/commands/build.ts +++ b/services/nuldoc/nuldoc-src/commands/build.ts @@ -2,7 +2,7 @@ import { dirname, join, joinGlobs, relative } from "@std/path"; import { ensureDir, expandGlob } from "@std/fs"; import { generateFeedPageFromEntries } from "../generators/atom.ts"; import { Config, getTagLabel } from "../config.ts"; -import { parseDjotFile } from "../djot/parse.ts"; +import { parseMarkdownFile } from "../markdown/parse.ts"; import { Page } from "../page.ts"; import { render } from "../render.ts"; import { dateToString } from "../revision.ts"; @@ -55,7 +55,7 @@ async function buildPostPages(config: Config): Promise { async function collectPostFiles(sourceDir: string): Promise { const filePaths = []; - const globPattern = joinGlobs([sourceDir, "**", "*.dj"]); + const globPattern = joinGlobs([sourceDir, "**", "*.md"]); for await (const entry of expandGlob(globPattern)) { filePaths.push(entry.path); } @@ -69,7 +69,7 @@ async function parsePosts( const posts = []; for (const postFile of postFiles) { posts.push( - await generatePostPage(await parseDjotFile(postFile, config), config), + await generatePostPage(await parseMarkdownFile(postFile, config), config), ); } return posts; @@ -265,9 +265,9 @@ async function copyBlogAssetFiles(config: Config) { for await (const { isFile, path } of expandGlob(globPattern)) { if (!isFile) continue; - // Skip .dj, .toml, .pdf files + // Skip .md, .toml, .pdf files if ( - path.endsWith(".dj") || + path.endsWith(".md") || path.endsWith(".toml") || path.endsWith(".pdf") ) { @@ -290,9 +290,9 @@ async function copySlidesAssetFiles(config: Config) { for await (const { isFile, path } of expandGlob(globPattern)) { if (!isFile) continue; - // Skip .dj, .toml, .pdf files + // Skip .md, .toml, .pdf files if ( - path.endsWith(".dj") || + path.endsWith(".md") || path.endsWith(".toml") || path.endsWith(".pdf") ) { diff --git a/services/nuldoc/nuldoc-src/components/TableOfContents.ts b/services/nuldoc/nuldoc-src/components/TableOfContents.ts index ac4205a..37796ff 100644 --- a/services/nuldoc/nuldoc-src/components/TableOfContents.ts +++ b/services/nuldoc/nuldoc-src/components/TableOfContents.ts @@ -1,4 +1,4 @@ -import { TocEntry, TocRoot } from "../djot/document.ts"; +import { TocEntry, TocRoot } from "../markdown/document.ts"; import { elem, Element } from "../dom.ts"; type Props = { diff --git a/services/nuldoc/nuldoc-src/djot/djot2ndoc.ts b/services/nuldoc/nuldoc-src/djot/djot2ndoc.ts deleted file mode 100644 index 627e8d6..0000000 --- a/services/nuldoc/nuldoc-src/djot/djot2ndoc.ts +++ /dev/null @@ -1,604 +0,0 @@ -import { - Block as DjotBlock, - BlockQuote as DjotBlockQuote, - BulletList as DjotBulletList, - CodeBlock as DjotCodeBlock, - Definition as DjotDefinition, - DefinitionList as DjotDefinitionList, - DefinitionListItem as DjotDefinitionListItem, - Delete as DjotDelete, - DisplayMath as DjotDisplayMath, - Div as DjotDiv, - Doc as DjotDoc, - DoubleQuoted as DjotDoubleQuoted, - Email as DjotEmail, - Emph as DjotEmph, - FootnoteReference as DjotFootnoteReference, - HardBreak as DjotHardBreak, - Heading as DjotHeading, - Image as DjotImage, - Inline as DjotInline, - InlineMath as DjotInlineMath, - Insert as DjotInsert, - Link as DjotLink, - ListItem as DjotListItem, - Mark as DjotMark, - NonBreakingSpace as DjotNonBreakingSpace, - OrderedList as DjotOrderedList, - Para as DjotPara, - RawBlock as DjotRawBlock, - RawInline as DjotRawInline, - Section as DjotSection, - SingleQuoted as DjotSingleQuoted, - SmartPunctuation as DjotSmartPunctuation, - SoftBreak as DjotSoftBreak, - Span as DjotSpan, - Str as DjotStr, - Strong as DjotStrong, - Subscript as DjotSubscript, - Superscript as DjotSuperscript, - Symb as DjotSymb, - Table as DjotTable, - TaskList as DjotTaskList, - TaskListItem as DjotTaskListItem, - Term as DjotTerm, - ThematicBreak as DjotThematicBreak, - Url as DjotUrl, - Verbatim as DjotVerbatim, -} from "@djot/djot"; -import { addClass, elem, Element, Node, rawHTML, text } from "../dom.ts"; - -function processBlock(node: DjotBlock): Element { - switch (node.tag) { - case "section": - return processSection(node); - case "para": - return processPara(node); - case "heading": - return processHeading(node); - case "thematic_break": - return processThematicBreak(node); - case "block_quote": - return processBlockQuote(node); - case "code_block": - return processCodeBlock(node); - case "bullet_list": - return processBulletList(node); - case "ordered_list": - return processOrderedList(node); - case "task_list": - return processTaskList(node); - case "definition_list": - return processDefinitionList(node); - case "table": - return processTable(node); - case "div": - return processDiv(node); - case "raw_block": - return processRawBlock(node); - } -} - -function processSection(node: DjotSection): Element { - return elem( - "section", - node.attributes, - ...node.children.map(processBlock), - ); -} - -function processPara(node: DjotPara): Element { - return elem( - "p", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processHeading(node: DjotHeading): Element { - return elem("h", node.attributes, ...node.children.map(processInline)); -} - -function processThematicBreak(node: DjotThematicBreak): Element { - return elem("hr", node.attributes); -} - -function processBlockQuote(node: DjotBlockQuote): Element { - return elem( - "blockquote", - node.attributes, - ...node.children.map(processBlock), - ); -} - -function processCodeBlock(node: DjotCodeBlock): Element { - const attributes = node.attributes || {}; - if (node.lang) { - attributes.language = node.lang; - } - if (node.attributes?.filename) { - attributes.filename = node.attributes.filename; - } - if (node.attributes?.numbered) { - attributes.numbered = "true"; - } - return elem("codeblock", attributes, text(node.text)); -} - -function processBulletList(node: DjotBulletList): Element { - const attributes = node.attributes || {}; - attributes.__tight = node.tight ? "true" : "false"; - return elem("ul", attributes, ...node.children.map(processListItem)); -} - -function processOrderedList(node: DjotOrderedList): Element { - const attributes = node.attributes || {}; - attributes.__tight = node.tight ? "true" : "false"; - if (node.start !== undefined && node.start !== 1) { - attributes.start = node.start.toString(); - } - return elem("ol", attributes, ...node.children.map(processListItem)); -} - -function processTaskList(node: DjotTaskList): Element { - const attributes = node.attributes || {}; - attributes.type = "task"; - attributes.__tight = node.tight ? "true" : "false"; - return elem("ul", attributes, ...node.children.map(processTaskListItem)); -} - -function processListItem(node: DjotListItem): Element { - return elem( - "li", - node.attributes, - ...node.children.map(processBlock), - ); -} - -function processTaskListItem(node: DjotTaskListItem): Element { - const attributes = node.attributes || {}; - attributes.checked = node.checkbox === "checked" ? "true" : "false"; - return elem("li", attributes, ...node.children.map(processBlock)); -} - -function processDefinitionList(node: DjotDefinitionList): Element { - return elem( - "dl", - node.attributes, - ...node.children.flatMap(processDefinitionListItem), - ); -} - -function processDefinitionListItem(node: DjotDefinitionListItem): Element[] { - return [ - processTerm(node.children[0]), - processDefinition(node.children[1]), - ]; -} - -function processTerm(node: DjotTerm): Element { - return elem( - "dt", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processDefinition(node: DjotDefinition): Element { - return elem( - "dd", - node.attributes, - ...node.children.map(processBlock), - ); -} - -function processTable(node: DjotTable): Element { - // Tables in Djot have a caption as first child and then rows - // For now, we'll create a basic table structure and ignore caption - const tableElement = elem("table", node.attributes); - - // Process caption if it exists (first child) - if (node.children.length > 0 && node.children[0].tag === "caption") { - const caption = elem( - "caption", - undefined, - ...node.children[0].children.map(processInline), - ); - tableElement.children.push(caption); - } - - // Group rows into thead, tbody based on head property - const headerRows: Element[] = []; - const bodyRows: Element[] = []; - - // Start from index 1 to skip caption - for (let i = 1; i < node.children.length; i++) { - const row = node.children[i]; - if (row.tag === "row") { - const rowElement = elem( - "tr", - row.attributes, - ...row.children.map((cell) => { - const cellAttributes = cell.attributes || {}; - // Set alignment attribute if needed - if (cell.align !== "default") { - cellAttributes.align = cell.align; - } - return elem( - cell.head ? "th" : "td", - cellAttributes, - ...cell.children.map(processInline), - ); - }), - ); - - if (row.head) { - headerRows.push(rowElement); - } else { - bodyRows.push(rowElement); - } - } - } - - // Add thead and tbody if needed - if (headerRows.length > 0) { - tableElement.children.push(elem("thead", undefined, ...headerRows)); - } - - if (bodyRows.length > 0) { - tableElement.children.push(elem("tbody", undefined, ...bodyRows)); - } - - return tableElement; -} - -function processInline(node: DjotInline): Node { - switch (node.tag) { - case "str": - return processStr(node); - case "soft_break": - return processSoftBreak(node); - case "hard_break": - return processHardBreak(node); - case "verbatim": - return processVerbatim(node); - case "emph": - return processEmph(node); - case "strong": - return processStrong(node); - case "link": - return processLink(node); - case "image": - return processImage(node); - case "mark": - return processMark(node); - case "superscript": - return processSuperscript(node); - case "subscript": - return processSubscript(node); - case "insert": - return processInsert(node); - case "delete": - return processDelete(node); - case "email": - return processEmail(node); - case "footnote_reference": - return processFootnoteReference(node); - case "url": - return processUrl(node); - case "span": - return processSpan(node); - case "inline_math": - return processInlineMath(node); - case "display_math": - return processDisplayMath(node); - case "non_breaking_space": - return processNonBreakingSpace(node); - case "symb": - return processSymb(node); - case "raw_inline": - return processRawInline(node); - case "double_quoted": - return processDoubleQuoted(node); - case "single_quoted": - return processSingleQuoted(node); - case "smart_punctuation": - return processSmartPunctuation(node); - } -} - -function processStr(node: DjotStr): Node { - return text(node.text); -} - -function processSoftBreak(_node: DjotSoftBreak): Node { - return text("\n"); -} - -function processHardBreak(_node: DjotHardBreak): Node { - return elem("br"); -} - -function processVerbatim(node: DjotVerbatim): Element { - return elem("code", node.attributes, text(node.text)); -} - -function processEmph(node: DjotEmph): Element { - return elem( - "em", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processStrong(node: DjotStrong): Element { - return elem( - "strong", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processLink(node: DjotLink): Element { - const attributes = node.attributes || {}; - if (node.destination !== undefined) { - attributes.href = node.destination; - } - return elem("a", attributes, ...node.children.map(processInline)); -} - -function processImage(node: DjotImage): Element { - const attributes = node.attributes || {}; - if (node.destination !== undefined) { - attributes.src = node.destination; - } - - // Alt text is derived from children in Djot - const alt = node.children - .map((child) => { - if (child.tag === "str") { - return child.text; - } - return ""; - }) - .join(""); - - if (alt) { - attributes.alt = alt; - } - - return elem("img", attributes); -} - -function processMark(node: DjotMark): Element { - return elem( - "mark", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processSuperscript(node: DjotSuperscript): Element { - return elem( - "sup", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processSubscript(node: DjotSubscript): Element { - return elem( - "sub", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processInsert(node: DjotInsert): Element { - return elem( - "ins", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processDelete(node: DjotDelete): Element { - return elem( - "del", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processEmail(node: DjotEmail): Element { - return elem("email", node.attributes, text(node.text)); -} - -function processFootnoteReference(node: DjotFootnoteReference): Element { - return elem("footnoteref", { reference: node.text }); -} - -function processUrl(node: DjotUrl): Element { - const e = elem( - "a", - { - href: node.text, - ...node.attributes, - }, - text(node.text), - ); - addClass(e, "url"); - return e; -} - -function processSpan(node: DjotSpan): Element { - return elem( - "span", - node.attributes, - ...node.children.map(processInline), - ); -} - -function processInlineMath(node: DjotInlineMath): Element { - // For inline math, we'll wrap it in a span with a class - return elem( - "span", - { - class: "math inline", - ...node.attributes, - }, - text(node.text), - ); -} - -function processDisplayMath(node: DjotDisplayMath): Element { - // For display math, we'll wrap it in a div with a class - return elem( - "div", - { - class: "math display", - ...node.attributes, - }, - text(node.text), - ); -} - -function processNonBreakingSpace(_node: DjotNonBreakingSpace): Node { - return text("\u00A0"); // Unicode non-breaking space -} - -function processSymb(node: DjotSymb): Node { - // Map symbol aliases to their Unicode characters - const symbolMap: Record = { - "->": "→", - "<-": "←", - "<->": "↔", - "=>": "⇒", - "<=": "⇐", - "<=>": "⇔", - "--": "–", // en dash - "---": "—", // em dash - "...": "…", // ellipsis - // Add more symbol mappings as needed - }; - - const symbolText = symbolMap[node.alias] || node.alias; - - return text(symbolText); -} - -function processRawInline(node: DjotRawInline): Node { - // If the format is HTML, return as raw HTML - if (node.format === "html" || node.format === "HTML") { - return rawHTML(node.text); - } - - // For other formats, just return as text - return text(node.text); -} - -function processDoubleQuoted(node: DjotDoubleQuoted): Node { - const children = node.children.map(processInline); - const attributes = node.attributes || {}; - - if ( - children.length === 1 && children[0].kind === "text" && - Object.keys(attributes).length === 0 - ) { - const content = children[0].content; - return text(`\u201C${content}\u201D`); - } else { - return elem("span", node.attributes, ...children); - } -} - -function processSingleQuoted(node: DjotSingleQuoted): Node { - const children = node.children.map(processInline); - const attributes = node.attributes || {}; - - if ( - children.length === 1 && children[0].kind === "text" && - Object.keys(attributes).length === 0 - ) { - const content = children[0].content; - return text(`\u2018${content}\u2019`); - } else { - return elem("span", node.attributes, ...children); - } -} - -function processSmartPunctuation(node: DjotSmartPunctuation): Node { - // Map smart punctuation types to Unicode characters - const punctuationMap: Record = { - left_single_quote: "\u2018", // ' - right_single_quote: "\u2019", // ' - left_double_quote: "\u201C", // " - right_double_quote: "\u201D", // " - ellipses: "\u2026", // … - em_dash: "\u2014", // — - en_dash: "\u2013", // – - }; - - return text(punctuationMap[node.type] || node.text); -} - -function processDiv(node: DjotDiv): Element { - if (node.attributes?.class === "note") { - delete node.attributes.class; - return elem( - "note", - node.attributes, - ...node.children.map(processBlock), - ); - } - - if (node.attributes?.class === "edit") { - delete node.attributes.class; - return elem( - "note", - node.attributes, - ...node.children.map(processBlock), - ); - } - - return elem( - "div", - node.attributes, - ...node.children.map(processBlock), - ); -} - -function processRawBlock(node: DjotRawBlock): Element { - // If the format is HTML, wrap the HTML content in a div - if (node.format === "html" || node.format === "HTML") { - return elem("div", { class: "raw-html" }, rawHTML(node.text)); - } - - // For other formats, wrap in a pre tag - return elem("pre", { "data-format": node.format }, text(node.text)); -} - -export function djot2ndoc(doc: DjotDoc): Element { - const children: Node[] = []; - for (const child of doc.children) { - children.push(processBlock(child)); - } - - // Process footnotes if any exist - if (doc.footnotes && Object.keys(doc.footnotes).length > 0) { - const footnoteSection = elem("section", { class: "footnotes" }); - - for (const [id, footnote] of Object.entries(doc.footnotes)) { - const footnoteElement = elem( - "footnote", - { id }, - ...footnote.children.map(processBlock), - ); - footnoteSection.children.push(footnoteElement); - } - - children.push(footnoteSection); - } - - return elem("__root__", undefined, elem("article", undefined, ...children)); -} diff --git a/services/nuldoc/nuldoc-src/djot/document.ts b/services/nuldoc/nuldoc-src/djot/document.ts deleted file mode 100644 index 3e8cd92..0000000 --- a/services/nuldoc/nuldoc-src/djot/document.ts +++ /dev/null @@ -1,75 +0,0 @@ -import { Doc as DjotDoc } from "@djot/djot"; -import { join } from "@std/path"; -import { z } from "zod/mod.ts"; -import { Config } from "../config.ts"; -import { Element } from "../dom.ts"; -import { Revision, stringToDate } from "../revision.ts"; -import { djot2ndoc } from "./djot2ndoc.ts"; - -export const PostMetadataSchema = z.object({ - article: z.object({ - uuid: z.string(), - title: z.string(), - description: z.string(), - tags: z.array(z.string()), - toc: z.boolean().optional(), - revisions: z.array(z.object({ - date: z.string(), - remark: z.string(), - isInternal: z.boolean().optional(), - })), - }), -}); - -export type PostMetadata = z.infer; - -export type TocEntry = { - id: string; - text: string; - level: number; - children: TocEntry[]; -}; - -export type TocRoot = { - entries: TocEntry[]; -}; - -export type Document = { - root: Element; - sourceFilePath: string; - uuid: string; - link: string; - title: string; - description: string; // TODO: should it be markup text? - tags: string[]; - revisions: Revision[]; - toc?: TocRoot; - isTocEnabled: boolean; -}; - -export function createNewDocumentFromDjotDocument( - root: DjotDoc, - meta: PostMetadata, - sourceFilePath: string, - config: Config, -): Document { - const cwd = Deno.cwd(); - const contentDir = join(cwd, config.locations.contentDir); - const link = sourceFilePath.replace(contentDir, "").replace(".xml", "/"); - return { - root: djot2ndoc(root), - sourceFilePath, - uuid: meta.article.uuid, - link: link, - title: meta.article.title, - description: meta.article.description, - tags: meta.article.tags, - revisions: meta.article.revisions.map((r, i) => ({ - number: i, - date: stringToDate(r.date), - remark: r.remark, - isInternal: !!r.isInternal, - })), - isTocEnabled: meta.article.toc !== false, - }; -} diff --git a/services/nuldoc/nuldoc-src/djot/parse.ts b/services/nuldoc/nuldoc-src/djot/parse.ts deleted file mode 100644 index c79a670..0000000 --- a/services/nuldoc/nuldoc-src/djot/parse.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { parse as parseDjot } from "@djot/djot"; -import { parse as parseToml } from "@std/toml"; -import { Config } from "../config.ts"; -import { - createNewDocumentFromDjotDocument, - Document, - PostMetadata, - PostMetadataSchema, -} from "./document.ts"; -import toHtml from "./to_html.ts"; - -export async function parseDjotFile( - filePath: string, - config: Config, -): Promise { - try { - const fileContent = await Deno.readTextFile(filePath); - const [, frontmatter, ...rest] = fileContent.split(/^---$/m); - const meta = parseMetadata(frontmatter); - const root = parseDjot(rest.join("\n")); - const doc = createNewDocumentFromDjotDocument(root, meta, filePath, config); - return await toHtml(doc); - } catch (e) { - if (e instanceof Error) { - e.message = `${e.message} in ${filePath}`; - } - throw e; - } -} - -function parseMetadata(s: string): PostMetadata { - return PostMetadataSchema.parse(parseToml(s)); -} diff --git a/services/nuldoc/nuldoc-src/djot/to_html.ts b/services/nuldoc/nuldoc-src/djot/to_html.ts deleted file mode 100644 index 8219b74..0000000 --- a/services/nuldoc/nuldoc-src/djot/to_html.ts +++ /dev/null @@ -1,499 +0,0 @@ -import { BundledLanguage, bundledLanguages, codeToHtml } from "shiki"; -import { Document, TocEntry } from "./document.ts"; -import { NuldocError } from "../errors.ts"; -import { - addClass, - elem, - Element, - forEachChild, - forEachChildRecursively, - forEachChildRecursivelyAsync, - forEachElementOfType, - innerText, - Node, - processTextNodesInElement, - RawHTML, - rawHTML, - Text, - text, -} from "../dom.ts"; - -export default async function toHtml(doc: Document): Promise { - mergeConsecutiveTextNodes(doc); - removeUnnecessaryTextNode(doc); - transformLinkLikeToAnchorElement(doc); - transformSectionIdAttribute(doc); - setSectionTitleAnchor(doc); - transformSectionTitleElement(doc); - transformNoteElement(doc); - addAttributesToExternalLinkElement(doc); - traverseFootnotes(doc); - removeUnnecessaryParagraphNode(doc); - await transformAndHighlightCodeBlockElement(doc); - mergeConsecutiveTextNodes(doc); - generateTableOfContents(doc); - removeTocAttributes(doc); - return doc; -} - -function mergeConsecutiveTextNodes(doc: Document) { - forEachChildRecursively(doc.root, (n) => { - if (n.kind !== "element") { - return; - } - - const newChildren: Node[] = []; - let currentTextContent = ""; - - for (const child of n.children) { - if (child.kind === "text") { - currentTextContent += child.content; - } else { - if (currentTextContent !== "") { - newChildren.push(text(currentTextContent)); - currentTextContent = ""; - } - newChildren.push(child); - } - } - - if (currentTextContent !== "") { - newChildren.push(text(currentTextContent)); - } - - n.children = newChildren; - }); -} - -function removeUnnecessaryTextNode(doc: Document) { - forEachChildRecursively(doc.root, (n) => { - if (n.kind !== "element") { - return; - } - - let changed = true; - while (changed) { - changed = false; - if (n.children.length === 0) { - break; - } - const firstChild = n.children[0]; - if (firstChild.kind === "text" && firstChild.content.trim() === "") { - n.children.shift(); - changed = true; - } - if (n.children.length === 0) { - break; - } - const lastChild = n.children[n.children.length - 1]; - if (lastChild.kind === "text" && lastChild.content.trim() === "") { - n.children.pop(); - changed = true; - } - } - }); -} - -function transformLinkLikeToAnchorElement(doc: Document) { - forEachChildRecursively(doc.root, (n) => { - if ( - n.kind !== "element" || n.name === "a" || n.name === "code" || - n.name === "codeblock" - ) { - return; - } - - processTextNodesInElement(n, (content) => { - const nodes: Node[] = []; - let restContent = content; - while (restContent !== "") { - const match = /^(.*?)(https?:\/\/[^ \n]+)(.*)$/s.exec(restContent); - if (!match) { - nodes.push(text(restContent)); - restContent = ""; - break; - } - const [_, prefix, url, suffix] = match; - nodes.push(text(prefix)); - nodes.push(elem("a", { href: url, class: "url" }, text(url))); - restContent = suffix; - } - return nodes; - }); - }); -} - -function transformSectionIdAttribute(doc: Document) { - const sectionStack: string[] = []; - const usedIds = new Set(); - - const processNode = (n: Node) => { - if (n.kind !== "element") { - return; - } - - if (n.name === "section") { - const idAttr = n.attributes.id; - if (!idAttr) { - return; - } - - let newId: string; - if (sectionStack.length === 0) { - newId = `section--${idAttr}`; - } else { - newId = `section--${sectionStack.join("--")}--${idAttr}`; - } - - if (usedIds.has(newId)) { - throw new NuldocError( - `[nuldoc.tohtml] Duplicate section ID: ${newId}`, - ); - } - - usedIds.add(newId); - n.attributes.id = newId; - sectionStack.push(idAttr); - - forEachChild(n, processNode); - - sectionStack.pop(); - } else { - forEachChild(n, processNode); - } - }; - - forEachChild(doc.root, processNode); -} - -function setSectionTitleAnchor(doc: Document) { - const sectionStack: Element[] = []; - const g = (c: Node) => { - if (c.kind !== "element") { - return; - } - - if (c.name === "section") { - sectionStack.push(c); - } - forEachChild(c, g); - if (c.name === "section") { - sectionStack.pop(); - } - if (c.name === "h") { - const currentSection = sectionStack[sectionStack.length - 1]; - if (!currentSection) { - throw new NuldocError( - "[nuldoc.tohtml] element must be inside
", - ); - } - const sectionId = currentSection.attributes.id; - const aElement = elem("a", undefined, ...c.children); - aElement.attributes.href = `#${sectionId}`; - c.children = [aElement]; - } - }; - forEachChild(doc.root, g); -} - -function transformSectionTitleElement(doc: Document) { - let sectionLevel = 1; - const g = (c: Node) => { - if (c.kind !== "element") { - return; - } - - if (c.name === "section") { - sectionLevel += 1; - c.attributes.__sectionLevel = sectionLevel.toString(); - } - forEachChild(c, g); - if (c.name === "section") { - sectionLevel -= 1; - } - if (c.name === "h") { - c.name = `h${sectionLevel}`; - } - }; - forEachChild(doc.root, g); -} - -function transformNoteElement(doc: Document) { - forEachElementOfType(doc.root, "note", (n) => { - const editatAttr = n.attributes?.editat; - const operationAttr = n.attributes?.operation; - const isEditBlock = editatAttr && operationAttr; - - const labelElement = elem( - "div", - { class: "admonition-label" }, - text(isEditBlock ? `${editatAttr} ${operationAttr}` : "NOTE"), - ); - const contentElement = elem( - "div", - { class: "admonition-content" }, - ...n.children, - ); - n.name = "div"; - addClass(n, "admonition"); - n.children = [labelElement, contentElement]; - }); -} - -function addAttributesToExternalLinkElement(doc: Document) { - forEachElementOfType(doc.root, "a", (n) => { - const href = n.attributes.href ?? ""; - if (!href.startsWith("http")) { - return; - } - n.attributes.target = "_blank"; - n.attributes.rel = "noreferrer"; - }); -} - -function traverseFootnotes(doc: Document) { - let footnoteCounter = 0; - const footnoteMap = new Map(); - - forEachElementOfType(doc.root, "footnoteref", (n) => { - const reference = n.attributes.reference; - if (!reference) { - return; - } - - let footnoteNumber: number; - if (footnoteMap.has(reference)) { - footnoteNumber = footnoteMap.get(reference)!; - } else { - footnoteNumber = ++footnoteCounter; - footnoteMap.set(reference, footnoteNumber); - } - - n.name = "sup"; - delete n.attributes.reference; - n.attributes.class = "footnote"; - n.children = [ - elem( - "a", - { - id: `footnoteref--${reference}`, - class: "footnote", - href: `#footnote--${reference}`, - }, - text(`[${footnoteNumber}]`), - ), - ]; - }); - - forEachElementOfType(doc.root, "footnote", (n) => { - const id = n.attributes.id; - if (!id || !footnoteMap.has(id)) { - n.name = "span"; - n.children = []; - return; - } - - const footnoteNumber = footnoteMap.get(id)!; - - n.name = "div"; - delete n.attributes.id; - n.attributes.class = "footnote"; - n.attributes.id = `footnote--${id}`; - - n.children = [ - elem( - "a", - { href: `#footnoteref--${id}` }, - text(`${footnoteNumber}. `), - ), - ...n.children, - ]; - }); -} - -function removeUnnecessaryParagraphNode(doc: Document) { - forEachChildRecursively(doc.root, (n) => { - if (n.kind !== "element" || (n.name !== "ul" && n.name !== "ol")) { - return; - } - - const isTight = n.attributes.__tight === "true"; - if (!isTight) { - return; - } - - for (const child of n.children) { - if (child.kind !== "element" || child.name !== "li") { - continue; - } - const newGrandChildren: Node[] = []; - for (const grandChild of child.children) { - if (grandChild.kind === "element" && grandChild.name === "p") { - newGrandChildren.push(...grandChild.children); - } else { - newGrandChildren.push(grandChild); - } - } - child.children = newGrandChildren; - } - }); -} - -async function transformAndHighlightCodeBlockElement(doc: Document) { - await forEachChildRecursivelyAsync(doc.root, async (n) => { - if (n.kind !== "element" || n.name !== "codeblock") { - return; - } - - const language = n.attributes.language || "text"; - const filename = n.attributes.filename; - const numbered = n.attributes.numbered; - const sourceCodeNode = n.children[0] as Text | RawHTML; - const sourceCode = sourceCodeNode.kind === "text" - ? sourceCodeNode.content.trimEnd() - : sourceCodeNode.html.trimEnd(); - - const highlighted = await codeToHtml(sourceCode, { - lang: language in bundledLanguages ? language as BundledLanguage : "text", - theme: "github-light", - colorReplacements: { - "#fff": "#f5f5f5", - }, - }); - - n.name = "div"; - n.attributes.class = "codeblock"; - delete n.attributes.language; - - if (numbered === "true") { - delete n.attributes.numbered; - addClass(n, "numbered"); - } - if (filename) { - delete n.attributes.filename; - - n.children = [ - elem("div", { class: "filename" }, text(filename)), - rawHTML(highlighted), - ]; - } else { - if (sourceCodeNode.kind === "text") { - n.children[0] = rawHTML(highlighted); - } else { - sourceCodeNode.html = highlighted; - } - } - }); -} - -function generateTableOfContents(doc: Document) { - if (!doc.isTocEnabled) { - return; - } - const tocEntries: TocEntry[] = []; - const stack: TocEntry[] = []; - const excludedLevels: number[] = []; // Track levels to exclude - - const processNode = (node: Node) => { - if (node.kind !== "element") { - return; - } - - const match = node.name.match(/^h(\d+)$/); - if (match) { - const level = parseInt(match[1]); - - let parentSection: Element | null = null; - const findParentSection = (n: Node, target: Node): Element | null => { - if (n.kind !== "element") return null; - - for (const child of n.children) { - if (child === target && n.name === "section") { - return n; - } - const result = findParentSection(child, target); - if (result) return result; - } - return null; - }; - - parentSection = findParentSection(doc.root, node); - if (!parentSection) return; - - // Check if this section has toc=false attribute - const tocAttribute = parentSection.attributes.toc; - if (tocAttribute === "false") { - // Add this level to excluded levels and remove deeper levels - excludedLevels.length = 0; - excludedLevels.push(level); - return; - } - - // Check if this header should be excluded based on parent exclusion - const shouldExclude = excludedLevels.some((excludedLevel) => - level > excludedLevel - ); - if (shouldExclude) { - return; - } - - // Clean up excluded levels that are now at same or deeper level - while ( - excludedLevels.length > 0 && - excludedLevels[excludedLevels.length - 1] >= level - ) { - excludedLevels.pop(); - } - - const sectionId = parentSection.attributes.id; - if (!sectionId) return; - - let headingText = ""; - for (const child of node.children) { - if (child.kind === "element" && child.name === "a") { - headingText = innerText(child); - } - } - - const entry: TocEntry = { - id: sectionId, - text: headingText, - level: level, - children: [], - }; - - while (stack.length > 0 && stack[stack.length - 1].level >= level) { - stack.pop(); - } - - if (stack.length === 0) { - tocEntries.push(entry); - } else { - stack[stack.length - 1].children.push(entry); - } - - stack.push(entry); - } - - forEachChild(node, processNode); - }; - - forEachChild(doc.root, processNode); - - // Don't generate TOC if there's only one top-level section with no children - if (tocEntries.length === 1 && tocEntries[0].children.length === 0) { - return; - } - - doc.toc = { - entries: tocEntries, - }; -} - -function removeTocAttributes(doc: Document) { - forEachChildRecursively(doc.root, (node) => { - if (node.kind === "element" && node.name === "section") { - delete node.attributes.toc; - } - }); -} diff --git a/services/nuldoc/nuldoc-src/generators/post.ts b/services/nuldoc/nuldoc-src/generators/post.ts index 11a3ce8..2f466b9 100644 --- a/services/nuldoc/nuldoc-src/generators/post.ts +++ b/services/nuldoc/nuldoc-src/generators/post.ts @@ -1,7 +1,7 @@ import { join } from "@std/path"; import PostPage from "../pages/PostPage.ts"; import { Config } from "../config.ts"; -import { Document } from "../djot/document.ts"; +import { Document } from "../markdown/document.ts"; import { Page } from "../page.ts"; import { Date, Revision } from "../revision.ts"; @@ -41,7 +41,7 @@ export async function generatePostPage( const cwd = Deno.cwd(); const contentDir = join(cwd, config.locations.contentDir); const destFilePath = join( - doc.sourceFilePath.replace(contentDir, "").replace(".dj", ""), + doc.sourceFilePath.replace(contentDir, "").replace(".md", ""), "index.html", ); return { diff --git a/services/nuldoc/nuldoc-src/markdown/document.ts b/services/nuldoc/nuldoc-src/markdown/document.ts new file mode 100644 index 0000000..1aee87b --- /dev/null +++ b/services/nuldoc/nuldoc-src/markdown/document.ts @@ -0,0 +1,75 @@ +import type { Root as MdastRoot } from "mdast"; +import { join } from "@std/path"; +import { z } from "zod/mod.ts"; +import { Config } from "../config.ts"; +import { Element } from "../dom.ts"; +import { Revision, stringToDate } from "../revision.ts"; +import { mdast2ndoc } from "./mdast2ndoc.ts"; + +export const PostMetadataSchema = z.object({ + article: z.object({ + uuid: z.string(), + title: z.string(), + description: z.string(), + tags: z.array(z.string()), + toc: z.boolean().optional(), + revisions: z.array(z.object({ + date: z.string(), + remark: z.string(), + isInternal: z.boolean().optional(), + })), + }), +}); + +export type PostMetadata = z.infer; + +export type TocEntry = { + id: string; + text: string; + level: number; + children: TocEntry[]; +}; + +export type TocRoot = { + entries: TocEntry[]; +}; + +export type Document = { + root: Element; + sourceFilePath: string; + uuid: string; + link: string; + title: string; + description: string; + tags: string[]; + revisions: Revision[]; + toc?: TocRoot; + isTocEnabled: boolean; +}; + +export function createNewDocumentFromMdast( + root: MdastRoot, + meta: PostMetadata, + sourceFilePath: string, + config: Config, +): Document { + const cwd = Deno.cwd(); + const contentDir = join(cwd, config.locations.contentDir); + const link = sourceFilePath.replace(contentDir, "").replace(".xml", "/"); + return { + root: mdast2ndoc(root), + sourceFilePath, + uuid: meta.article.uuid, + link: link, + title: meta.article.title, + description: meta.article.description, + tags: meta.article.tags, + revisions: meta.article.revisions.map((r, i) => ({ + number: i, + date: stringToDate(r.date), + remark: r.remark, + isInternal: !!r.isInternal, + })), + isTocEnabled: meta.article.toc !== false, + }; +} diff --git a/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts b/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts new file mode 100644 index 0000000..367627c --- /dev/null +++ b/services/nuldoc/nuldoc-src/markdown/mdast2ndoc.ts @@ -0,0 +1,575 @@ +import type { + Blockquote, + Code, + Definition, + Delete, + Emphasis, + FootnoteDefinition, + FootnoteReference, + Heading, + Html, + Image, + InlineCode, + Link, + List, + ListItem, + Paragraph, + PhrasingContent, + Root, + RootContent, + Strong, + Table, + TableCell, + TableRow, + Text as MdastText, + ThematicBreak, +} from "mdast"; +import type { + ContainerDirective, + LeafDirective, + TextDirective, +} from "mdast-util-directive"; +import { elem, Element, Node, rawHTML, text } from "../dom.ts"; + +type DirectiveNode = ContainerDirective | LeafDirective | TextDirective; + +function isDirective(node: RootContent): node is DirectiveNode { + return ( + node.type === "containerDirective" || + node.type === "leafDirective" || + node.type === "textDirective" + ); +} + +// Extract section ID and attributes from heading if present +// Supports syntax like {#id} or {#id attr="value"} +function extractSectionId( + node: Heading, +): { + id: string | null; + attributes: Record; + children: Heading["children"]; +} { + if (node.children.length === 0) { + return { id: null, attributes: {}, children: node.children }; + } + + const lastChild = node.children[node.children.length - 1]; + if (lastChild && lastChild.type === "text") { + // Match {#id ...} or {#id attr="value" ...} + const match = lastChild.value.match(/\s*\{#([^\s}]+)([^}]*)\}\s*$/); + if (match) { + const id = match[1]; + const attrString = match[2].trim(); + const attributes: Record = {}; + + // Parse attributes like toc="false" (supports smart quotes too) + // U+0022 = ", U+201C = ", U+201D = " + const attrRegex = + /(\w+)=["\u201c\u201d]([^"\u201c\u201d]*)["\u201c\u201d]/g; + let attrMatch; + while ((attrMatch = attrRegex.exec(attrString)) !== null) { + attributes[attrMatch[1]] = attrMatch[2]; + } + + const newValue = lastChild.value.replace(/\s*\{#[^}]+\}\s*$/, ""); + if (newValue === "") { + return { id, attributes, children: node.children.slice(0, -1) }; + } else { + const newChildren = [...node.children]; + newChildren[newChildren.length - 1] = { ...lastChild, value: newValue }; + return { id, attributes, children: newChildren }; + } + } + } + + return { id: null, attributes: {}, children: node.children }; +} + +function processBlock(node: RootContent): Element | Element[] | null { + switch (node.type) { + case "heading": + // Headings are handled specially in mdast2ndoc + return null; + case "paragraph": + return processParagraph(node); + case "thematicBreak": + return processThematicBreak(node); + case "blockquote": + return processBlockquote(node); + case "code": + return processCode(node); + case "list": + return processList(node); + case "table": + return processTable(node); + case "html": + return processHtmlBlock(node); + case "definition": + return processDefinition(node); + case "footnoteDefinition": + return processFootnoteDefinition(node); + default: + if (isDirective(node)) { + return processDirective(node); + } + return null; + } +} + +function processParagraph(node: Paragraph): Element { + return elem("p", {}, ...node.children.map(processInline)); +} + +function processThematicBreak(_node: ThematicBreak): Element { + return elem("hr", {}); +} + +function processBlockquote(node: Blockquote): Element { + const children: Node[] = []; + for (const child of node.children) { + const result = processBlock(child); + if (result) { + if (Array.isArray(result)) { + children.push(...result); + } else { + children.push(result); + } + } + } + return elem("blockquote", {}, ...children); +} + +function processCode(node: Code): Element { + const attributes: Record = {}; + + if (node.lang) { + attributes.language = node.lang; + } + + // Parse meta string for filename and numbered attributes + if (node.meta) { + const filenameMatch = node.meta.match(/filename="([^"]+)"/); + if (filenameMatch) { + attributes.filename = filenameMatch[1]; + } + + if (node.meta.includes("numbered")) { + attributes.numbered = "true"; + } + } + + return elem("codeblock", attributes, text(node.value)); +} + +function processList(node: List): Element { + const attributes: Record = {}; + attributes.__tight = node.spread === false ? "true" : "false"; + + const isTaskList = node.children.some( + (item) => item.checked !== null && item.checked !== undefined, + ); + + if (isTaskList) { + attributes.type = "task"; + } + + if (node.ordered && node.start !== null && node.start !== 1) { + attributes.start = node.start!.toString(); + } + + const children = node.children.map((item) => + processListItem(item, isTaskList) + ); + + return elem(node.ordered ? "ol" : "ul", attributes, ...children); +} + +function processListItem(node: ListItem, isTaskList: boolean): Element { + const attributes: Record = {}; + + if (isTaskList) { + attributes.checked = node.checked ? "true" : "false"; + } + + const children: Node[] = []; + for (const child of node.children) { + const result = processBlock(child); + if (result) { + if (Array.isArray(result)) { + children.push(...result); + } else { + children.push(result); + } + } + } + + return elem("li", attributes, ...children); +} + +function processTable(node: Table): Element { + const tableElement = elem("table", {}); + const headerRows: Element[] = []; + const bodyRows: Element[] = []; + + node.children.forEach((row, rowIndex) => { + const rowElement = processTableRow(row, rowIndex === 0, node.align); + if (rowIndex === 0) { + headerRows.push(rowElement); + } else { + bodyRows.push(rowElement); + } + }); + + if (headerRows.length > 0) { + tableElement.children.push(elem("thead", undefined, ...headerRows)); + } + + if (bodyRows.length > 0) { + tableElement.children.push(elem("tbody", undefined, ...bodyRows)); + } + + return tableElement; +} + +function processTableRow( + node: TableRow, + isHeader: boolean, + alignments: (string | null)[] | null | undefined, +): Element { + const cells = node.children.map((cell, index) => + processTableCell(cell, isHeader, alignments?.[index]) + ); + return elem("tr", {}, ...cells); +} + +function processTableCell( + node: TableCell, + isHeader: boolean, + alignment: string | null | undefined, +): Element { + const attributes: Record = {}; + if (alignment && alignment !== "none") { + attributes.align = alignment; + } + + return elem( + isHeader ? "th" : "td", + attributes, + ...node.children.map(processInline), + ); +} + +function processHtmlBlock(node: Html): Element { + return elem("div", { class: "raw-html" }, rawHTML(node.value)); +} + +function processDefinition(_node: Definition): null { + // Link definitions are handled elsewhere + return null; +} + +function processFootnoteDefinition(node: FootnoteDefinition): Element { + const children: Node[] = []; + for (const child of node.children) { + const result = processBlock(child); + if (result) { + if (Array.isArray(result)) { + children.push(...result); + } else { + children.push(result); + } + } + } + return elem("footnote", { id: node.identifier }, ...children); +} + +function processDirective(node: DirectiveNode): Element | null { + const name = node.name; + + if (name === "note" || name === "edit") { + const attributes: Record = {}; + + // Copy directive attributes + if (node.attributes) { + for (const [key, value] of Object.entries(node.attributes)) { + if (value !== undefined && value !== null) { + attributes[key] = String(value); + } + } + } + + const children: Node[] = []; + if ("children" in node && node.children) { + for (const child of node.children as RootContent[]) { + const result = processBlock(child); + if (result) { + if (Array.isArray(result)) { + children.push(...result); + } else { + children.push(result); + } + } + } + } + + return elem("note", attributes, ...children); + } + + // For other directives, treat as div + const children: Node[] = []; + if ("children" in node && node.children) { + for (const child of node.children as RootContent[]) { + const result = processBlock(child); + if (result) { + if (Array.isArray(result)) { + children.push(...result); + } else { + children.push(result); + } + } + } + } + + return elem( + "div", + node.attributes as Record || {}, + ...children, + ); +} + +function processInline(node: PhrasingContent): Node { + switch (node.type) { + case "text": + return processText(node); + case "emphasis": + return processEmphasis(node); + case "strong": + return processStrong(node); + case "inlineCode": + return processInlineCode(node); + case "link": + return processLink(node); + case "image": + return processImage(node); + case "delete": + return processDelete(node); + case "break": + return elem("br"); + case "html": + return rawHTML(node.value); + case "footnoteReference": + return processFootnoteReference(node); + default: + // Handle any unexpected node types + if ("value" in node) { + return text(String(node.value)); + } + if ("children" in node && Array.isArray(node.children)) { + return elem( + "span", + {}, + ...node.children.map((c: PhrasingContent) => processInline(c)), + ); + } + return text(""); + } +} + +function processText(node: MdastText): Node { + return text(node.value); +} + +function processEmphasis(node: Emphasis): Element { + return elem("em", {}, ...node.children.map(processInline)); +} + +function processStrong(node: Strong): Element { + return elem("strong", {}, ...node.children.map(processInline)); +} + +function processInlineCode(node: InlineCode): Element { + return elem("code", {}, text(node.value)); +} + +function processLink(node: Link): Element { + const attributes: Record = {}; + if (node.url) { + attributes.href = node.url; + } + if (node.title) { + attributes.title = node.title; + } + // Detect autolinks (URL equals link text) + const isAutolink = node.children.length === 1 && + node.children[0].type === "text" && + node.children[0].value === node.url; + if (isAutolink) { + attributes.class = "url"; + } + return elem("a", attributes, ...node.children.map(processInline)); +} + +function processImage(node: Image): Element { + const attributes: Record = {}; + if (node.url) { + attributes.src = node.url; + } + if (node.alt) { + attributes.alt = node.alt; + } + if (node.title) { + attributes.title = node.title; + } + return elem("img", attributes); +} + +function processDelete(node: Delete): Element { + return elem("del", {}, ...node.children.map(processInline)); +} + +function processFootnoteReference(node: FootnoteReference): Element { + return elem("footnoteref", { reference: node.identifier }); +} + +// Build hierarchical section structure from flat mdast +// This mimics Djot's section structure where headings create nested sections +export function mdast2ndoc(root: Root): Element { + const footnotes: Element[] = []; + const nonFootnoteChildren: RootContent[] = []; + + // Separate footnotes from other content + for (const child of root.children) { + if (child.type === "footnoteDefinition") { + const footnote = processFootnoteDefinition(child); + footnotes.push(footnote); + } else { + nonFootnoteChildren.push(child); + } + } + + // Build hierarchical sections + const articleContent = buildSectionHierarchy(nonFootnoteChildren); + + // Add footnotes section if any exist + if (footnotes.length > 0) { + const footnoteSection = elem( + "section", + { class: "footnotes" }, + ...footnotes, + ); + articleContent.push(footnoteSection); + } + + return elem( + "__root__", + undefined, + elem("article", undefined, ...articleContent), + ); +} + +type SectionInfo = { + id: string | null; + attributes: Record; + level: number; + heading: Element; + children: Node[]; +}; + +function buildSectionHierarchy(nodes: RootContent[]): Node[] { + // Group nodes into sections based on headings + // Each heading starts a new section at its level + const result: Node[] = []; + const sectionStack: SectionInfo[] = []; + + for (const node of nodes) { + if (node.type === "heading") { + const level = node.depth; + const { id, attributes, children } = extractSectionId(node); + + // Create heading element + const headingElement = elem( + "h", + {}, + ...children.map(processInline), + ); + + // Close sections that are at same or deeper level + while ( + sectionStack.length > 0 && + sectionStack[sectionStack.length - 1].level >= level + ) { + const closedSection = sectionStack.pop()!; + const sectionElement = createSectionElement(closedSection); + + if (sectionStack.length > 0) { + // Add to parent section + sectionStack[sectionStack.length - 1].children.push(sectionElement); + } else { + // Add to result + result.push(sectionElement); + } + } + + // Start new section + const newSection: SectionInfo = { + id, + attributes, + level, + heading: headingElement, + children: [], + }; + sectionStack.push(newSection); + } else { + // Non-heading content + const processed = processBlock(node); + if (processed) { + if (sectionStack.length > 0) { + // Add to current section + if (Array.isArray(processed)) { + sectionStack[sectionStack.length - 1].children.push(...processed); + } else { + sectionStack[sectionStack.length - 1].children.push(processed); + } + } else { + // Content before any heading + if (Array.isArray(processed)) { + result.push(...processed); + } else { + result.push(processed); + } + } + } + } + } + + // Close remaining sections + while (sectionStack.length > 0) { + const closedSection = sectionStack.pop()!; + const sectionElement = createSectionElement(closedSection); + + if (sectionStack.length > 0) { + // Add to parent section + sectionStack[sectionStack.length - 1].children.push(sectionElement); + } else { + // Add to result + result.push(sectionElement); + } + } + + return result; +} + +function createSectionElement(sectionInfo: SectionInfo): Element { + const attributes: Record = { ...sectionInfo.attributes }; + if (sectionInfo.id) { + attributes.id = sectionInfo.id; + } + + return elem( + "section", + attributes, + sectionInfo.heading, + ...sectionInfo.children, + ); +} diff --git a/services/nuldoc/nuldoc-src/markdown/parse.ts b/services/nuldoc/nuldoc-src/markdown/parse.ts new file mode 100644 index 0000000..c0875a2 --- /dev/null +++ b/services/nuldoc/nuldoc-src/markdown/parse.ts @@ -0,0 +1,47 @@ +import type { Root as MdastRoot } from "mdast"; +import { unified } from "unified"; +import remarkParse from "remark-parse"; +import remarkGfm from "remark-gfm"; +import remarkDirective from "remark-directive"; +import remarkSmartypants from "remark-smartypants"; +import { parse as parseToml } from "@std/toml"; +import { Config } from "../config.ts"; +import { + createNewDocumentFromMdast, + Document, + PostMetadata, + PostMetadataSchema, +} from "./document.ts"; +import toHtml from "./to_html.ts"; + +export async function parseMarkdownFile( + filePath: string, + config: Config, +): Promise { + try { + const fileContent = await Deno.readTextFile(filePath); + const [, frontmatter, ...rest] = fileContent.split(/^---$/m); + const meta = parseMetadata(frontmatter); + const content = rest.join("---"); + + const processor = unified() + .use(remarkParse) + .use(remarkGfm) + .use(remarkDirective) + .use(remarkSmartypants); + + const root = await processor.run(processor.parse(content)) as MdastRoot; + + const doc = createNewDocumentFromMdast(root, meta, filePath, config); + return await toHtml(doc); + } catch (e) { + if (e instanceof Error) { + e.message = `${e.message} in ${filePath}`; + } + throw e; + } +} + +function parseMetadata(s: string): PostMetadata { + return PostMetadataSchema.parse(parseToml(s)); +} diff --git a/services/nuldoc/nuldoc-src/markdown/to_html.ts b/services/nuldoc/nuldoc-src/markdown/to_html.ts new file mode 100644 index 0000000..8219b74 --- /dev/null +++ b/services/nuldoc/nuldoc-src/markdown/to_html.ts @@ -0,0 +1,499 @@ +import { BundledLanguage, bundledLanguages, codeToHtml } from "shiki"; +import { Document, TocEntry } from "./document.ts"; +import { NuldocError } from "../errors.ts"; +import { + addClass, + elem, + Element, + forEachChild, + forEachChildRecursively, + forEachChildRecursivelyAsync, + forEachElementOfType, + innerText, + Node, + processTextNodesInElement, + RawHTML, + rawHTML, + Text, + text, +} from "../dom.ts"; + +export default async function toHtml(doc: Document): Promise { + mergeConsecutiveTextNodes(doc); + removeUnnecessaryTextNode(doc); + transformLinkLikeToAnchorElement(doc); + transformSectionIdAttribute(doc); + setSectionTitleAnchor(doc); + transformSectionTitleElement(doc); + transformNoteElement(doc); + addAttributesToExternalLinkElement(doc); + traverseFootnotes(doc); + removeUnnecessaryParagraphNode(doc); + await transformAndHighlightCodeBlockElement(doc); + mergeConsecutiveTextNodes(doc); + generateTableOfContents(doc); + removeTocAttributes(doc); + return doc; +} + +function mergeConsecutiveTextNodes(doc: Document) { + forEachChildRecursively(doc.root, (n) => { + if (n.kind !== "element") { + return; + } + + const newChildren: Node[] = []; + let currentTextContent = ""; + + for (const child of n.children) { + if (child.kind === "text") { + currentTextContent += child.content; + } else { + if (currentTextContent !== "") { + newChildren.push(text(currentTextContent)); + currentTextContent = ""; + } + newChildren.push(child); + } + } + + if (currentTextContent !== "") { + newChildren.push(text(currentTextContent)); + } + + n.children = newChildren; + }); +} + +function removeUnnecessaryTextNode(doc: Document) { + forEachChildRecursively(doc.root, (n) => { + if (n.kind !== "element") { + return; + } + + let changed = true; + while (changed) { + changed = false; + if (n.children.length === 0) { + break; + } + const firstChild = n.children[0]; + if (firstChild.kind === "text" && firstChild.content.trim() === "") { + n.children.shift(); + changed = true; + } + if (n.children.length === 0) { + break; + } + const lastChild = n.children[n.children.length - 1]; + if (lastChild.kind === "text" && lastChild.content.trim() === "") { + n.children.pop(); + changed = true; + } + } + }); +} + +function transformLinkLikeToAnchorElement(doc: Document) { + forEachChildRecursively(doc.root, (n) => { + if ( + n.kind !== "element" || n.name === "a" || n.name === "code" || + n.name === "codeblock" + ) { + return; + } + + processTextNodesInElement(n, (content) => { + const nodes: Node[] = []; + let restContent = content; + while (restContent !== "") { + const match = /^(.*?)(https?:\/\/[^ \n]+)(.*)$/s.exec(restContent); + if (!match) { + nodes.push(text(restContent)); + restContent = ""; + break; + } + const [_, prefix, url, suffix] = match; + nodes.push(text(prefix)); + nodes.push(elem("a", { href: url, class: "url" }, text(url))); + restContent = suffix; + } + return nodes; + }); + }); +} + +function transformSectionIdAttribute(doc: Document) { + const sectionStack: string[] = []; + const usedIds = new Set(); + + const processNode = (n: Node) => { + if (n.kind !== "element") { + return; + } + + if (n.name === "section") { + const idAttr = n.attributes.id; + if (!idAttr) { + return; + } + + let newId: string; + if (sectionStack.length === 0) { + newId = `section--${idAttr}`; + } else { + newId = `section--${sectionStack.join("--")}--${idAttr}`; + } + + if (usedIds.has(newId)) { + throw new NuldocError( + `[nuldoc.tohtml] Duplicate section ID: ${newId}`, + ); + } + + usedIds.add(newId); + n.attributes.id = newId; + sectionStack.push(idAttr); + + forEachChild(n, processNode); + + sectionStack.pop(); + } else { + forEachChild(n, processNode); + } + }; + + forEachChild(doc.root, processNode); +} + +function setSectionTitleAnchor(doc: Document) { + const sectionStack: Element[] = []; + const g = (c: Node) => { + if (c.kind !== "element") { + return; + } + + if (c.name === "section") { + sectionStack.push(c); + } + forEachChild(c, g); + if (c.name === "section") { + sectionStack.pop(); + } + if (c.name === "h") { + const currentSection = sectionStack[sectionStack.length - 1]; + if (!currentSection) { + throw new NuldocError( + "[nuldoc.tohtml] element must be inside
", + ); + } + const sectionId = currentSection.attributes.id; + const aElement = elem("a", undefined, ...c.children); + aElement.attributes.href = `#${sectionId}`; + c.children = [aElement]; + } + }; + forEachChild(doc.root, g); +} + +function transformSectionTitleElement(doc: Document) { + let sectionLevel = 1; + const g = (c: Node) => { + if (c.kind !== "element") { + return; + } + + if (c.name === "section") { + sectionLevel += 1; + c.attributes.__sectionLevel = sectionLevel.toString(); + } + forEachChild(c, g); + if (c.name === "section") { + sectionLevel -= 1; + } + if (c.name === "h") { + c.name = `h${sectionLevel}`; + } + }; + forEachChild(doc.root, g); +} + +function transformNoteElement(doc: Document) { + forEachElementOfType(doc.root, "note", (n) => { + const editatAttr = n.attributes?.editat; + const operationAttr = n.attributes?.operation; + const isEditBlock = editatAttr && operationAttr; + + const labelElement = elem( + "div", + { class: "admonition-label" }, + text(isEditBlock ? `${editatAttr} ${operationAttr}` : "NOTE"), + ); + const contentElement = elem( + "div", + { class: "admonition-content" }, + ...n.children, + ); + n.name = "div"; + addClass(n, "admonition"); + n.children = [labelElement, contentElement]; + }); +} + +function addAttributesToExternalLinkElement(doc: Document) { + forEachElementOfType(doc.root, "a", (n) => { + const href = n.attributes.href ?? ""; + if (!href.startsWith("http")) { + return; + } + n.attributes.target = "_blank"; + n.attributes.rel = "noreferrer"; + }); +} + +function traverseFootnotes(doc: Document) { + let footnoteCounter = 0; + const footnoteMap = new Map(); + + forEachElementOfType(doc.root, "footnoteref", (n) => { + const reference = n.attributes.reference; + if (!reference) { + return; + } + + let footnoteNumber: number; + if (footnoteMap.has(reference)) { + footnoteNumber = footnoteMap.get(reference)!; + } else { + footnoteNumber = ++footnoteCounter; + footnoteMap.set(reference, footnoteNumber); + } + + n.name = "sup"; + delete n.attributes.reference; + n.attributes.class = "footnote"; + n.children = [ + elem( + "a", + { + id: `footnoteref--${reference}`, + class: "footnote", + href: `#footnote--${reference}`, + }, + text(`[${footnoteNumber}]`), + ), + ]; + }); + + forEachElementOfType(doc.root, "footnote", (n) => { + const id = n.attributes.id; + if (!id || !footnoteMap.has(id)) { + n.name = "span"; + n.children = []; + return; + } + + const footnoteNumber = footnoteMap.get(id)!; + + n.name = "div"; + delete n.attributes.id; + n.attributes.class = "footnote"; + n.attributes.id = `footnote--${id}`; + + n.children = [ + elem( + "a", + { href: `#footnoteref--${id}` }, + text(`${footnoteNumber}. `), + ), + ...n.children, + ]; + }); +} + +function removeUnnecessaryParagraphNode(doc: Document) { + forEachChildRecursively(doc.root, (n) => { + if (n.kind !== "element" || (n.name !== "ul" && n.name !== "ol")) { + return; + } + + const isTight = n.attributes.__tight === "true"; + if (!isTight) { + return; + } + + for (const child of n.children) { + if (child.kind !== "element" || child.name !== "li") { + continue; + } + const newGrandChildren: Node[] = []; + for (const grandChild of child.children) { + if (grandChild.kind === "element" && grandChild.name === "p") { + newGrandChildren.push(...grandChild.children); + } else { + newGrandChildren.push(grandChild); + } + } + child.children = newGrandChildren; + } + }); +} + +async function transformAndHighlightCodeBlockElement(doc: Document) { + await forEachChildRecursivelyAsync(doc.root, async (n) => { + if (n.kind !== "element" || n.name !== "codeblock") { + return; + } + + const language = n.attributes.language || "text"; + const filename = n.attributes.filename; + const numbered = n.attributes.numbered; + const sourceCodeNode = n.children[0] as Text | RawHTML; + const sourceCode = sourceCodeNode.kind === "text" + ? sourceCodeNode.content.trimEnd() + : sourceCodeNode.html.trimEnd(); + + const highlighted = await codeToHtml(sourceCode, { + lang: language in bundledLanguages ? language as BundledLanguage : "text", + theme: "github-light", + colorReplacements: { + "#fff": "#f5f5f5", + }, + }); + + n.name = "div"; + n.attributes.class = "codeblock"; + delete n.attributes.language; + + if (numbered === "true") { + delete n.attributes.numbered; + addClass(n, "numbered"); + } + if (filename) { + delete n.attributes.filename; + + n.children = [ + elem("div", { class: "filename" }, text(filename)), + rawHTML(highlighted), + ]; + } else { + if (sourceCodeNode.kind === "text") { + n.children[0] = rawHTML(highlighted); + } else { + sourceCodeNode.html = highlighted; + } + } + }); +} + +function generateTableOfContents(doc: Document) { + if (!doc.isTocEnabled) { + return; + } + const tocEntries: TocEntry[] = []; + const stack: TocEntry[] = []; + const excludedLevels: number[] = []; // Track levels to exclude + + const processNode = (node: Node) => { + if (node.kind !== "element") { + return; + } + + const match = node.name.match(/^h(\d+)$/); + if (match) { + const level = parseInt(match[1]); + + let parentSection: Element | null = null; + const findParentSection = (n: Node, target: Node): Element | null => { + if (n.kind !== "element") return null; + + for (const child of n.children) { + if (child === target && n.name === "section") { + return n; + } + const result = findParentSection(child, target); + if (result) return result; + } + return null; + }; + + parentSection = findParentSection(doc.root, node); + if (!parentSection) return; + + // Check if this section has toc=false attribute + const tocAttribute = parentSection.attributes.toc; + if (tocAttribute === "false") { + // Add this level to excluded levels and remove deeper levels + excludedLevels.length = 0; + excludedLevels.push(level); + return; + } + + // Check if this header should be excluded based on parent exclusion + const shouldExclude = excludedLevels.some((excludedLevel) => + level > excludedLevel + ); + if (shouldExclude) { + return; + } + + // Clean up excluded levels that are now at same or deeper level + while ( + excludedLevels.length > 0 && + excludedLevels[excludedLevels.length - 1] >= level + ) { + excludedLevels.pop(); + } + + const sectionId = parentSection.attributes.id; + if (!sectionId) return; + + let headingText = ""; + for (const child of node.children) { + if (child.kind === "element" && child.name === "a") { + headingText = innerText(child); + } + } + + const entry: TocEntry = { + id: sectionId, + text: headingText, + level: level, + children: [], + }; + + while (stack.length > 0 && stack[stack.length - 1].level >= level) { + stack.pop(); + } + + if (stack.length === 0) { + tocEntries.push(entry); + } else { + stack[stack.length - 1].children.push(entry); + } + + stack.push(entry); + } + + forEachChild(node, processNode); + }; + + forEachChild(doc.root, processNode); + + // Don't generate TOC if there's only one top-level section with no children + if (tocEntries.length === 1 && tocEntries[0].children.length === 0) { + return; + } + + doc.toc = { + entries: tocEntries, + }; +} + +function removeTocAttributes(doc: Document) { + forEachChildRecursively(doc.root, (node) => { + if (node.kind === "element" && node.name === "section") { + delete node.attributes.toc; + } + }); +} diff --git a/services/nuldoc/nuldoc-src/pages/PostPage.ts b/services/nuldoc/nuldoc-src/pages/PostPage.ts index 84f58c3..fe67089 100644 --- a/services/nuldoc/nuldoc-src/pages/PostPage.ts +++ b/services/nuldoc/nuldoc-src/pages/PostPage.ts @@ -4,7 +4,7 @@ import PageLayout from "../components/PageLayout.ts"; import TableOfContents from "../components/TableOfContents.ts"; import { Config, getTagLabel } from "../config.ts"; import { elem, Element } from "../dom.ts"; -import { Document } from "../djot/document.ts"; +import { Document } from "../markdown/document.ts"; import { dateToString } from "../revision.ts"; import { getPostPublishedDate } from "../generators/post.ts"; -- cgit v1.2.3-70-g09d2