path: root/src/server/anki/parser.ts
author     nsfisis <nsfisis@gmail.com>  2025-12-15 22:29:21 +0900
committer  nsfisis <nsfisis@gmail.com>  2025-12-15 22:29:21 +0900
commit     ced08d592e3d277044eb9bbfea1bef0e4e4285e3 (patch)
tree       8f0e1be5d84eed3d011308762bf71d4f3f069b30 /src/server/anki/parser.ts
parent     bca2f725ff6a2abf9454c8bf6568f95105b4f350 (diff)
download   kioku-ced08d592e3d277044eb9bbfea1bef0e4e4285e3.tar.gz
           kioku-ced08d592e3d277044eb9bbfea1bef0e4e4285e3.tar.zst
           kioku-ced08d592e3d277044eb9bbfea1bef0e4e4285e3.zip
feat(anki): add parser for Anki .apkg files
Implement ZIP extraction and SQLite database reading for Anki package files. The parser extracts notes, cards, decks, and models from .apkg files using the Node.js native node:sqlite module.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
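For orientation, here is a minimal usage sketch of the API added by this commit (the import path and the sample file name below are illustrative, not part of the change):

```ts
// Illustrative call site for the two exported functions.
import {
  listAnkiPackageContents,
  parseAnkiPackage,
} from "./src/server/anki/parser";

async function inspectApkg(path: string): Promise<void> {
  // Raw ZIP entries, e.g. "collection.anki2" and the media manifest.
  const files = await listAnkiPackageContents(path);
  console.log("archive entries:", files);

  // Notes, cards, decks, and models read from the embedded SQLite collection.
  const pkg = await parseAnkiPackage(path);
  console.log(
    `${pkg.notes.length} notes, ${pkg.cards.length} cards, ` +
      `${pkg.decks.length} decks, ${pkg.models.length} models`,
  );
}

inspectApkg("./sample-deck.apkg").catch(console.error);
```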
Diffstat (limited to 'src/server/anki/parser.ts')
-rw-r--r--  src/server/anki/parser.ts  374
1 file changed, 374 insertions, 0 deletions
diff --git a/src/server/anki/parser.ts b/src/server/anki/parser.ts
new file mode 100644
index 0000000..c317ce7
--- /dev/null
+++ b/src/server/anki/parser.ts
@@ -0,0 +1,374 @@
+import { randomBytes } from "node:crypto";
+import { existsSync } from "node:fs";
+import { mkdir, open, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { DatabaseSync } from "node:sqlite";
+import { createInflateRaw } from "node:zlib";
+
+/**
+ * Represents a note from an Anki database
+ */
+export interface AnkiNote {
+ id: number;
+ guid: string;
+ mid: number; // model/notetype id
+ mod: number;
+ tags: string[];
+ fields: string[]; // fields separated by 0x1f in the database
+ sfld: string; // sort field
+}
+
+/**
+ * Represents a card from an Anki database
+ */
+export interface AnkiCard {
+ id: number;
+ nid: number; // note id
+ did: number; // deck id
+ ord: number; // ordinal (which template/cloze)
+ mod: number;
+ type: number; // 0=new, 1=learning, 2=review, 3=relearning
+ queue: number;
+ due: number;
+ ivl: number; // interval
+ factor: number;
+ reps: number;
+ lapses: number;
+}
+
+/**
+ * Represents a deck from an Anki database
+ */
+export interface AnkiDeck {
+ id: number;
+ name: string;
+ description: string;
+}
+
+/**
+ * Represents a model (note type) from an Anki database
+ */
+export interface AnkiModel {
+ id: number;
+ name: string;
+ fields: string[];
+ templates: {
+ name: string;
+ qfmt: string; // question format
+ afmt: string; // answer format
+ }[];
+}
+
+/**
+ * Represents the parsed contents of an Anki package
+ */
+export interface AnkiPackage {
+ notes: AnkiNote[];
+ cards: AnkiCard[];
+ decks: AnkiDeck[];
+ models: AnkiModel[];
+}
+
+// ZIP record signatures (local file header, central directory, end of central directory)
+const LOCAL_FILE_HEADER_SIG = 0x04034b50;
+const CENTRAL_DIR_SIG = 0x02014b50;
+const END_CENTRAL_DIR_SIG = 0x06054b50;
+
+/**
+ * Parse a ZIP file and extract entries
+ * This is a minimal implementation for .apkg files (no ZIP64 or data descriptor support)
+ */
+async function parseZip(filePath: string): Promise<Map<string, Buffer>> {
+ const fileHandle = await open(filePath, "r");
+ const stat = await fileHandle.stat();
+ const fileSize = stat.size;
+
+ try {
+ const entries = new Map<string, Buffer>();
+
+ // Read the entire file for simplicity (apkg files are typically small)
+ const buffer = Buffer.alloc(fileSize);
+ await fileHandle.read(buffer, 0, fileSize, 0);
+
+ let offset = 0;
+
+ while (offset + 4 <= fileSize) {
+ // Read signature
+ const sig = buffer.readUInt32LE(offset);
+
+ if (sig === LOCAL_FILE_HEADER_SIG) {
+ // Local file header: method at +8, compressed size at +18, name/extra lengths at +26/+28; data follows the 30-byte header plus name and extra field
+ const compressionMethod = buffer.readUInt16LE(offset + 8);
+ const compressedSize = buffer.readUInt32LE(offset + 18);
+ const fileNameLength = buffer.readUInt16LE(offset + 26);
+ const extraFieldLength = buffer.readUInt16LE(offset + 28);
+
+ const fileName = buffer
+ .subarray(offset + 30, offset + 30 + fileNameLength)
+ .toString("utf8");
+ const dataOffset = offset + 30 + fileNameLength + extraFieldLength;
+
+ // Extract the data
+ const compressedData = buffer.subarray(
+ dataOffset,
+ dataOffset + compressedSize,
+ );
+
+ let data: Buffer;
+ if (compressionMethod === 0) {
+ // Stored (no compression)
+ data = compressedData;
+ } else if (compressionMethod === 8) {
+ // Deflate
+ data = await inflateBuffer(compressedData);
+ } else {
+ throw new Error(
+ `Unsupported compression method: ${compressionMethod}`,
+ );
+ }
+
+ entries.set(fileName, data);
+
+ offset = dataOffset + compressedSize;
+ } else if (sig === CENTRAL_DIR_SIG || sig === END_CENTRAL_DIR_SIG) {
+ // We've reached the central directory, stop parsing
+ break;
+ } else {
+ // Unknown signature, try to move forward
+ offset++;
+ }
+ }
+
+ return entries;
+ } finally {
+ await fileHandle.close();
+ }
+}
+
+/**
+ * Inflate a deflate-compressed buffer
+ */
+function inflateBuffer(data: Buffer): Promise<Buffer> {
+ return new Promise((resolve, reject) => {
+ const chunks: Buffer[] = [];
+ const inflate = createInflateRaw();
+
+ inflate.on("data", (chunk) => chunks.push(chunk));
+ inflate.on("end", () => resolve(Buffer.concat(chunks)));
+ inflate.on("error", reject);
+
+ inflate.write(data);
+ inflate.end();
+ });
+}
+
+/**
+ * Extract and parse an Anki package file (.apkg)
+ */
+export async function parseAnkiPackage(filePath: string): Promise<AnkiPackage> {
+ if (!existsSync(filePath)) {
+ throw new Error(`File not found: ${filePath}`);
+ }
+
+ // Extract ZIP contents
+ const entries = await parseZip(filePath);
+
+ // Find the database file
+ let dbBuffer: Buffer | undefined;
+ let dbFormat: "anki2" | "anki21" | "anki21b" | undefined;
+
+ // Check for different database formats (newest first)
+ if (entries.has("collection.anki21b")) {
+ dbBuffer = entries.get("collection.anki21b");
+ dbFormat = "anki21b";
+ } else if (entries.has("collection.anki21")) {
+ dbBuffer = entries.get("collection.anki21");
+ dbFormat = "anki21";
+ } else if (entries.has("collection.anki2")) {
+ dbBuffer = entries.get("collection.anki2");
+ dbFormat = "anki2";
+ }
+
+ if (!dbBuffer || !dbFormat) {
+ const availableFiles = Array.from(entries.keys()).join(", ");
+ throw new Error(
+ `No Anki database found in package. Available files: ${availableFiles}`,
+ );
+ }
+
+ // For anki21b format, the database is zstd compressed
+ if (dbFormat === "anki21b") {
+ throw new Error(
+ "anki21b format (zstd compressed) is not yet supported. Please export from Anki using the legacy format.",
+ );
+ }
+
+ // Write database to temp file (node:sqlite requires a file path)
+ const tempDir = join(
+ tmpdir(),
+ `kioku-anki-${randomBytes(8).toString("hex")}`,
+ );
+ await mkdir(tempDir, { recursive: true });
+ const tempDbPath = join(tempDir, "collection.db");
+
+ try {
+ await writeFile(tempDbPath, dbBuffer);
+
+ // Parse the SQLite database
+ return parseAnkiDatabase(tempDbPath);
+ } finally {
+ // Clean up temp files
+ await rm(tempDir, { recursive: true, force: true });
+ }
+}
+
+/**
+ * Parse an Anki SQLite database
+ */
+function parseAnkiDatabase(dbPath: string): AnkiPackage {
+ const db = new DatabaseSync(dbPath, { open: true });
+
+ try {
+ // Parse notes
+ const notes = parseNotes(db);
+
+ // Parse cards
+ const cards = parseCards(db);
+
+ // Parse decks and models from the col table
+ const { decks, models } = parseCollection(db);
+
+ return { notes, cards, decks, models };
+ } finally {
+ db.close();
+ }
+}
+
+/**
+ * Parse notes from the database
+ */
+function parseNotes(db: DatabaseSync): AnkiNote[] {
+ const stmt = db.prepare(
+ "SELECT id, guid, mid, mod, tags, flds, sfld FROM notes",
+ );
+ const rows = stmt.all() as Array<{
+ id: number;
+ guid: string;
+ mid: number;
+ mod: number;
+ tags: string;
+ flds: string;
+ sfld: string;
+ }>;
+
+ return rows.map((row) => ({
+ id: row.id,
+ guid: row.guid,
+ mid: row.mid,
+ mod: row.mod,
+ tags: row.tags
+ .trim()
+ .split(/\s+/)
+ .filter((t) => t.length > 0),
+ fields: row.flds.split("\x1f"),
+ sfld: row.sfld,
+ }));
+}
+
+/**
+ * Parse cards from the database
+ */
+function parseCards(db: DatabaseSync): AnkiCard[] {
+ const stmt = db.prepare(
+ "SELECT id, nid, did, ord, mod, type, queue, due, ivl, factor, reps, lapses FROM cards",
+ );
+ const rows = stmt.all() as Array<{
+ id: number;
+ nid: number;
+ did: number;
+ ord: number;
+ mod: number;
+ type: number;
+ queue: number;
+ due: number;
+ ivl: number;
+ factor: number;
+ reps: number;
+ lapses: number;
+ }>;
+
+ return rows.map((row) => ({
+ id: row.id,
+ nid: row.nid,
+ did: row.did,
+ ord: row.ord,
+ mod: row.mod,
+ type: row.type,
+ queue: row.queue,
+ due: row.due,
+ ivl: row.ivl,
+ factor: row.factor,
+ reps: row.reps,
+ lapses: row.lapses,
+ }));
+}
+
+/**
+ * Parse collection metadata (decks and models)
+ */
+function parseCollection(db: DatabaseSync): {
+ decks: AnkiDeck[];
+ models: AnkiModel[];
+} {
+ const stmt = db.prepare("SELECT decks, models FROM col LIMIT 1");
+ const row = stmt.get() as { decks: string; models: string } | undefined;
+
+ if (!row) {
+ throw new Error("No collection data found in database");
+ }
+
+ // Parse decks JSON
+ const decksJson = JSON.parse(row.decks) as Record<
+ string,
+ { id: number; name: string; desc?: string }
+ >;
+ const decks: AnkiDeck[] = Object.values(decksJson).map((d) => ({
+ id: d.id,
+ name: d.name,
+ description: d.desc || "",
+ }));
+
+ // Parse models JSON
+ const modelsJson = JSON.parse(row.models) as Record<
+ string,
+ {
+ id: number;
+ name: string;
+ flds: Array<{ name: string }>;
+ tmpls: Array<{ name: string; qfmt: string; afmt: string }>;
+ }
+ >;
+ const models: AnkiModel[] = Object.values(modelsJson).map((m) => ({
+ id: m.id,
+ name: m.name,
+ fields: m.flds.map((f) => f.name),
+ templates: m.tmpls.map((t) => ({
+ name: t.name,
+ qfmt: t.qfmt,
+ afmt: t.afmt,
+ })),
+ }));
+
+ return { decks, models };
+}
+
+/**
+ * Get the list of files in a ZIP archive
+ */
+export async function listAnkiPackageContents(
+ filePath: string,
+): Promise<string[]> {
+ const entries = await parseZip(filePath);
+ return Array.from(entries.keys());
+}
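As a follow-up usage note, here is a hedged sketch of how a caller could surface the anki21b limitation before attempting a full parse; the wrapper name parseLegacyApkg and its error message are hypothetical, and it only builds on the two functions exported above:

```ts
import {
  listAnkiPackageContents,
  parseAnkiPackage,
  type AnkiPackage,
} from "./src/server/anki/parser";

// Hypothetical wrapper: fail fast when the archive contains the zstd-compressed
// collection.anki21b database, which parseAnkiPackage checks for first and rejects.
async function parseLegacyApkg(path: string): Promise<AnkiPackage> {
  const entries = await listAnkiPackageContents(path);
  if (entries.includes("collection.anki21b")) {
    // Checking up front lets the caller ask for a legacy-format export
    // instead of surfacing the lower-level parser error.
    throw new Error(`Unsupported anki21b export: ${path}`);
  }
  return parseAnkiPackage(path);
}
```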