/** * MDX processing utilities for enhanced markdown content */ /** * Format date consistently across the markdown pipeline * Uses "Month Year" format (e.g., "April 2025") */ export function formatDate(dateString) { const date = new Date(dateString); return date.toLocaleDateString("en-US", { year: "numeric", month: "long", }); } /** * Process markdown content and extract metadata * @param {string} markdown - Raw markdown content * @returns {object} Processed content with metadata */ export function processMarkdown(markdown) { if (!markdown) { return { content: "", htmlContent: "", headings: [], links: [], images: [], }; } // Extract headings for table of contents const headings = extractHeadings(markdown); // Extract links const links = extractLinks(markdown); // Extract images const images = extractImages(markdown); // Convert markdown to HTML const htmlContent = markdownToHtml(markdown); return { content: markdown, htmlContent, headings, links, images, }; } /** * Extract all headings from markdown content * @param {string} markdown - Raw markdown content * @returns {Array} Array of heading objects with level, text, and id */ function extractHeadings(markdown) { const headingRegex = /^(#{1,6})\s+(.+)$/gm; const headings = []; let match; while ((match = headingRegex.exec(markdown)) !== null) { const level = match[1].length; const text = match[2].trim(); const id = generateHeadingId(text); headings.push({ level, text, id, line: markdown.substring(0, match.index).split("\n").length, }); } return headings; } /** * Extract all links from markdown content * @param {string} markdown - Raw markdown content * @returns {Array} Array of link objects */ function extractLinks(markdown) { const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g; const links = []; let match; while ((match = linkRegex.exec(markdown)) !== null) { links.push({ text: match[1], url: match[2], index: match.index, }); } return links; } /** * Extract all images from markdown content * @param {string} markdown - Raw markdown content * @returns {Array} Array of image objects */ function extractImages(markdown) { const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g; const images = []; let match; while ((match = imageRegex.exec(markdown)) !== null) { images.push({ alt: match[1], src: match[2], index: match.index, }); } return images; } /** * Generate a unique ID for a heading * @param {string} text - Heading text * @returns {string} Unique ID */ function generateHeadingId(text) { return text .toLowerCase() .replace(/[^\w\s-]/g, "") .replace(/\s+/g, "-") .replace(/-+/g, "-") .trim(); } /** * Convert markdown to HTML with enhanced formatting * - Preserves extra blank lines between paragraphs as visible gaps * (each extra blank line becomes
) * @param {string} markdown - Raw markdown content * @returns {string} HTML content */ function markdownToHtml(markdown) { if (!markdown) return ""; // Normalize line endings const GAP_TOKEN = "
${code}`,
)
.replace(/`([^`]+)`/g, "$1")
// Bold and italic (strong before em to avoid overlap issues)
.replace(/\*\*(.+?)\*\*/g, "$1")
.replace(/\*(.+?)\*/g, "$1")
// Links and images
.replace(
/!\[([^\]]*)\]\(([^)\s]+)(?:\s+"([^"]+)")?\)/g,
(m, alt, src, title = "") =>
``; }) // Lists (ul/ol) .replace(/^(\s*[-*]\s.+(?:\n\s*[-*]\s.+)*)/gim, (m) => { const items = m .trim() .split(/\n/) .map((l) => l.replace(/^\s*[-*]\s+/, "")) .map((t) => `${inner.replace( /\n{2,}/g, "
", )}
")
// 2) Convert single line breaks to
tags within paragraphs
.replace(/(?")
// 3) Wrap remaining bare lines that are not already block-level elements.
// (Also skip our GAP_TOKEN so we can turn it into gap paragraphs later.)
.replace(
/^(?!\s*<(h[1-6]|ul|ol|li|blockquote|hr|pre|code|table|img)\b)(?!\s*<\/)(?!\s*
$2
", ) // Clean up truly empty paragraphs but keep gap paragraphs .replace(/\s*<\/p>/g, "")
// Turn counted GAP tokens into explicit, styleable gap elements
.replace(
/