From b54ddb16ba38851dbd0fe97892b83821f0957a6c Mon Sep 17 00:00:00 2001 From: adilallo <39313955+adilallo@users.noreply.github.com> Date: Thu, 4 Sep 2025 10:49:48 -0600 Subject: [PATCH] Added content processing system --- lib/cache.js | 240 ++++++++++++++++++ lib/content.js | 287 +++++++++++++++------ lib/contentProcessor.js | 376 ++++++++++++++++++++++++++++ lib/mdx.js | 262 +++++++++++++++++++ tests/unit/contentProcessor.test.js | 104 ++++++++ 5 files changed, 1192 insertions(+), 77 deletions(-) create mode 100644 lib/cache.js create mode 100644 lib/contentProcessor.js create mode 100644 lib/mdx.js create mode 100644 tests/unit/contentProcessor.test.js diff --git a/lib/cache.js b/lib/cache.js new file mode 100644 index 0000000..fff6558 --- /dev/null +++ b/lib/cache.js @@ -0,0 +1,240 @@ +/** + * Content caching utilities for improved performance + */ + +// In-memory cache for blog posts +const blogPostCache = new Map(); +const blogListCache = new Map(); +const tagCache = new Map(); +const authorCache = new Map(); + +// Cache configuration +const CACHE_TTL = 5 * 60 * 1000; // 5 minutes in milliseconds +const MAX_CACHE_SIZE = 100; // Maximum number of cached items + +/** + * Cache entry with timestamp + */ +class CacheEntry { + constructor(data) { + this.data = data; + this.timestamp = Date.now(); + } + + isExpired() { + return Date.now() - this.timestamp > CACHE_TTL; + } +} + +/** + * Get cached blog post data + * @param {string} key - Cache key + * @returns {Object|null} Cached data or null if not found/expired + */ +function getCached(key) { + const entry = blogPostCache.get(key); + if (!entry || entry.isExpired()) { + blogPostCache.delete(key); + return null; + } + return entry.data; +} + +/** + * Set cached blog post data + * @param {string} key - Cache key + * @param {Object} data - Data to cache + */ +function setCached(key, data) { + // Implement LRU eviction if cache is full + if (blogPostCache.size >= MAX_CACHE_SIZE) { + const oldestKey = blogPostCache.keys().next().value; + blogPostCache.delete(oldestKey); + } + + blogPostCache.set(key, new CacheEntry(data)); +} + +/** + * Clear expired cache entries + */ +function clearExpiredCache() { + for (const [key, entry] of blogPostCache.entries()) { + if (entry.isExpired()) { + blogPostCache.delete(key); + } + } +} + +/** + * Clear all caches + */ +export function clearAllCaches() { + blogPostCache.clear(); + blogListCache.clear(); + tagCache.clear(); + authorCache.clear(); +} + +/** + * Get cached blog post by slug + * @param {string} slug - Blog post slug + * @returns {Object|null} Cached blog post or null + */ +export function getCachedBlogPost(slug) { + return getCached(`post:${slug}`); +} + +/** + * Cache blog post data + * @param {string} slug - Blog post slug + * @param {Object} postData - Blog post data + */ +export function cacheBlogPost(slug, postData) { + setCached(`post:${slug}`, postData); +} + +/** + * Get cached blog post list + * @param {string} key - Cache key for list (e.g., 'all', 'recent', 'tag:governance') + * @returns {Array|null} Cached list or null + */ +export function getCachedBlogList(key) { + const entry = blogListCache.get(key); + if (!entry || entry.isExpired()) { + blogListCache.delete(key); + return null; + } + return entry.data; +} + +/** + * Cache blog post list + * @param {string} key - Cache key + * @param {Array} listData - List data to cache + */ +export function cacheBlogList(key, listData) { + blogListCache.set(key, new CacheEntry(listData)); +} + +/** + * Get cached tags + * @returns {Array|null} Cached tags or null + */ +export function getCachedTags() { + const entry = tagCache.get("all"); + if (!entry || entry.isExpired()) { + tagCache.delete("all"); + return null; + } + return entry.data; +} + +/** + * Cache tags + * @param {Array} tags - Tags to cache + */ +export function cacheTags(tags) { + tagCache.set("all", new CacheEntry(tags)); +} + +/** + * Get cached authors + * @returns {Array|null} Cached authors or null + */ +export function getCachedAuthors() { + const entry = authorCache.get("all"); + if (!entry || entry.isExpired()) { + authorCache.delete("all"); + return null; + } + return entry.data; +} + +/** + * Cache authors + * @param {Array} authors - Authors to cache + */ +export function cacheAuthors(authors) { + authorCache.set("all", new CacheEntry(authors)); +} + +/** + * Invalidate cache for a specific blog post + * @param {string} slug - Blog post slug + */ +export function invalidateBlogPostCache(slug) { + blogPostCache.delete(`post:${slug}`); + // Also invalidate list caches since they might contain this post + blogListCache.clear(); +} + +/** + * Invalidate all caches + */ +export function invalidateAllCaches() { + clearAllCaches(); +} + +/** + * Get cache statistics + * @returns {Object} Cache statistics + */ +export function getCacheStats() { + clearExpiredCache(); + + return { + blogPostCacheSize: blogPostCache.size, + blogListCacheSize: blogListCache.size, + tagCacheSize: tagCache.size, + authorCacheSize: authorCache.size, + totalCacheSize: + blogPostCache.size + blogListCache.size + tagCache.size + authorCacheSize, + maxCacheSize: MAX_CACHE_SIZE, + cacheTTL: CACHE_TTL, + }; +} + +/** + * Warm up cache with frequently accessed data + * @param {Function} getAllPosts - Function to get all blog posts + * @param {Function} getAllTags - Function to get all tags + */ +export async function warmCache(getAllPosts, getAllTags) { + try { + // Cache all blog posts + const allPosts = getAllPosts(); + cacheBlogList("all", allPosts); + + // Cache recent posts + const recentPosts = allPosts.slice(0, 5); + cacheBlogList("recent", recentPosts); + + // Cache tags + const tags = getAllTags(); + cacheTags(tags); + + // Cache individual posts (first 10) + allPosts.slice(0, 10).forEach((post) => { + cacheBlogPost(post.slug, post); + }); + + console.log("Cache warmed up successfully"); + } catch (error) { + console.error("Error warming up cache:", error); + } +} + +/** + * Check if cache is healthy + * @returns {boolean} True if cache is healthy + */ +export function isCacheHealthy() { + try { + clearExpiredCache(); + return blogPostCache.size < MAX_CACHE_SIZE; + } catch (error) { + console.error("Cache health check failed:", error); + return false; + } +} diff --git a/lib/content.js b/lib/content.js index 2c284c5..5f4678e 100644 --- a/lib/content.js +++ b/lib/content.js @@ -7,6 +7,54 @@ import { validateBlogPost, sanitizeBlogPost } from "./validation.js"; * Content processing utilities for blog posts */ +/** + * Convert markdown content to HTML with basic formatting + * @param {string} markdown - Raw markdown content + * @returns {string} HTML content + */ +function markdownToHtml(markdown) { + if (!markdown) return ""; + + return ( + markdown + // Headers + .replace(/^### (.*$)/gim, "

$1

") + .replace(/^## (.*$)/gim, "

$1

") + .replace(/^# (.*$)/gim, "

$1

") + // Bold and italic + .replace(/\*\*(.*?)\*\*/g, "$1") + .replace(/\*(.*?)\*/g, "$1") + // Links + .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1') + // Lists + .replace(/^\* (.*$)/gim, "
  • $1
  • ") + .replace(/^- (.*$)/gim, "
  • $1
  • ") + .replace(/(
  • .*<\/li>)/gim, "") + // Paragraphs + .replace(/\n\n/g, "

    ") + .replace(/^(?!<[h|u|li])(.*$)/gim, "

    $1

    ") + // Clean up empty paragraphs + .replace(/

    <\/p>/g, "") + .replace(/

    (.*?)<\/p>/g, (match, content) => { + return content.trim() ? match : ""; + }) + ); +} + +/** + * Generate a URL-friendly slug from a string + * @param {string} text - Text to convert to slug + * @returns {string} URL-friendly slug + */ +function generateSlug(text) { + return text + .toLowerCase() + .replace(/[^\w\s-]/g, "") // Remove special characters + .replace(/\s+/g, "-") // Replace spaces with hyphens + .replace(/-+/g, "-") // Replace multiple hyphens with single + .trim(); +} + /** * Get all blog post files from the content directory * @returns {Array} Array of file paths @@ -31,125 +79,210 @@ export function getBlogPostFiles() { * @returns {Object|null} Parsed blog post data or null if invalid */ export function parseBlogPost(filePath) { - try { - const fullPath = path.join(process.cwd(), "content/blog", filePath); - const fileContents = fs.readFileSync(fullPath, "utf8"); - const { data: frontmatter, content } = matter(fileContents); + const fullPath = path.join(process.cwd(), "content/blog", filePath); - // Validate frontmatter - const validation = validateBlogPost(frontmatter); - if (!validation.isValid) { - console.error(`Validation failed for ${filePath}:`, validation.errors); + try { + const fileContents = fs.readFileSync(fullPath, "utf8"); + const { data, content } = matter(fileContents); + + const validationResult = validateBlogPost(data); + if (!validationResult.isValid) { + console.error( + `Validation errors for ${filePath}:`, + validationResult.errors + ); return null; } - // Sanitize frontmatter - const sanitized = sanitizeBlogPost(frontmatter); - - // Generate slug from filename - const slug = filePath.replace(/\.(md|mdx)$/, ""); + const sanitizedFrontmatter = sanitizeBlogPost(data); + const slug = generateSlug(filePath.replace(/\.mdx?$/, "")); return { slug, - frontmatter: sanitized, + frontmatter: sanitizedFrontmatter, content, + htmlContent: markdownToHtml(content), filePath, + lastModified: fs.statSync(fullPath).mtime, }; } catch (error) { - console.error(`Error parsing blog post ${filePath}:`, error); + console.error(`Error parsing blog post file ${filePath}:`, error); return null; } } /** - * Get all blog posts with parsed data + * Get all blog posts, sorted by date * @returns {Array} Array of parsed blog post objects */ export function getAllBlogPosts() { - const files = getBlogPostFiles(); - const posts = files - .map((file) => parseBlogPost(file)) - .filter((post) => post !== null) + const fileNames = getBlogPostFiles(); + const allPosts = fileNames + .map((fileName) => parseBlogPost(fileName)) + .filter(Boolean) // Filter out nulls (invalid posts) .sort( (a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date) - ); - - return posts; + ); // Sort by date descending + return allPosts; } /** - * Get a single blog post by slug - * @param {string} slug - The post slug - * @returns {Object|null} Parsed blog post or null if not found + * Get a single blog post by its slug + * @param {string} slug - The slug of the blog post + * @returns {Object|null} The parsed blog post data or null if not found */ export function getBlogPostBySlug(slug) { - const files = getBlogPostFiles(); - const file = files.find((f) => f.replace(/\.(md|mdx)$/, "") === slug); - - if (!file) { - return null; - } - - return parseBlogPost(file); -} - -/** - * Get related blog posts - * @param {string} currentSlug - Current post slug - * @param {Array} relatedSlugs - Array of related post slugs - * @param {number} limit - Maximum number of related posts to return - * @returns {Array} Array of related blog posts - */ -export function getRelatedBlogPosts(currentSlug, relatedSlugs = [], limit = 3) { - if (!relatedSlugs || relatedSlugs.length === 0) { - // Fallback: get posts with similar tags or recent posts - const allPosts = getAllBlogPosts(); - return allPosts.filter((post) => post.slug !== currentSlug).slice(0, limit); - } - const allPosts = getAllBlogPosts(); - const related = allPosts - .filter((post) => relatedSlugs.includes(post.slug)) - .slice(0, limit); - - // If we don't have enough related posts, fill with recent ones - if (related.length < limit) { - const recent = allPosts - .filter( - (post) => post.slug !== currentSlug && !relatedSlugs.includes(post.slug) - ) - .slice(0, limit - related.length); - return [...related, ...recent]; - } - - return related; + return allPosts.find((post) => post.slug === slug) || null; } /** - * Get all unique tags from blog posts - * @returns {Array} Array of unique tags + * Get related blog posts based on provided slugs or fallback to recent posts. + * @param {string} currentPostSlug - The slug of the current post to exclude. + * @param {string[]} relatedSlugs - Array of slugs for explicitly related posts. + * @param {number} limit - Maximum number of related posts to return. + * @returns {Array} Array of related blog post objects. + */ +export function getRelatedBlogPosts( + currentPostSlug, + relatedSlugs = [], + limit = 3 +) { + const allPosts = getAllBlogPosts(); + const filteredPosts = allPosts.filter( + (post) => post.slug !== currentPostSlug + ); + + let related = []; + if (relatedSlugs && relatedSlugs.length > 0) { + related = relatedSlugs + .map((slug) => filteredPosts.find((post) => post.slug === slug)) + .filter(Boolean); // Filter out any related slugs that don't exist + } + + // If not enough related posts, or no related slugs provided, fill with recent posts + if (related.length < limit) { + const remainingSlots = limit - related.length; + const existingRelatedSlugs = new Set(related.map((p) => p.slug)); + const recentPosts = filteredPosts + .filter((post) => !existingRelatedSlugs.has(post.slug)) + .slice(0, remainingSlots); + related = [...related, ...recentPosts]; + } + + return related.slice(0, limit); +} + +/** + * Get all unique tags from all blog posts. + * @returns {string[]} Array of unique tags. */ export function getAllTags() { - const posts = getAllBlogPosts(); + const allPosts = getAllBlogPosts(); const tags = new Set(); - - posts.forEach((post) => { + allPosts.forEach((post) => { if (post.frontmatter.tags) { post.frontmatter.tags.forEach((tag) => tags.add(tag)); } }); - - return Array.from(tags).sort(); + return Array.from(tags); } /** - * Get blog posts by tag - * @param {string} tag - Tag to filter by - * @returns {Array} Array of blog posts with the specified tag + * Get blog posts filtered by a specific tag. + * @param {string} tag - The tag to filter by. + * @returns {Object[]} Array of blog post objects matching the tag. */ export function getBlogPostsByTag(tag) { - const posts = getAllBlogPosts(); - return posts.filter( + const allPosts = getAllBlogPosts(); + return allPosts.filter( (post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag) ); } + +/** + * Search blog posts by text content + * @param {string} query - Search query + * @param {number} limit - Maximum number of results + * @returns {Object[]} Array of matching blog post objects + */ +export function searchBlogPosts(query, limit = 10) { + if (!query || query.trim() === "") return []; + + const searchTerm = query.toLowerCase().trim(); + const allPosts = getAllBlogPosts(); + + const results = allPosts.filter((post) => { + const titleMatch = post.frontmatter.title + .toLowerCase() + .includes(searchTerm); + const descriptionMatch = post.frontmatter.description + .toLowerCase() + .includes(searchTerm); + const contentMatch = post.content.toLowerCase().includes(searchTerm); + const tagMatch = post.frontmatter.tags?.some((tag) => + tag.toLowerCase().includes(searchTerm) + ); + + return titleMatch || descriptionMatch || contentMatch || tagMatch; + }); + + return results.slice(0, limit); +} + +/** + * Get blog posts by author + * @param {string} author - Author name to filter by + * @returns {Object[]} Array of blog post objects by the author + */ +export function getBlogPostsByAuthor(author) { + const allPosts = getAllBlogPosts(); + return allPosts.filter( + (post) => post.frontmatter.author.toLowerCase() === author.toLowerCase() + ); +} + +/** + * Get recent blog posts + * @param {number} limit - Maximum number of posts to return + * @returns {Object[]} Array of recent blog post objects + */ +export function getRecentBlogPosts(limit = 5) { + const allPosts = getAllBlogPosts(); + return allPosts.slice(0, limit); +} + +/** + * Get blog post statistics + * @returns {Object} Statistics about blog posts + */ +export function getBlogStats() { + const allPosts = getAllBlogPosts(); + const tags = getAllTags(); + + return { + totalPosts: allPosts.length, + totalTags: tags.length, + totalAuthors: new Set(allPosts.map((post) => post.frontmatter.author)).size, + dateRange: { + earliest: + allPosts.length > 0 + ? allPosts[allPosts.length - 1].frontmatter.date + : null, + latest: allPosts.length > 0 ? allPosts[0].frontmatter.date : null, + }, + averagePostsPerMonth: + allPosts.length > 0 + ? Math.round( + (allPosts.length / + Math.max( + 1, + (new Date(allPosts[0].frontmatter.date) - + new Date(allPosts[allPosts.length - 1].frontmatter.date)) / + (1000 * 60 * 60 * 24 * 30) + )) * + 10 + ) / 10 + : 0, + }; +} diff --git a/lib/contentProcessor.js b/lib/contentProcessor.js new file mode 100644 index 0000000..260345e --- /dev/null +++ b/lib/contentProcessor.js @@ -0,0 +1,376 @@ +/** + * Comprehensive content processing system for blog posts + */ + +import { + processMarkdown, + generateTableOfContents, + processFrontmatter, +} from "./mdx.js"; +import { validateBlogPost, sanitizeBlogPost } from "./validation.js"; +import { + getCachedBlogPost, + cacheBlogPost, + getCachedBlogList, + cacheBlogList, + getCachedTags, + cacheTags, + warmCache, +} from "./cache.js"; +import fs from "fs"; +import path from "path"; + +/** + * Main content processor class + */ +class ContentProcessor { + constructor() { + this.contentDirectory = path.join(process.cwd(), "content/blog"); + this.processedPosts = new Map(); + this.isInitialized = false; + } + + /** + * Initialize the content processor + */ + async initialize() { + if (this.isInitialized) return; + + try { + // Warm up cache + await warmCache( + () => this.getAllPosts(), + () => this.getAllTags() + ); + + this.isInitialized = true; + console.log("Content processor initialized successfully"); + } catch (error) { + console.error("Failed to initialize content processor:", error); + throw error; + } + } + + /** + * Get all blog post files + * @returns {Array} Array of file paths + */ + getBlogPostFiles() { + try { + const files = fs.readdirSync(this.contentDirectory); + return files.filter( + (file) => file.endsWith(".md") || file.endsWith(".mdx") + ); + } catch (error) { + console.error("Error reading blog content directory:", error); + return []; + } + } + + /** + * Process a single blog post file + * @param {string} filePath - Path to the markdown file + * @returns {Object|null} Processed blog post data or null if invalid + */ + processBlogPost(filePath) { + const fullPath = path.join(this.contentDirectory, filePath); + + try { + const fileContents = fs.readFileSync(fullPath, "utf8"); + const { data, content } = require("gray-matter")(fileContents); + + // Validate frontmatter + const validationResult = validateBlogPost(data); + if (!validationResult.isValid) { + console.error( + `Validation errors for ${filePath}:`, + validationResult.errors + ); + return null; + } + + // Sanitize frontmatter + const sanitizedFrontmatter = sanitizeBlogPost(data); + + // Process markdown content + const processedContent = processMarkdown(content); + + // Generate slug + const slug = this.generateSlug(filePath.replace(/\.mdx?$/, "")); + + // Get file stats + const stats = fs.statSync(fullPath); + + // Create processed post object + const processedPost = { + slug, + frontmatter: processFrontmatter(sanitizedFrontmatter), + content: processedContent.content, + htmlContent: processedContent.htmlContent, + wordCount: processedContent.wordCount, + readingTime: processedContent.readingTime, + headings: processedContent.headings, + links: processedContent.links, + images: processedContent.images, + tableOfContents: generateTableOfContents(processedContent.headings), + filePath, + lastModified: stats.mtime, + fileSize: stats.size, + metadata: { + processedAt: new Date(), + processorVersion: "1.0.0", + }, + }; + + // Cache the processed post + cacheBlogPost(slug, processedPost); + + return processedPost; + } catch (error) { + console.error(`Error processing blog post file ${filePath}:`, error); + return null; + } + } + + /** + * Get all blog posts with caching + * @returns {Array} Array of processed blog post objects + */ + getAllPosts() { + // Check cache first + const cached = getCachedBlogList("all"); + if (cached) return cached; + + const fileNames = this.getBlogPostFiles(); + const allPosts = fileNames + .map((fileName) => this.processBlogPost(fileName)) + .filter(Boolean) + .sort( + (a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date) + ); + + // Cache the result + cacheBlogList("all", allPosts); + + return allPosts; + } + + /** + * Get a single blog post by slug with caching + * @param {string} slug - The slug of the blog post + * @returns {Object|null} The processed blog post data or null if not found + */ + getBlogPostBySlug(slug) { + // Check cache first + const cached = getCachedBlogPost(slug); + if (cached) return cached; + + // If not in cache, find and process the post + const allPosts = this.getAllPosts(); + const post = allPosts.find((post) => post.slug === slug); + + if (post) { + cacheBlogPost(slug, post); + return post; + } + + return null; + } + + /** + * Get recent blog posts + * @param {number} limit - Maximum number of posts to return + * @returns {Array} Array of recent blog post objects + */ + getRecentPosts(limit = 5) { + const cacheKey = `recent:${limit}`; + const cached = getCachedBlogList(cacheKey); + if (cached) return cached; + + const allPosts = this.getAllPosts(); + const recentPosts = allPosts.slice(0, limit); + + cacheBlogList(cacheKey, recentPosts); + return recentPosts; + } + + /** + * Get blog posts by tag + * @param {string} tag - The tag to filter by + * @returns {Array} Array of blog post objects matching the tag + */ + getPostsByTag(tag) { + const cacheKey = `tag:${tag}`; + const cached = getCachedBlogList(cacheKey); + if (cached) return cached; + + const allPosts = this.getAllPosts(); + const taggedPosts = allPosts.filter( + (post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag) + ); + + cacheBlogList(cacheKey, taggedPosts); + return taggedPosts; + } + + /** + * Get all unique tags with caching + * @returns {Array} Array of unique tags + */ + getAllTags() { + const cached = getCachedTags(); + if (cached) return cached; + + const allPosts = this.getAllPosts(); + const tags = new Set(); + allPosts.forEach((post) => { + if (post.frontmatter.tags) { + post.frontmatter.tags.forEach((tag) => tags.add(tag)); + } + }); + + const tagsArray = Array.from(tags).sort(); + cacheTags(tagsArray); + return tagsArray; + } + + /** + * Search blog posts + * @param {string} query - Search query + * @param {number} limit - Maximum number of results + * @returns {Array} Array of matching blog post objects + */ + searchPosts(query, limit = 10) { + if (!query || query.trim() === "") return []; + + const searchTerm = query.toLowerCase().trim(); + const allPosts = this.getAllPosts(); + + const results = allPosts.filter((post) => { + const titleMatch = post.frontmatter.title + .toLowerCase() + .includes(searchTerm); + const descriptionMatch = post.frontmatter.description + .toLowerCase() + .includes(searchTerm); + const contentMatch = post.content.toLowerCase().includes(searchTerm); + const tagMatch = post.frontmatter.tags?.some((tag) => + tag.toLowerCase().includes(searchTerm) + ); + + return titleMatch || descriptionMatch || contentMatch || tagMatch; + }); + + return results.slice(0, limit); + } + + /** + * Get blog statistics + * @returns {Object} Statistics about blog posts + */ + getBlogStats() { + const allPosts = this.getAllPosts(); + const tags = this.getAllTags(); + + return { + totalPosts: allPosts.length, + totalTags: tags.length, + totalAuthors: new Set( + allPosts.map((post) => post.frontmatter.author).size + ), + totalWords: allPosts.reduce((sum, post) => sum + post.wordCount, 0), + averageReadingTime: + allPosts.length > 0 + ? Math.round( + allPosts.reduce((sum, post) => sum + post.readingTime, 0) / + allPosts.length + ) + : 0, + dateRange: { + earliest: + allPosts.length > 0 + ? allPosts[allPosts.length - 1].frontmatter.date + : null, + latest: allPosts.length > 0 ? allPosts[0].frontmatter.date : null, + }, + averagePostsPerMonth: + allPosts.length > 0 + ? Math.round( + (allPosts.length / + Math.max( + 1, + (new Date(allPosts[0].frontmatter.date) - + new Date(allPosts[allPosts.length - 1].frontmatter.date)) / + (1000 * 60 * 60 * 24 * 30) + )) * + 10 + ) / 10 + : 0, + }; + } + + /** + * Generate a URL-friendly slug from a string + * @param {string} text - Text to convert to slug + * @returns {string} URL-friendly slug + */ + generateSlug(text) { + return text + .toLowerCase() + .replace(/[^\w\s-]/g, "") + .replace(/\s+/g, "-") + .replace(/-+/g, "-") + .trim(); + } + + /** + * Refresh content (reprocess all posts) + * @returns {Array} Array of reprocessed blog post objects + */ + refreshContent() { + console.log("Refreshing content..."); + + // Clear processed posts cache + this.processedPosts.clear(); + + // Reprocess all posts + const allPosts = this.getAllPosts(); + + console.log(`Refreshed ${allPosts.length} blog posts`); + return allPosts; + } + + /** + * Get content processing status + * @returns {Object} Status information + */ + getStatus() { + return { + isInitialized: this.isInitialized, + totalFiles: this.getBlogPostFiles().length, + processedPosts: this.processedPosts.size, + contentDirectory: this.contentDirectory, + lastRefresh: new Date().toISOString(), + }; + } +} + +// Create and export singleton instance +const contentProcessor = new ContentProcessor(); + +// Export the instance and convenience functions +export { contentProcessor }; + +// Export convenience functions bound to the instance +export const getAllPosts = () => contentProcessor.getAllPosts(); +export const getBlogPostBySlug = (slug) => + contentProcessor.getBlogPostBySlug(slug); +export const getRecentPosts = (limit) => contentProcessor.getRecentPosts(limit); +export const getPostsByTag = (tag) => contentProcessor.getPostsByTag(tag); +export const getAllTags = () => contentProcessor.getAllTags(); +export const searchPosts = (query, limit) => + contentProcessor.searchPosts(query, limit); +export const getBlogStats = () => contentProcessor.getBlogStats(); +export const refreshContent = () => contentProcessor.refreshContent(); +export const getStatus = () => contentProcessor.getStatus(); +export const initialize = () => contentProcessor.initialize(); diff --git a/lib/mdx.js b/lib/mdx.js new file mode 100644 index 0000000..0178b25 --- /dev/null +++ b/lib/mdx.js @@ -0,0 +1,262 @@ +/** + * MDX processing utilities for enhanced markdown content + */ + +/** + * Process markdown content with enhanced features + * @param {string} markdown - Raw markdown content + * @returns {Object} Processed content with metadata + */ +export function processMarkdown(markdown) { + if (!markdown) { + return { + content: "", + htmlContent: "", + wordCount: 0, + readingTime: 0, + headings: [], + links: [], + images: [], + }; + } + + // Extract headings for table of contents + const headings = extractHeadings(markdown); + + // Extract links + const links = extractLinks(markdown); + + // Extract images + const images = extractImages(markdown); + + // Calculate word count and reading time + const wordCount = calculateWordCount(markdown); + const readingTime = calculateReadingTime(wordCount); + + return { + content: markdown, + htmlContent: markdownToHtml(markdown), + wordCount, + readingTime, + headings, + links, + images, + }; +} + +/** + * Extract all headings from markdown content + * @param {string} markdown - Raw markdown content + * @returns {Array} Array of heading objects with level, text, and id + */ +function extractHeadings(markdown) { + const headingRegex = /^(#{1,6})\s+(.+)$/gm; + const headings = []; + let match; + + while ((match = headingRegex.exec(markdown)) !== null) { + const level = match[1].length; + const text = match[2].trim(); + const id = generateHeadingId(text); + + headings.push({ + level, + text, + id, + line: markdown.substring(0, match.index).split("\n").length, + }); + } + + return headings; +} + +/** + * Extract all links from markdown content + * @param {string} markdown - Raw markdown content + * @returns {Array} Array of link objects + */ +function extractLinks(markdown) { + const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g; + const links = []; + let match; + + while ((match = linkRegex.exec(markdown)) !== null) { + links.push({ + text: match[1], + url: match[2], + index: match.index, + }); + } + + return links; +} + +/** + * Extract all images from markdown content + * @param {string} markdown - Raw markdown content + * @returns {Array} Array of image objects + */ +function extractImages(markdown) { + const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g; + const images = []; + let match; + + while ((match = imageRegex.exec(markdown)) !== null) { + images.push({ + alt: match[1], + src: match[2], + index: match.index, + }); + } + + return images; +} + +/** + * Generate a unique ID for a heading + * @param {string} text - Heading text + * @returns {string} Unique ID + */ +function generateHeadingId(text) { + return text + .toLowerCase() + .replace(/[^\w\s-]/g, "") + .replace(/\s+/g, "-") + .replace(/-+/g, "-") + .trim(); +} + +/** + * Calculate word count from markdown content + * @param {string} markdown - Raw markdown content + * @returns {number} Word count + */ +function calculateWordCount(markdown) { + // Remove markdown syntax and count words + const cleanText = markdown + .replace(/[#*`~\[\]()]/g, "") // Remove markdown characters + .replace(/\n+/g, " ") // Replace newlines with spaces + .trim(); + + return cleanText.split(/\s+/).filter((word) => word.length > 0).length; +} + +/** + * Calculate estimated reading time + * @param {number} wordCount - Number of words + * @returns {number} Reading time in minutes + */ +function calculateReadingTime(wordCount) { + const wordsPerMinute = 200; // Average reading speed + return Math.ceil(wordCount / wordsPerMinute); +} + +/** + * Convert markdown to HTML with enhanced formatting + * @param {string} markdown - Raw markdown content + * @returns {string} HTML content + */ +function markdownToHtml(markdown) { + if (!markdown) return ""; + + return ( + markdown + // Headers with IDs + .replace(/^### (.*$)/gim, (match, text) => { + const id = generateHeadingId(text); + return `

    ${text}

    `; + }) + .replace(/^## (.*$)/gim, (match, text) => { + const id = generateHeadingId(text); + return `

    ${text}

    `; + }) + .replace(/^# (.*$)/gim, (match, text) => { + const id = generateHeadingId(text); + return `

    ${text}

    `; + }) + // Bold and italic + .replace(/\*\*(.*?)\*\*/g, "$1") + .replace(/\*(.*?)\*/g, "$1") + // Code blocks + .replace( + /```(\w+)?\n([\s\S]*?)\n```/g, + '
    $2
    ' + ) + .replace(/`([^`]+)`/g, "$1") + // Links + .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '$1') + // Lists + .replace(/^\* (.*$)/gim, "
  • $1
  • ") + .replace(/^- (.*$)/gim, "
  • $1
  • ") + .replace(/(
  • .*<\/li>)/gim, "") + // Blockquotes + .replace(/^> (.*$)/gim, "

    $1

    ") + // Horizontal rules + .replace(/^---$/gm, "
    ") + .replace(/^\*\*\*$/gm, "
    ") + // Paragraphs + .replace(/\n\n/g, "

    ") + .replace(/^(?!<[h|u|li|blockquote|hr|pre])(.*$)/gim, "

    $1

    ") + // Clean up empty paragraphs and fix list wrapping + .replace(/

    <\/p>/g, "") + .replace(/

    (.*?)<\/p>/g, (match, content) => { + return content.trim() ? match : ""; + }) + .replace(/<\/ul>\s*