From b54ddb16ba38851dbd0fe97892b83821f0957a6c Mon Sep 17 00:00:00 2001
From: adilallo <39313955+adilallo@users.noreply.github.com>
Date: Thu, 4 Sep 2025 10:49:48 -0600
Subject: [PATCH] Added content processing system

---
 lib/cache.js                        | 240 ++++++++++++++++++
 lib/content.js                      | 287 +++++++++++++++------
 lib/contentProcessor.js             | 376 ++++++++++++++++++++++++++++
 lib/mdx.js                          | 262 +++++++++++++++++++
 tests/unit/contentProcessor.test.js | 104 ++++++++
 5 files changed, 1192 insertions(+), 77 deletions(-)
 create mode 100644 lib/cache.js
 create mode 100644 lib/contentProcessor.js
 create mode 100644 lib/mdx.js
 create mode 100644 tests/unit/contentProcessor.test.js

diff --git a/lib/cache.js b/lib/cache.js
new file mode 100644
index 0000000..fff6558
--- /dev/null
+++ b/lib/cache.js
@@ -0,0 +1,240 @@
+/**
+ * Content caching utilities for improved performance
+ */
+
+// In-memory cache for blog posts
+const blogPostCache = new Map();
+const blogListCache = new Map();
+const tagCache = new Map();
+const authorCache = new Map();
+
+// Cache configuration
+const CACHE_TTL = 5 * 60 * 1000; // 5 minutes in milliseconds
+const MAX_CACHE_SIZE = 100; // Maximum number of cached items
+
+/**
+ * Cache entry with timestamp
+ */
+class CacheEntry {
+  constructor(data) {
+    this.data = data;
+    this.timestamp = Date.now();
+  }
+
+  isExpired() {
+    return Date.now() - this.timestamp > CACHE_TTL;
+  }
+}
+
+/**
+ * Get cached blog post data
+ * @param {string} key - Cache key
+ * @returns {Object|null} Cached data or null if not found/expired
+ */
+function getCached(key) {
+  const entry = blogPostCache.get(key);
+  if (!entry || entry.isExpired()) {
+    blogPostCache.delete(key);
+    return null;
+  }
+  return entry.data;
+}
+
+/**
+ * Set cached blog post data
+ * @param {string} key - Cache key
+ * @param {Object} data - Data to cache
+ */
+function setCached(key, data) {
+  // Remove any existing entry first so re-caching a key never evicts a different entry.
+  blogPostCache.delete(key);
+  if (blogPostCache.size >= MAX_CACHE_SIZE) {
+    // Evict the oldest-inserted key; reads do not reorder, so this is FIFO rather than LRU.
+    blogPostCache.delete(blogPostCache.keys().next().value);
+  }
+  blogPostCache.set(key, new CacheEntry(data));
+}
+
+/**
+ * Clear expired cache entries
+ */
+function clearExpiredCache() {
+  for (const cache of [blogPostCache, blogListCache, tagCache, authorCache]) {
+    for (const [key, entry] of cache) {
+      if (entry.isExpired()) cache.delete(key); // deleting during Map iteration is safe
+    }
+  }
+}
+
+/**
+ * Clear all caches
+ */
+export function clearAllCaches() {
+  blogPostCache.clear();
+  blogListCache.clear();
+  tagCache.clear();
+  authorCache.clear();
+}
+
+/**
+ * Get cached blog post by slug
+ * @param {string} slug - Blog post slug
+ * @returns {Object|null} Cached blog post or null
+ */
+export function getCachedBlogPost(slug) {
+  return getCached(`post:${slug}`);
+}
+
+/**
+ * Cache blog post data
+ * @param {string} slug - Blog post slug
+ * @param {Object} postData - Blog post data
+ */
+export function cacheBlogPost(slug, postData) {
+  setCached(`post:${slug}`, postData);
+}
+
+/**
+ * Get cached blog post list
+ * @param {string} key - Cache key for list (e.g., 'all', 'recent', 'tag:governance')
+ * @returns {Array|null} Cached list or null
+ */
+export function getCachedBlogList(key) {
+  const entry = blogListCache.get(key);
+  if (!entry || entry.isExpired()) {
+    blogListCache.delete(key);
+    return null;
+  }
+  return entry.data;
+}
+
+/**
+ * Cache blog post list
+ * @param {string} key - Cache key
+ * @param {Array} listData - List data to cache
+ */
+export function cacheBlogList(key, listData) {
+  blogListCache.set(key, new CacheEntry(listData));
+}
+
+/**
+ * Get cached tags
+ * @returns {Array|null} Cached tags or null
+ */
+export function getCachedTags() {
+  const entry = tagCache.get("all");
+  if (!entry || entry.isExpired()) {
+    tagCache.delete("all");
+    return null;
+  }
+  return entry.data;
+}
+
+/**
+ * Cache tags
+ * @param {Array} tags - Tags to cache
+ */
+export function cacheTags(tags) {
+  tagCache.set("all", new CacheEntry(tags));
+}
+
+/**
+ * Get cached authors
+ * @returns {Array|null} Cached authors or null
+ */
+export function getCachedAuthors() {
+  const entry = authorCache.get("all");
+  if (!entry || entry.isExpired()) {
+    authorCache.delete("all");
+    return null;
+  }
+  return entry.data;
+}
+
+/**
+ * Cache authors
+ * @param {Array} authors - Authors to cache
+ */
+export function cacheAuthors(authors) {
+  authorCache.set("all", new CacheEntry(authors));
+}
+
+/**
+ * Invalidate cache for a specific blog post
+ * @param {string} slug - Blog post slug
+ */
+export function invalidateBlogPostCache(slug) {
+  blogPostCache.delete(`post:${slug}`);
+  // Also invalidate list caches since they might contain this post
+  blogListCache.clear();
+}
+
+/**
+ * Invalidate all caches
+ */
+export function invalidateAllCaches() {
+  clearAllCaches();
+}
+
+/**
+ * Get cache statistics
+ * @returns {Object} Cache statistics
+ */
+export function getCacheStats() {
+  clearExpiredCache();
+  // Run the expiry sweep first so entries it removes are not counted below.
+  return {
+    blogPostCacheSize: blogPostCache.size,
+    blogListCacheSize: blogListCache.size,
+    tagCacheSize: tagCache.size,
+    authorCacheSize: authorCache.size,
+    totalCacheSize:
+      blogPostCache.size + blogListCache.size + tagCache.size + authorCache.size,
+    maxCacheSize: MAX_CACHE_SIZE,
+    cacheTTL: CACHE_TTL,
+  };
+}
+
+/**
+ * Pre-populate the list, tag and post caches; failures are logged and swallowed, so the returned promise always resolves.
+ * @param {Function} getAllPosts - Called synchronously; its return value is sliced immediately, so it must return an array (not a promise)
+ * @param {Function} getAllTags - Called synchronously; its return value is stored as the cached tag list
+ */
+export async function warmCache(getAllPosts, getAllTags) {
+  try {
+    // Cache the full post list under the "all" key
+    const allPosts = getAllPosts();
+    cacheBlogList("all", allPosts);
+
+    // Cache the first 5 posts under "recent" — NOTE(review): ContentProcessor.getRecentPosts reads "recent:<limit>", so confirm this key has a reader
+    const recentPosts = allPosts.slice(0, 5);
+    cacheBlogList("recent", recentPosts);
+
+    // Cache the tag list
+    const tags = getAllTags();
+    cacheTags(tags);
+
+    // Cache the first 10 posts individually, keyed by slug
+    allPosts.slice(0, 10).forEach((post) => {
+      cacheBlogPost(post.slug, post);
+    });
+
+    console.log("Cache warmed up successfully");
+  } catch (error) {
+    console.error("Error warming up cache:", error);
+  }
+}
+
+/**
+ * Check if cache is healthy
+ * @returns {boolean} True if cache is healthy
+ */
+export function isCacheHealthy() {
+  try {
+    clearExpiredCache();
+    return blogPostCache.size < MAX_CACHE_SIZE;
+  } catch (error) {
+    console.error("Cache health check failed:", error);
+    return false;
+  }
+}
diff --git a/lib/content.js b/lib/content.js
index 2c284c5..5f4678e 100644
--- a/lib/content.js
+++ b/lib/content.js
@@ -7,6 +7,54 @@ import { validateBlogPost, sanitizeBlogPost } from "./validation.js";
  * Content processing utilities for blog posts
  */
 
+/**
+ * Convert markdown content to HTML with basic formatting
+ * @param {string} markdown - Raw markdown content
+ * @returns {string} HTML content
+ */
+function markdownToHtml(markdown) {
+  if (!markdown) return "";
+
+  return (
+    markdown
+      // Headers
+      .replace(/^### (.*$)/gim, "<h3>$1</h3>")
+      .replace(/^## (.*$)/gim, "<h2>$1</h2>")
+      .replace(/^# (.*$)/gim, "<h1>$1</h1>")
+      // Bold and italic
+      .replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>")
+      .replace(/\*(.*?)\*/g, "<em>$1</em>")
+      // Links
+      .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
+      // Lists: convert items, then wrap each run of consecutive items in ONE <ul>
+      .replace(/^\* (.*$)/gim, "<li>$1</li>")
+      .replace(/^- (.*$)/gim, "<li>$1</li>")
+      .replace(/<li>.*<\/li>(?:\n<li>.*<\/li>)*/g, "<ul>$&</ul>")
+      // Paragraphs; [h|u|li] is a character class, so lines starting "<h", "<u", "<l", "<i" or "<|" stay unwrapped
+      .replace(/\n\n/g, "</p><p>")
+      .replace(/^(?!<[h|u|li])(.*$)/gim, "<p>$1</p>")
+      // Clean up empty paragraphs
+      .replace(/<p><\/p>/g, "")
+      .replace(/<p>(.*?)<\/p>/g, (match, content) => {
+        return content.trim() ? match : "";
+      })
+  );
+}
+
+/**
+ * Generate a URL-friendly slug from a string
+ * @param {string} text - Text to convert to slug
+ * @returns {string} URL-friendly slug
+ */
+function generateSlug(text) {
+  return text
+    .toLowerCase()
+    .replace(/[^\w\s-]/g, "") // Remove special characters
+    .replace(/\s+/g, "-") // Replace whitespace runs with hyphens
+    .replace(/-+/g, "-") // Collapse repeated hyphens
+    .replace(/^-|-$/g, ""); // Strip edge hyphens (a trailing trim() is a no-op: whitespace is already gone)
+}
+
 /**
  * Get all blog post files from the content directory
  * @returns {Array} Array of file paths
@@ -31,125 +79,210 @@ export function getBlogPostFiles() {
  * @returns {Object|null} Parsed blog post data or null if invalid
  */
 export function parseBlogPost(filePath) {
-  try {
-    const fullPath = path.join(process.cwd(), "content/blog", filePath);
-    const fileContents = fs.readFileSync(fullPath, "utf8");
-    const { data: frontmatter, content } = matter(fileContents);
+  const fullPath = path.join(process.cwd(), "content/blog", filePath);
 
-    // Validate frontmatter
-    const validation = validateBlogPost(frontmatter);
-    if (!validation.isValid) {
-      console.error(`Validation failed for ${filePath}:`, validation.errors);
+  try {
+    const fileContents = fs.readFileSync(fullPath, "utf8");
+    const { data, content } = matter(fileContents);
+
+    const validationResult = validateBlogPost(data);
+    if (!validationResult.isValid) {
+      console.error(
+        `Validation errors for ${filePath}:`,
+        validationResult.errors
+      );
       return null;
     }
 
-    // Sanitize frontmatter
-    const sanitized = sanitizeBlogPost(frontmatter);
-
-    // Generate slug from filename
-    const slug = filePath.replace(/\.(md|mdx)$/, "");
+    const sanitizedFrontmatter = sanitizeBlogPost(data);
+    const slug = generateSlug(filePath.replace(/\.mdx?$/, ""));
 
     return {
       slug,
-      frontmatter: sanitized,
+      frontmatter: sanitizedFrontmatter,
       content,
+      htmlContent: markdownToHtml(content),
       filePath,
+      lastModified: fs.statSync(fullPath).mtime,
     };
   } catch (error) {
-    console.error(`Error parsing blog post ${filePath}:`, error);
+    console.error(`Error parsing blog post file ${filePath}:`, error);
     return null;
   }
 }
 
 /**
- * Get all blog posts with parsed data
+ * Get all blog posts, sorted by date
  * @returns {Array} Array of parsed blog post objects
  */
 export function getAllBlogPosts() {
-  const files = getBlogPostFiles();
-  const posts = files
-    .map((file) => parseBlogPost(file))
-    .filter((post) => post !== null)
+  const fileNames = getBlogPostFiles();
+  const allPosts = fileNames
+    .map((fileName) => parseBlogPost(fileName))
+    .filter(Boolean) // Filter out nulls (invalid posts)
     .sort(
       (a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date)
-    );
-
-  return posts;
+    ); // Sort by date descending
+  return allPosts;
 }
 
 /**
- * Get a single blog post by slug
- * @param {string} slug - The post slug
- * @returns {Object|null} Parsed blog post or null if not found
+ * Get a single blog post by its slug
+ * @param {string} slug - The slug of the blog post
+ * @returns {Object|null} The parsed blog post data or null if not found
  */
 export function getBlogPostBySlug(slug) {
-  const files = getBlogPostFiles();
-  const file = files.find((f) => f.replace(/\.(md|mdx)$/, "") === slug);
-
-  if (!file) {
-    return null;
-  }
-
-  return parseBlogPost(file);
-}
-
-/**
- * Get related blog posts
- * @param {string} currentSlug - Current post slug
- * @param {Array} relatedSlugs - Array of related post slugs
- * @param {number} limit - Maximum number of related posts to return
- * @returns {Array} Array of related blog posts
- */
-export function getRelatedBlogPosts(currentSlug, relatedSlugs = [], limit = 3) {
-  if (!relatedSlugs || relatedSlugs.length === 0) {
-    // Fallback: get posts with similar tags or recent posts
-    const allPosts = getAllBlogPosts();
-    return allPosts.filter((post) => post.slug !== currentSlug).slice(0, limit);
-  }
-
   const allPosts = getAllBlogPosts();
-  const related = allPosts
-    .filter((post) => relatedSlugs.includes(post.slug))
-    .slice(0, limit);
-
-  // If we don't have enough related posts, fill with recent ones
-  if (related.length < limit) {
-    const recent = allPosts
-      .filter(
-        (post) => post.slug !== currentSlug && !relatedSlugs.includes(post.slug)
-      )
-      .slice(0, limit - related.length);
-    return [...related, ...recent];
-  }
-
-  return related;
+  return allPosts.find((post) => post.slug === slug) || null;
 }
 
 /**
- * Get all unique tags from blog posts
- * @returns {Array} Array of unique tags
+ * Get related blog posts based on provided slugs or fallback to recent posts.
+ * @param {string} currentPostSlug - The slug of the current post to exclude.
+ * @param {string[]} relatedSlugs - Array of slugs for explicitly related posts.
+ * @param {number} limit - Maximum number of related posts to return.
+ * @returns {Array} Array of related blog post objects.
+ */
+export function getRelatedBlogPosts(
+  currentPostSlug,
+  relatedSlugs = [],
+  limit = 3
+) {
+  const allPosts = getAllBlogPosts();
+  const filteredPosts = allPosts.filter(
+    (post) => post.slug !== currentPostSlug
+  );
+
+  let related = [];
+  if (relatedSlugs && relatedSlugs.length > 0) {
+    related = relatedSlugs
+      .map((slug) => filteredPosts.find((post) => post.slug === slug))
+      .filter(Boolean); // Filter out any related slugs that don't exist
+  }
+
+  // If not enough related posts, or no related slugs provided, fill with recent posts
+  if (related.length < limit) {
+    const remainingSlots = limit - related.length;
+    const existingRelatedSlugs = new Set(related.map((p) => p.slug));
+    const recentPosts = filteredPosts
+      .filter((post) => !existingRelatedSlugs.has(post.slug))
+      .slice(0, remainingSlots);
+    related = [...related, ...recentPosts];
+  }
+
+  return related.slice(0, limit);
+}
+
+/**
+ * Get all unique tags from all blog posts.
+ * @returns {string[]} Array of unique tags.
  */
 export function getAllTags() {
-  const posts = getAllBlogPosts();
+  const allPosts = getAllBlogPosts();
   const tags = new Set();
-
-  posts.forEach((post) => {
+  allPosts.forEach((post) => {
     if (post.frontmatter.tags) {
       post.frontmatter.tags.forEach((tag) => tags.add(tag));
     }
   });
-
-  return Array.from(tags).sort();
+  return Array.from(tags).sort(); // keep the sorted order the replaced implementation returned
 }
 
 /**
- * Get blog posts by tag
- * @param {string} tag - Tag to filter by
- * @returns {Array} Array of blog posts with the specified tag
+ * Get blog posts filtered by a specific tag.
+ * @param {string} tag - The tag to filter by.
+ * @returns {Object[]} Array of blog post objects matching the tag.
  */
 export function getBlogPostsByTag(tag) {
-  const posts = getAllBlogPosts();
-  return posts.filter(
+  const allPosts = getAllBlogPosts();
+  return allPosts.filter(
     (post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag)
   );
 }
+
+/**
+ * Search blog posts by text content
+ * @param {string} query - Search query
+ * @param {number} limit - Maximum number of results
+ * @returns {Object[]} Array of matching blog post objects
+ */
+export function searchBlogPosts(query, limit = 10) {
+  if (typeof query !== "string" || query.trim() === "") return [];
+
+  const searchTerm = query.toLowerCase().trim();
+  const allPosts = getAllBlogPosts();
+
+  const results = allPosts.filter((post) => {
+    // Missing or non-string frontmatter fields are coerced to text instead of throwing.
+    const titleMatch = String(post.frontmatter.title ?? "")
+      .toLowerCase()
+      .includes(searchTerm);
+    const descriptionMatch = String(post.frontmatter.description ?? "")
+      .toLowerCase()
+      .includes(searchTerm);
+    const contentMatch = post.content.toLowerCase().includes(searchTerm);
+    const tagMatch = post.frontmatter.tags?.some((tag) =>
+      tag.toLowerCase().includes(searchTerm)
+    );
+    return titleMatch || descriptionMatch || contentMatch || tagMatch;
+  });
+
+  return results.slice(0, limit);
+}
+
+/**
+ * Get blog posts by author
+ * @param {string} author - Author name to filter by
+ * @returns {Object[]} Array of blog post objects by the author
+ */
+export function getBlogPostsByAuthor(author) {
+  const wanted = String(author ?? "").toLowerCase(); // computed once; a null/undefined author matches nothing
+  return getAllBlogPosts().filter(
+    (post) => wanted !== "" && String(post.frontmatter.author ?? "").toLowerCase() === wanted
+  );
+}
+
+/**
+ * Get recent blog posts
+ * @param {number} limit - Maximum number of posts to return
+ * @returns {Object[]} Array of recent blog post objects
+ */
+export function getRecentBlogPosts(limit = 5) {
+  const allPosts = getAllBlogPosts();
+  return allPosts.slice(0, limit);
+}
+
+/**
+ * Get blog post statistics
+ * @returns {Object} Statistics about blog posts
+ */
+export function getBlogStats() {
+  const allPosts = getAllBlogPosts();
+  const tags = getAllTags();
+
+  return {
+    totalPosts: allPosts.length,
+    totalTags: tags.length,
+    totalAuthors: new Set(allPosts.map((post) => post.frontmatter.author)).size,
+    dateRange: {
+      earliest:
+        allPosts.length > 0
+          ? allPosts[allPosts.length - 1].frontmatter.date
+          : null,
+      latest: allPosts.length > 0 ? allPosts[0].frontmatter.date : null,
+    },
+    averagePostsPerMonth:
+      allPosts.length > 0
+        ? Math.round(
+            (allPosts.length /
+              Math.max(
+                1,
+                (new Date(allPosts[0].frontmatter.date) -
+                  new Date(allPosts[allPosts.length - 1].frontmatter.date)) /
+                  (1000 * 60 * 60 * 24 * 30)
+              )) *
+              10
+          ) / 10
+        : 0,
+  };
+}
diff --git a/lib/contentProcessor.js b/lib/contentProcessor.js
new file mode 100644
index 0000000..260345e
--- /dev/null
+++ b/lib/contentProcessor.js
@@ -0,0 +1,376 @@
+/**
+ * Comprehensive content processing system for blog posts
+ */
+
+import {
+  processMarkdown,
+  generateTableOfContents,
+  processFrontmatter,
+} from "./mdx.js";
+import { validateBlogPost, sanitizeBlogPost } from "./validation.js";
+import {
+  getCachedBlogPost,
+  cacheBlogPost,
+  getCachedBlogList,
+  cacheBlogList,
+  getCachedTags,
+  cacheTags,
+  warmCache,
+} from "./cache.js";
+import fs from "fs";
+import path from "path";
+
+/**
+ * Main content processor class
+ */
+class ContentProcessor {
+  constructor() {
+    this.contentDirectory = path.join(process.cwd(), "content/blog");
+    this.processedPosts = new Map();
+    this.isInitialized = false;
+  }
+
+  /**
+   * Initialize the content processor
+   */
+  async initialize() {
+    if (this.isInitialized) return;
+
+    try {
+      // Warm up cache
+      await warmCache(
+        () => this.getAllPosts(),
+        () => this.getAllTags()
+      );
+
+      this.isInitialized = true;
+      console.log("Content processor initialized successfully");
+    } catch (error) {
+      console.error("Failed to initialize content processor:", error);
+      throw error;
+    }
+  }
+
+  /**
+   * Get all blog post files
+   * @returns {Array} Array of file paths
+   */
+  getBlogPostFiles() {
+    try {
+      const files = fs.readdirSync(this.contentDirectory);
+      return files.filter(
+        (file) => file.endsWith(".md") || file.endsWith(".mdx")
+      );
+    } catch (error) {
+      console.error("Error reading blog content directory:", error);
+      return [];
+    }
+  }
+
+  /**
+   * Process a single blog post file
+   * @param {string} filePath - Path to the markdown file
+   * @returns {Object|null} Processed blog post data or null if invalid
+   */
+  processBlogPost(filePath) {
+    const fullPath = path.join(this.contentDirectory, filePath);
+
+    try {
+      const fileContents = fs.readFileSync(fullPath, "utf8");
+      // NOTE(review): require() in a module that otherwise uses `import` only works under a CJS-transpiling toolchain — confirm, or import gray-matter at the top of the file.
+      const { data, content } = require("gray-matter")(fileContents);
+      // Validate frontmatter
+      const validationResult = validateBlogPost(data);
+      if (!validationResult.isValid) {
+        console.error(
+          `Validation errors for ${filePath}:`,
+          validationResult.errors
+        );
+        return null;
+      }
+
+      // Sanitize frontmatter
+      const sanitizedFrontmatter = sanitizeBlogPost(data);
+
+      // Process markdown content
+      const processedContent = processMarkdown(content);
+
+      // Generate slug
+      const slug = this.generateSlug(filePath.replace(/\.mdx?$/, ""));
+
+      // Get file stats
+      const stats = fs.statSync(fullPath);
+
+      // Create processed post object
+      const processedPost = {
+        slug,
+        frontmatter: processFrontmatter(sanitizedFrontmatter),
+        content: processedContent.content,
+        htmlContent: processedContent.htmlContent,
+        wordCount: processedContent.wordCount,
+        readingTime: processedContent.readingTime,
+        headings: processedContent.headings,
+        links: processedContent.links,
+        images: processedContent.images,
+        tableOfContents: generateTableOfContents(processedContent.headings),
+        filePath,
+        lastModified: stats.mtime,
+        fileSize: stats.size,
+        metadata: {
+          processedAt: new Date(),
+          processorVersion: "1.0.0",
+        },
+      };
+
+      // Cache the processed post
+      cacheBlogPost(slug, processedPost);
+
+      return processedPost;
+    } catch (error) {
+      console.error(`Error processing blog post file ${filePath}:`, error);
+      return null;
+    }
+  }
+
+  /**
+   * Get all blog posts with caching
+   * @returns {Array} Array of processed blog post objects
+   */
+  getAllPosts() {
+    // Check cache first
+    const cached = getCachedBlogList("all");
+    if (cached) return cached;
+
+    const fileNames = this.getBlogPostFiles();
+    const allPosts = fileNames
+      .map((fileName) => this.processBlogPost(fileName))
+      .filter(Boolean)
+      .sort(
+        (a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date)
+      );
+
+    // Cache the result
+    cacheBlogList("all", allPosts);
+
+    return allPosts;
+  }
+
+  /**
+   * Get a single blog post by slug with caching
+   * @param {string} slug - The slug of the blog post
+   * @returns {Object|null} The processed blog post data or null if not found
+   */
+  getBlogPostBySlug(slug) {
+    // Check cache first
+    const cached = getCachedBlogPost(slug);
+    if (cached) return cached;
+
+    // If not in cache, find and process the post
+    const allPosts = this.getAllPosts();
+    const post = allPosts.find((post) => post.slug === slug);
+
+    if (post) {
+      cacheBlogPost(slug, post);
+      return post;
+    }
+
+    return null;
+  }
+
+  /**
+   * Get recent blog posts
+   * @param {number} limit - Maximum number of posts to return
+   * @returns {Array} Array of recent blog post objects
+   */
+  getRecentPosts(limit = 5) {
+    const cacheKey = `recent:${limit}`;
+    const cached = getCachedBlogList(cacheKey);
+    if (cached) return cached;
+
+    const allPosts = this.getAllPosts();
+    const recentPosts = allPosts.slice(0, limit);
+
+    cacheBlogList(cacheKey, recentPosts);
+    return recentPosts;
+  }
+
+  /**
+   * Get blog posts by tag
+   * @param {string} tag - The tag to filter by
+   * @returns {Array} Array of blog post objects matching the tag
+   */
+  getPostsByTag(tag) {
+    const cacheKey = `tag:${tag}`;
+    const cached = getCachedBlogList(cacheKey);
+    if (cached) return cached;
+
+    const allPosts = this.getAllPosts();
+    const taggedPosts = allPosts.filter(
+      (post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag)
+    );
+
+    cacheBlogList(cacheKey, taggedPosts);
+    return taggedPosts;
+  }
+
+  /**
+   * Get all unique tags with caching
+   * @returns {Array} Array of unique tags
+   */
+  getAllTags() {
+    const cached = getCachedTags();
+    if (cached) return cached;
+
+    const allPosts = this.getAllPosts();
+    const tags = new Set();
+    allPosts.forEach((post) => {
+      if (post.frontmatter.tags) {
+        post.frontmatter.tags.forEach((tag) => tags.add(tag));
+      }
+    });
+
+    const tagsArray = Array.from(tags).sort();
+    cacheTags(tagsArray);
+    return tagsArray;
+  }
+
+  /**
+   * Search blog posts
+   * @param {string} query - Search query
+   * @param {number} limit - Maximum number of results
+   * @returns {Array} Array of matching blog post objects
+   */
+  searchPosts(query, limit = 10) {
+    if (typeof query !== "string" || query.trim() === "") return [];
+
+    const searchTerm = query.toLowerCase().trim();
+    const allPosts = this.getAllPosts();
+
+    const results = allPosts.filter((post) => {
+      // Missing or non-string frontmatter fields are coerced to text instead of throwing.
+      const titleMatch = String(post.frontmatter.title ?? "")
+        .toLowerCase()
+        .includes(searchTerm);
+      const descriptionMatch = String(post.frontmatter.description ?? "")
+        .toLowerCase()
+        .includes(searchTerm);
+      const contentMatch = post.content.toLowerCase().includes(searchTerm);
+      const tagMatch = post.frontmatter.tags?.some((tag) =>
+        tag.toLowerCase().includes(searchTerm)
+      );
+      return titleMatch || descriptionMatch || contentMatch || tagMatch;
+    });
+
+    return results.slice(0, limit);
+  }
+
+  /**
+   * Get blog statistics
+   * @returns {Object} Statistics about blog posts
+   */
+  getBlogStats() {
+    const allPosts = this.getAllPosts();
+    const tags = this.getAllTags();
+
+    return {
+      totalPosts: allPosts.length,
+      totalTags: tags.length,
+      totalAuthors: new Set(
+        allPosts.map((post) => post.frontmatter.author)
+      ).size,
+      totalWords: allPosts.reduce((sum, post) => sum + post.wordCount, 0),
+      averageReadingTime:
+        allPosts.length > 0
+          ? Math.round(
+              allPosts.reduce((sum, post) => sum + post.readingTime, 0) /
+                allPosts.length
+            )
+          : 0,
+      dateRange: {
+        earliest:
+          allPosts.length > 0
+            ? allPosts[allPosts.length - 1].frontmatter.date
+            : null,
+        latest: allPosts.length > 0 ? allPosts[0].frontmatter.date : null,
+      },
+      averagePostsPerMonth:
+        allPosts.length > 0
+          ? Math.round(
+              (allPosts.length /
+                Math.max(
+                  1,
+                  (new Date(allPosts[0].frontmatter.date) -
+                    new Date(allPosts[allPosts.length - 1].frontmatter.date)) /
+                    (1000 * 60 * 60 * 24 * 30)
+                )) *
+                10
+            ) / 10
+          : 0,
+    };
+  }
+
+  /**
+   * Generate a URL-friendly slug from a string
+   * @param {string} text - Text to convert to slug
+   * @returns {string} URL-friendly slug
+   */
+  generateSlug(text) {
+    return text
+      .toLowerCase()
+      .replace(/[^\w\s-]/g, "")
+      .replace(/\s+/g, "-")
+      .replace(/-+/g, "-")
+      .replace(/^-|-$/g, ""); // strip edge hyphens; a trailing trim() is a no-op once whitespace is gone
+  }
+
+  /**
+   * Refresh content (reprocess all posts)
+   * @returns {Array} Array of reprocessed blog post objects
+   */
+  refreshContent() {
+    console.log("Refreshing content...");
+    this.processedPosts.clear();
+    // Bypass getAllPosts(): within the TTL it returns the cached "all" list, making refresh a no-op.
+    const allPosts = this.getBlogPostFiles()
+      .map((fileName) => this.processBlogPost(fileName))
+      .filter(Boolean)
+      .sort((a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date));
+    cacheBlogList("all", allPosts); // tags and derived lists ("recent:N", "tag:x") still expire via TTL
+    console.log(`Refreshed ${allPosts.length} blog posts`);
+    return allPosts;
+  }
+
+  /**
+   * Get content processing status
+   * @returns {Object} Status information
+   */
+  getStatus() {
+    return {
+      isInitialized: this.isInitialized,
+      totalFiles: this.getBlogPostFiles().length,
+      processedPosts: this.processedPosts.size,
+      contentDirectory: this.contentDirectory,
+      lastRefresh: new Date().toISOString(),
+    };
+  }
+}
+
+// Create and export singleton instance
+const contentProcessor = new ContentProcessor();
+
+// Export the instance and convenience functions
+export { contentProcessor };
+
+// Export convenience functions bound to the instance
+export const getAllPosts = () => contentProcessor.getAllPosts();
+export const getBlogPostBySlug = (slug) =>
+  contentProcessor.getBlogPostBySlug(slug);
+export const getRecentPosts = (limit) => contentProcessor.getRecentPosts(limit);
+export const getPostsByTag = (tag) => contentProcessor.getPostsByTag(tag);
+export const getAllTags = () => contentProcessor.getAllTags();
+export const searchPosts = (query, limit) =>
+  contentProcessor.searchPosts(query, limit);
+export const getBlogStats = () => contentProcessor.getBlogStats();
+export const refreshContent = () => contentProcessor.refreshContent();
+export const getStatus = () => contentProcessor.getStatus();
+export const initialize = () => contentProcessor.initialize();
diff --git a/lib/mdx.js b/lib/mdx.js
new file mode 100644
index 0000000..0178b25
--- /dev/null
+++ b/lib/mdx.js
@@ -0,0 +1,262 @@
+/**
+ * MDX processing utilities for enhanced markdown content
+ */
+
+/**
+ * Process markdown content with enhanced features
+ * @param {string} markdown - Raw markdown content
+ * @returns {Object} Processed content with metadata
+ */
+export function processMarkdown(markdown) {
+  if (!markdown) {
+    return {
+      content: "",
+      htmlContent: "",
+      wordCount: 0,
+      readingTime: 0,
+      headings: [],
+      links: [],
+      images: [],
+    };
+  }
+
+  // Extract headings for table of contents
+  const headings = extractHeadings(markdown);
+
+  // Extract links
+  const links = extractLinks(markdown);
+
+  // Extract images
+  const images = extractImages(markdown);
+
+  // Calculate word count and reading time
+  const wordCount = calculateWordCount(markdown);
+  const readingTime = calculateReadingTime(wordCount);
+
+  return {
+    content: markdown,
+    htmlContent: markdownToHtml(markdown),
+    wordCount,
+    readingTime,
+    headings,
+    links,
+    images,
+  };
+}
+
+/**
+ * Extract all headings from markdown content
+ * @param {string} markdown - Raw markdown content
+ * @returns {Array} Array of heading objects with level, text, and id
+ */
+function extractHeadings(markdown) {
+  const headingRegex = /^(#{1,6})\s+(.+)$/gm;
+  const headings = [];
+  let match;
+
+  while ((match = headingRegex.exec(markdown)) !== null) {
+    const level = match[1].length;
+    const text = match[2].trim();
+    const id = generateHeadingId(text);
+
+    headings.push({
+      level,
+      text,
+      id,
+      line: markdown.substring(0, match.index).split("\n").length,
+    });
+  }
+
+  return headings;
+}
+
+/**
+ * Extract all links from markdown content
+ * @param {string} markdown - Raw markdown content
+ * @returns {Array} Array of link objects
+ */
+function extractLinks(markdown) {
+  // (?<!!) skips image syntax "![alt](src)", whose "[alt](src)" tail the bare link pattern would also match.
+  const linkRegex = /(?<!!)\[([^\]]+)\]\(([^)]+)\)/g;
+  const links = [];
+  let match;
+  while ((match = linkRegex.exec(markdown)) !== null) {
+    links.push({
+      text: match[1],
+      url: match[2],
+      index: match.index,
+    });
+  }
+
+  return links;
+}
+
+/**
+ * Extract all images from markdown content
+ * @param {string} markdown - Raw markdown content
+ * @returns {Array} Array of image objects
+ */
+function extractImages(markdown) {
+  const imageRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
+  const images = [];
+  let match;
+
+  while ((match = imageRegex.exec(markdown)) !== null) {
+    images.push({
+      alt: match[1],
+      src: match[2],
+      index: match.index,
+    });
+  }
+
+  return images;
+}
+
+/**
+ * Generate a URL-friendly ID for a heading (not de-duplicated: identical text yields identical IDs)
+ * @param {string} text - Heading text; surrounding whitespace does not affect the result
+ * @returns {string} Lowercase, hyphen-separated ID with no leading or trailing hyphen
+ */
+function generateHeadingId(text) {
+  return text
+    .toLowerCase()
+    .replace(/[^\w\s-]/g, "")
+    .replace(/\s+/g, "-")
+    .replace(/-+/g, "-")
+    .replace(/^-|-$/g, ""); // strip edge hyphens; a trailing trim() is a no-op once whitespace is gone
+}
+
+/**
+ * Approximate the number of words in markdown content
+ * @param {string} markdown - Raw markdown content
+ * @returns {number} Count of whitespace-separated tokens after syntax removal
+ */
+function calculateWordCount(markdown) {
+  const withoutSyntax = markdown.replace(/[#*`~\[\]()]/g, ""); // drop markdown characters
+  const singleLine = withoutSyntax.replace(/\n+/g, " ").trim(); // newlines become spaces
+  let total = 0;
+  for (const token of singleLine.split(/\s+/)) {
+    if (token.length > 0) total += 1; // "" appears when the text is empty
+  }
+  return total;
+}
+
+/**
+ * Estimate reading time at an average pace of 200 words per minute
+ * @param {number} wordCount - Number of words
+ * @returns {number} Reading time in whole minutes, rounded up
+ */
+function calculateReadingTime(wordCount) {
+  const minutes = wordCount / 200; // 200 = assumed words read per minute
+  return Math.ceil(minutes);
+}
+
+/**
+ * Convert a small markdown subset to HTML: fenced and inline code, headings
+ * with IDs, rules, lists, blockquotes, bold/italic, images, links and
+ * paragraphs. Code contents are HTML-escaped; raw HTML elsewhere in the
+ * source is passed through unchanged.
+ * @param {string} markdown - Raw markdown content
+ * @returns {string} HTML content ("" when `markdown` is empty or missing)
+ */
+function markdownToHtml(markdown) {
+  if (!markdown) return "";
+
+  const escapeHtml = (text) =>
+    text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+  // Code is parked behind NUL-delimited placeholders ("P" = block, "C" =
+  // inline) so no later rule can rewrite it; it is restored as the last step.
+  const parked = [];
+  const park = (kind, html) => `\u0000${kind}${parked.push(html) - 1}\u0000`;
+  const heading = (level) => (match, text) =>
+    `<h${level} id="${generateHeadingId(text)}">${text}</h${level}>`;
+  // Lookahead that rejects blank lines and lines that already hold a block.
+  const notBlock = "(?![ \\t]*$|<(?:h[1-6]|hr|ul|blockquote)\\b|\\u0000P)";
+  const paragraphRun = new RegExp(`^${notBlock}.+(?:\\n${notBlock}.+)*`, "gm");
+
+  return (
+    markdown
+      // Normalize CRLF / CR line endings so the line-based rules below apply
+      .replace(/\r\n?/g, "\n")
+      // Fenced code first, so "#", "*" and "-" inside it stay literal
+      .replace(/```(\w+)?\n([\s\S]*?)\n```/g, (match, lang = "", code) =>
+        park("P", `<pre><code class="language-${lang}">${escapeHtml(code)}</code></pre>`)
+      )
+      // Headers with IDs (before inline code, so IDs come from the raw text)
+      .replace(/^### (.*$)/gm, heading(3))
+      .replace(/^## (.*$)/gm, heading(2))
+      .replace(/^# (.*$)/gm, heading(1))
+      .replace(/`([^`]+)`/g, (match, code) => park("C", `<code>${escapeHtml(code)}</code>`))
+      // Line rules run before emphasis so "* item" and "***" are not italics
+      .replace(/^(?:---|\*\*\*)$/gm, "<hr>")
+      .replace(/^[*-] (.*$)/gm, "<ul><li>$1</li></ul>")
+      .replace(/<\/ul>\s*<ul>/g, "") // Merge consecutive ul elements
+      .replace(/^> (.*$)/gm, "<blockquote><p>$1</p></blockquote>")
+      // Bold and italic
+      .replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>")
+      .replace(/\*(.*?)\*/g, "<em>$1</em>")
+      // Images, then links (image syntax is link syntax with a leading "!")
+      .replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '<img src="$2" alt="$1">')
+      .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
+      // Paragraphs: wrap each run of consecutive plain lines
+      .replace(paragraphRun, "<p>$&</p>")
+      // Blank separator lines are no longer needed once blocks are wrapped
+      .replace(/\n{2,}/g, "\n")
+      // Put the parked code back, untouched by the rules above
+      .replace(/\u0000[PC](\d+)\u0000/g, (match, slot) => parked[Number(slot)])
+  );
+}
+
+/**
+ * Generate a table of contents from headings
+ * Heading text and IDs are HTML-escaped, so characters such as "<" or "&"
+ * in a title cannot break or inject into the generated markup.
+ * @param {Array<{level: number, text: string, id: string}>} headings - Headings to list
+ * @returns {string} HTML table of contents ("" when there are no headings)
+ */
+export function generateTableOfContents(headings) {
+  if (!headings || headings.length === 0) return "";
+
+  const escapeHtml = (value) =>
+    String(value).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
+  const items = headings.map(({ level, text, id }) => {
+    const indent = (level - 1) * 20; // 20px per nesting level; h1 is flush left
+    return `<li style="margin-left: ${indent}px"><a href="#${escapeHtml(id)}">${escapeHtml(text)}</a></li>`;
+  });
+  return `<nav class="table-of-contents"><h4>Table of Contents</h4><ul>${items.join("")}</ul></nav>`;
+}
+
+/**
+ * Add computed fields to frontmatter (no validation is performed)
+ * @param {Object} frontmatter - Raw frontmatter data with a `date` and, optionally, `content`
+ * @returns {Object} Copy of frontmatter plus publishedDate, year, month (1-12), day, isRecent, readingTime
+ */
+export function processFrontmatter(frontmatter) {
+  const { date, content } = frontmatter;
+  const publishedDate = new Date(date); // parsed once and shared below
+
+  // UTC getters: a date-only value such as "2025-09-04" parses as midnight
+  // UTC, so local getters would report the previous day west of Greenwich.
+  return {
+    ...frontmatter,
+    publishedDate,
+    year: publishedDate.getUTCFullYear(),
+    month: publishedDate.getUTCMonth() + 1,
+    day: publishedDate.getUTCDate(),
+    isRecent: isRecentPost(date),
+    readingTime: content ? calculateReadingTime(calculateWordCount(content)) : 0,
+  };
+}
+
+/**
+ * Tell whether a post date falls after the moment 30 days before now
+ * @param {string} date - Post date string
+ * @returns {boolean} True when the date is later than the 30-day cutoff
+ */
+function isRecentPost(date) {
+  const cutoff = new Date();
+  const cutoffDay = cutoff.getDate() - 30;
+  cutoff.setDate(cutoffDay); // setDate rolls back across month boundaries
+
+  return new Date(date) > cutoff;
+}
diff --git a/tests/unit/contentProcessor.test.js b/tests/unit/contentProcessor.test.js
new file mode 100644
index 0000000..96e5c90
--- /dev/null
+++ b/tests/unit/contentProcessor.test.js
@@ -0,0 +1,104 @@
+import { describe, it, expect, beforeAll } from "vitest";
+import {
+  contentProcessor,
+  getAllPosts,
+  getBlogStats,
+  getAllTags,
+} from "../../lib/contentProcessor.js";
+
+describe("Content Processor", () => {
+  beforeAll(async () => {
+    await contentProcessor.initialize();
+  });
+  // Suite: smoke tests of the processor's getters after one shared initialize().
+  describe("Basic Functionality", () => {
+    it("should initialize successfully", () => {
+      expect(contentProcessor.isInitialized).toBe(true);
+    });
+
+    it("should process blog posts", () => {
+      const posts = getAllPosts();
+      expect(Array.isArray(posts)).toBe(true);
+      expect(posts.length).toBeGreaterThan(0);
+    });
+
+    it("should extract blog statistics", () => {
+      const stats = getBlogStats();
+      expect(stats.totalPosts).toBeGreaterThan(0);
+      expect(stats.totalTags).toBeGreaterThan(0);
+      expect(stats.totalWords).toBeGreaterThan(0);
+    });
+
+    it("should extract tags from posts", () => {
+      const tags = getAllTags();
+      expect(Array.isArray(tags)).toBe(true);
+      expect(tags.length).toBeGreaterThan(0);
+    });
+  });
+  // Shape of a processed post, checked on the first post returned.
+  describe("Post Processing", () => {
+    it("should process markdown content correctly", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+
+      expect(firstPost).toHaveProperty("frontmatter");
+      expect(firstPost).toHaveProperty("content");
+      expect(firstPost).toHaveProperty("htmlContent");
+      expect(firstPost).toHaveProperty("wordCount");
+      expect(firstPost).toHaveProperty("readingTime");
+      expect(firstPost).toHaveProperty("headings");
+      expect(firstPost).toHaveProperty("tableOfContents");
+    });
+
+    it("should generate proper slugs", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+
+      expect(firstPost.slug).toBeDefined();
+      expect(typeof firstPost.slug).toBe("string");
+      expect(firstPost.slug.length).toBeGreaterThan(0);
+    });
+
+    it("should calculate word count and reading time", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+
+      expect(firstPost.wordCount).toBeGreaterThan(0);
+      expect(firstPost.readingTime).toBeGreaterThan(0);
+      expect(typeof firstPost.wordCount).toBe("number");
+      expect(typeof firstPost.readingTime).toBe("number");
+    });
+  });
+  // Derived content: headings, rendered HTML and table of contents.
+  describe("Content Enhancement", () => {
+    it("should extract headings for table of contents", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+      // Heading shape is only checked when the first post actually has headings.
+      expect(Array.isArray(firstPost.headings)).toBe(true);
+      if (firstPost.headings.length > 0) {
+        expect(firstPost.headings[0]).toHaveProperty("level");
+        expect(firstPost.headings[0]).toHaveProperty("text");
+        expect(firstPost.headings[0]).toHaveProperty("id");
+      }
+    });
+
+    it("should generate HTML content", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+
+      expect(firstPost.htmlContent).toBeDefined();
+      expect(typeof firstPost.htmlContent).toBe("string");
+      expect(firstPost.htmlContent.length).toBeGreaterThan(0);
+      expect(firstPost.htmlContent).toContain("<");
+    });
+
+    it("should generate table of contents", () => {
+      const posts = getAllPosts();
+      const firstPost = posts[0];
+      // Only presence and type are asserted here, not the generated markup.
+      expect(firstPost.tableOfContents).toBeDefined();
+      expect(typeof firstPost.tableOfContents).toBe("string");
+    });
+  });
+});