Added content processing system

This commit is contained in:
adilallo
2025-09-04 10:49:48 -06:00
parent 3d6d4ed251
commit b54ddb16ba
5 changed files with 1192 additions and 77 deletions
+240
View File
@@ -0,0 +1,240 @@
/**
* Content caching utilities for improved performance
*/
// In-memory caches. Each Map stores CacheEntry objects keyed by a string.
// Individual blog posts, keyed as "post:<slug>"
const blogPostCache = new Map();
// Lists of posts, keyed by a caller-chosen list name
const blogListCache = new Map();
// Tag list, stored under the single key "all"
const tagCache = new Map();
// Author list, stored under the single key "all"
const authorCache = new Map();
// Cache configuration
// Entries older than this many milliseconds are treated as expired
const CACHE_TTL = 5 * 60 * 1000; // 5 minutes in milliseconds
// Size limit checked against blogPostCache when inserting posts
const MAX_CACHE_SIZE = 100; // Maximum number of cached items
/**
 * A cached value stamped with its creation time so staleness can be checked.
 */
class CacheEntry {
  /**
   * @param {*} data - Value to keep in the cache
   */
  constructor(data) {
    const createdAt = Date.now();
    this.data = data;
    this.timestamp = createdAt;
  }

  /**
   * @returns {boolean} True once the entry is older than CACHE_TTL milliseconds
   */
  isExpired() {
    const ageMs = Date.now() - this.timestamp;
    return ageMs > CACHE_TTL;
  }
}
/**
 * Look up a value in the blog post cache.
 * A missing or expired entry is removed from the cache and reported as a miss.
 * @param {string} key - Cache key
 * @returns {Object|null} Cached data, or null on a miss
 */
function getCached(key) {
  const hit = blogPostCache.get(key);
  if (hit && !hit.isExpired()) {
    return hit.data;
  }
  blogPostCache.delete(key);
  return null;
}
/**
 * Store blog post data in the post cache, keeping it within MAX_CACHE_SIZE.
 *
 * Eviction is by insertion order (oldest write first); reads do not refresh
 * an entry's position.
 * @param {string} key - Cache key
 * @param {Object} data - Data to cache
 */
function setCached(key, data) {
  // Remove any previous entry first: the key then moves to the newest
  // position, and overwriting an existing key never evicts an unrelated one.
  blogPostCache.delete(key);
  if (blogPostCache.size >= MAX_CACHE_SIZE) {
    // A Map iterates in insertion order, so its first key is the oldest entry.
    const oldestKey = blogPostCache.keys().next().value;
    blogPostCache.delete(oldestKey);
  }
  blogPostCache.set(key, new CacheEntry(data));
}
/**
 * Remove expired entries from every cache (posts, lists, tags, authors).
 */
function clearExpiredCache() {
  const caches = [blogPostCache, blogListCache, tagCache, authorCache];
  for (const cache of caches) {
    // Deleting the current key while iterating a Map is safe in JavaScript.
    for (const [key, entry] of cache) {
      if (entry.isExpired()) {
        cache.delete(key);
      }
    }
  }
}
/**
 * Empty the post, list, tag and author caches.
 */
export function clearAllCaches() {
  const caches = [blogPostCache, blogListCache, tagCache, authorCache];
  for (const cache of caches) {
    cache.clear();
  }
}
/**
 * Fetch a blog post from the cache by its slug.
 * @param {string} slug - Blog post slug
 * @returns {Object|null} Cached blog post, or null on a miss
 */
export function getCachedBlogPost(slug) {
  const cacheKey = `post:${slug}`;
  return getCached(cacheKey);
}
/**
 * Put a blog post into the cache under its slug.
 * @param {string} slug - Blog post slug
 * @param {Object} postData - Blog post data
 */
export function cacheBlogPost(slug, postData) {
  const cacheKey = `post:${slug}`;
  setCached(cacheKey, postData);
}
/**
 * Fetch a cached list of blog posts.
 * A missing or expired entry is removed from the list cache and reported as a miss.
 * @param {string} key - Cache key for list (e.g., 'all', 'recent', 'tag:governance')
 * @returns {Array|null} Cached list, or null on a miss
 */
export function getCachedBlogList(key) {
  const cachedList = blogListCache.get(key);
  if (cachedList && !cachedList.isExpired()) {
    return cachedList.data;
  }
  blogListCache.delete(key);
  return null;
}
/**
 * Cache a list of blog posts, keeping the list cache within MAX_CACHE_SIZE.
 *
 * List keys can be derived from caller input, so the cache is bounded: when it
 * is full, the oldest-written list is evicted.
 * @param {string} key - Cache key
 * @param {Array} listData - List data to cache
 */
export function cacheBlogList(key, listData) {
  // Re-inserting moves the key to the newest position and keeps an overwrite
  // from evicting an unrelated list.
  blogListCache.delete(key);
  if (blogListCache.size >= MAX_CACHE_SIZE) {
    // A Map iterates in insertion order, so its first key is the oldest entry.
    const oldestKey = blogListCache.keys().next().value;
    blogListCache.delete(oldestKey);
  }
  blogListCache.set(key, new CacheEntry(listData));
}
/**
 * Fetch the cached tag list (stored under the key "all").
 * A missing or expired entry is removed and reported as a miss.
 * @returns {Array|null} Cached tags, or null on a miss
 */
export function getCachedTags() {
  const cachedTags = tagCache.get("all");
  if (cachedTags && !cachedTags.isExpired()) {
    return cachedTags.data;
  }
  tagCache.delete("all");
  return null;
}
/**
 * Store the tag list in the tag cache under the key "all".
 * @param {Array} tags - Tags to cache
 */
export function cacheTags(tags) {
  const entry = new CacheEntry(tags);
  tagCache.set("all", entry);
}
/**
 * Fetch the cached author list (stored under the key "all").
 * A missing or expired entry is removed and reported as a miss.
 * @returns {Array|null} Cached authors, or null on a miss
 */
export function getCachedAuthors() {
  const cachedAuthors = authorCache.get("all");
  if (cachedAuthors && !cachedAuthors.isExpired()) {
    return cachedAuthors.data;
  }
  authorCache.delete("all");
  return null;
}
/**
 * Store the author list in the author cache under the key "all".
 * @param {Array} authors - Authors to cache
 */
export function cacheAuthors(authors) {
  const entry = new CacheEntry(authors);
  authorCache.set("all", entry);
}
/**
 * Invalidate the cache for a specific blog post.
 *
 * Everything derived from the set of posts is dropped as well: cached lists
 * may contain the post, and the cached tag and author collections may have
 * been computed from it.
 * @param {string} slug - Blog post slug
 */
export function invalidateBlogPostCache(slug) {
  blogPostCache.delete(`post:${slug}`);
  // Derived caches cannot be patched per post, so clear them wholesale.
  blogListCache.clear();
  tagCache.clear();
  authorCache.clear();
}
/**
 * Invalidate every cache by delegating to clearAllCaches().
 */
export function invalidateAllCaches() {
  clearAllCaches();
}
/**
 * Get cache statistics.
 *
 * Calls clearExpiredCache() first so that entries it removes are not counted.
 * @returns {Object} Per-cache sizes, their total, and the configured limits
 */
export function getCacheStats() {
  clearExpiredCache();
  const blogPostCacheSize = blogPostCache.size;
  const blogListCacheSize = blogListCache.size;
  const tagCacheSize = tagCache.size;
  const authorCacheSize = authorCache.size;
  return {
    blogPostCacheSize,
    blogListCacheSize,
    tagCacheSize,
    authorCacheSize,
    totalCacheSize:
      blogPostCacheSize + blogListCacheSize + tagCacheSize + authorCacheSize,
    maxCacheSize: MAX_CACHE_SIZE,
    cacheTTL: CACHE_TTL,
  };
}
/**
 * Warm up the cache with frequently accessed data.
 *
 * Both loaders may be synchronous or return a Promise; their results are
 * awaited before being cached. Failures are logged and not rethrown, so a
 * failed warm-up never blocks the caller.
 * @param {Function} getAllPosts - Function to get all blog posts
 * @param {Function} getAllTags - Function to get all tags
 * @returns {Promise<void>} Resolves once warming has finished or failed
 */
export async function warmCache(getAllPosts, getAllTags) {
  try {
    // Cache all blog posts
    const allPosts = await getAllPosts();
    cacheBlogList("all", allPosts);

    // Cache recent posts
    cacheBlogList("recent", allPosts.slice(0, 5));

    // Cache tags
    const tags = await getAllTags();
    cacheTags(tags);

    // Cache individual posts (first 10)
    for (const post of allPosts.slice(0, 10)) {
      cacheBlogPost(post.slug, post);
    }

    console.log("Cache warmed up successfully");
  } catch (error) {
    console.error("Error warming up cache:", error);
  }
}
/**
 * Report whether the blog post cache still has room after expired entries
 * have been swept. Any error during the check is logged and counts as unhealthy.
 * @returns {boolean} True if cache is healthy
 */
export function isCacheHealthy() {
  let healthy = false;
  try {
    clearExpiredCache();
    healthy = blogPostCache.size < MAX_CACHE_SIZE;
  } catch (error) {
    console.error("Cache health check failed:", error);
  }
  return healthy;
}
+210 -77
View File
@@ -7,6 +7,54 @@ import { validateBlogPost, sanitizeBlogPost } from "./validation.js";
* Content processing utilities for blog posts
*/
/**
 * Convert markdown content to HTML with basic formatting.
 *
 * Handles ATX headings (# to ######), unordered list items ("* " or "- "),
 * bold, italic and inline links. Every other non-empty line becomes its own
 * paragraph. The input is not HTML-escaped.
 * @param {string} markdown - Raw markdown content
 * @returns {string} HTML content
 */
function markdownToHtml(markdown) {
  if (!markdown) return "";
  return (
    markdown
      // Headings
      .replace(/^(#{1,6}) (.*)$/gm, (match, hashes, text) => {
        const level = hashes.length;
        return `<h${level}>${text.trim()}</h${level}>`;
      })
      // List items go before emphasis so a leading "* " marker is never
      // mistaken for an opening italic delimiter
      .replace(/^[*-] (.*)$/gm, "<li>$1</li>")
      // Bold and italic
      .replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>")
      .replace(/\*(.*?)\*/g, "<em>$1</em>")
      // Links
      .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
      // Wrap each list item; adjacent lists are merged in the last step
      .replace(/(<li>.*<\/li>)/gim, "<ul>$1</ul>")
      // Paragraphs: wrap every line that does not already start with a
      // block-level tag produced above
      .replace(/^(?!<(?:h[1-6]|ul|li)\b)(.*$)/gim, "<p>$1</p>")
      // Clean up the empty paragraphs created from blank lines
      .replace(/<p><\/p>/g, "")
      .replace(/<p>(.*?)<\/p>/g, (match, content) =>
        content.trim() ? match : ""
      )
      // Merge consecutive single-item lists into one list
      .replace(/<\/ul>\s*<ul>/g, "")
  );
}
/**
 * Generate a URL-friendly slug from a string.
 *
 * The text is trimmed and lower-cased, characters other than word characters,
 * whitespace and hyphens are dropped, whitespace runs become single hyphens,
 * and leading/trailing hyphens are removed.
 * @param {string} text - Text to convert to slug
 * @returns {string} URL-friendly slug
 */
function generateSlug(text) {
  return text
    .trim() // Must run before whitespace is turned into hyphens
    .toLowerCase()
    .replace(/[^\w\s-]/g, "") // Remove special characters
    .replace(/\s+/g, "-") // Replace spaces with hyphens
    .replace(/-+/g, "-") // Replace multiple hyphens with single
    .replace(/^-+|-+$/g, ""); // No hyphens at either end
}
/**
* Get all blog post files from the content directory
* @returns {Array} Array of file paths
@@ -31,125 +79,210 @@ export function getBlogPostFiles() {
* @returns {Object|null} Parsed blog post data or null if invalid
*/
export function parseBlogPost(filePath) {
try {
const fullPath = path.join(process.cwd(), "content/blog", filePath);
const fileContents = fs.readFileSync(fullPath, "utf8");
const { data: frontmatter, content } = matter(fileContents);
const fullPath = path.join(process.cwd(), "content/blog", filePath);
// Validate frontmatter
const validation = validateBlogPost(frontmatter);
if (!validation.isValid) {
console.error(`Validation failed for ${filePath}:`, validation.errors);
try {
const fileContents = fs.readFileSync(fullPath, "utf8");
const { data, content } = matter(fileContents);
const validationResult = validateBlogPost(data);
if (!validationResult.isValid) {
console.error(
`Validation errors for ${filePath}:`,
validationResult.errors
);
return null;
}
// Sanitize frontmatter
const sanitized = sanitizeBlogPost(frontmatter);
// Generate slug from filename
const slug = filePath.replace(/\.(md|mdx)$/, "");
const sanitizedFrontmatter = sanitizeBlogPost(data);
const slug = generateSlug(filePath.replace(/\.mdx?$/, ""));
return {
slug,
frontmatter: sanitized,
frontmatter: sanitizedFrontmatter,
content,
htmlContent: markdownToHtml(content),
filePath,
lastModified: fs.statSync(fullPath).mtime,
};
} catch (error) {
console.error(`Error parsing blog post ${filePath}:`, error);
console.error(`Error parsing blog post file ${filePath}:`, error);
return null;
}
}
/**
* Get all blog posts with parsed data
* Get all blog posts, sorted by date
* @returns {Array} Array of parsed blog post objects
*/
export function getAllBlogPosts() {
const files = getBlogPostFiles();
const posts = files
.map((file) => parseBlogPost(file))
.filter((post) => post !== null)
const fileNames = getBlogPostFiles();
const allPosts = fileNames
.map((fileName) => parseBlogPost(fileName))
.filter(Boolean) // Filter out nulls (invalid posts)
.sort(
(a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date)
);
return posts;
); // Sort by date descending
return allPosts;
}
/**
* Get a single blog post by slug
* @param {string} slug - The post slug
* @returns {Object|null} Parsed blog post or null if not found
* Get a single blog post by its slug
* @param {string} slug - The slug of the blog post
* @returns {Object|null} The parsed blog post data or null if not found
*/
export function getBlogPostBySlug(slug) {
const files = getBlogPostFiles();
const file = files.find((f) => f.replace(/\.(md|mdx)$/, "") === slug);
if (!file) {
return null;
}
return parseBlogPost(file);
}
/**
* Get related blog posts
* @param {string} currentSlug - Current post slug
* @param {Array} relatedSlugs - Array of related post slugs
* @param {number} limit - Maximum number of related posts to return
* @returns {Array} Array of related blog posts
*/
export function getRelatedBlogPosts(currentSlug, relatedSlugs = [], limit = 3) {
if (!relatedSlugs || relatedSlugs.length === 0) {
// Fallback: get posts with similar tags or recent posts
const allPosts = getAllBlogPosts();
return allPosts.filter((post) => post.slug !== currentSlug).slice(0, limit);
}
const allPosts = getAllBlogPosts();
const related = allPosts
.filter((post) => relatedSlugs.includes(post.slug))
.slice(0, limit);
// If we don't have enough related posts, fill with recent ones
if (related.length < limit) {
const recent = allPosts
.filter(
(post) => post.slug !== currentSlug && !relatedSlugs.includes(post.slug)
)
.slice(0, limit - related.length);
return [...related, ...recent];
}
return related;
return allPosts.find((post) => post.slug === slug) || null;
}
/**
* Get all unique tags from blog posts
* @returns {Array} Array of unique tags
* Get related blog posts based on provided slugs or fallback to recent posts.
* @param {string} currentPostSlug - The slug of the current post to exclude.
* @param {string[]} relatedSlugs - Array of slugs for explicitly related posts.
* @param {number} limit - Maximum number of related posts to return.
* @returns {Array} Array of related blog post objects.
*/
export function getRelatedBlogPosts(
  currentPostSlug,
  relatedSlugs = [],
  limit = 3
) {
  // The current post is never a candidate, even if it is listed as related.
  const candidates = getAllBlogPosts().filter(
    (post) => post.slug !== currentPostSlug
  );

  // Index candidates by slug once (first occurrence wins) instead of scanning
  // the list for every related slug.
  const postsBySlug = new Map();
  for (const post of candidates) {
    if (!postsBySlug.has(post.slug)) {
      postsBySlug.set(post.slug, post);
    }
  }

  // The Set drops repeated slugs so the same post is never returned twice;
  // slugs that match no post are filtered out.
  const related = [...new Set(relatedSlugs ?? [])]
    .map((slug) => postsBySlug.get(slug))
    .filter(Boolean);

  // Fill any remaining slots with other candidates, in the order
  // getAllBlogPosts() returned them.
  if (related.length < limit) {
    const chosenSlugs = new Set(related.map((post) => post.slug));
    const filler = candidates
      .filter((post) => !chosenSlugs.has(post.slug))
      .slice(0, limit - related.length);
    related.push(...filler);
  }

  return related.slice(0, limit);
}
/**
* Get all unique tags from all blog posts.
* @returns {string[]} Array of unique tags.
*/
export function getAllTags() {
const posts = getAllBlogPosts();
const allPosts = getAllBlogPosts();
const tags = new Set();
posts.forEach((post) => {
allPosts.forEach((post) => {
if (post.frontmatter.tags) {
post.frontmatter.tags.forEach((tag) => tags.add(tag));
}
});
return Array.from(tags).sort();
return Array.from(tags);
}
/**
* Get blog posts by tag
* @param {string} tag - Tag to filter by
* @returns {Array} Array of blog posts with the specified tag
* Get blog posts filtered by a specific tag.
* @param {string} tag - The tag to filter by.
* @returns {Object[]} Array of blog post objects matching the tag.
*/
export function getBlogPostsByTag(tag) {
const posts = getAllBlogPosts();
return posts.filter(
const allPosts = getAllBlogPosts();
return allPosts.filter(
(post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag)
);
}
/**
 * Search blog posts by text content.
 *
 * The match is a case-insensitive substring test against the title,
 * description, body and tags. Fields that are missing or not strings are
 * treated as non-matching instead of aborting the whole search.
 * @param {string} query - Search query
 * @param {number} limit - Maximum number of results
 * @returns {Object[]} Array of matching blog post objects
 */
export function searchBlogPosts(query, limit = 10) {
  if (!query || query.trim() === "") return [];

  const searchTerm = query.toLowerCase().trim();
  const containsTerm = (value) =>
    typeof value === "string" && value.toLowerCase().includes(searchTerm);

  return getAllBlogPosts()
    .filter(
      ({ frontmatter, content }) =>
        containsTerm(frontmatter.title) ||
        containsTerm(frontmatter.description) ||
        containsTerm(content) ||
        (Array.isArray(frontmatter.tags) &&
          frontmatter.tags.some((tag) => containsTerm(tag)))
    )
    .slice(0, limit);
}
/**
 * Get blog posts by author (case-insensitive exact match on the name).
 *
 * A non-string author yields no results, and posts without a string author
 * are skipped rather than raising.
 * @param {string} author - Author name to filter by
 * @returns {Object[]} Array of blog post objects by the author
 */
export function getBlogPostsByAuthor(author) {
  if (typeof author !== "string") return [];

  // Normalize the requested name once rather than once per post.
  const wantedAuthor = author.toLowerCase();
  return getAllBlogPosts().filter((post) => {
    const postAuthor = post.frontmatter.author;
    return (
      typeof postAuthor === "string" &&
      postAuthor.toLowerCase() === wantedAuthor
    );
  });
}
/**
 * Return the first `limit` posts from getAllBlogPosts().
 * @param {number} limit - Maximum number of posts to return
 * @returns {Object[]} Array of recent blog post objects
 */
export function getRecentBlogPosts(limit = 5) {
  const orderedPosts = getAllBlogPosts();
  const recentPosts = orderedPosts.slice(0, limit);
  return recentPosts;
}
/**
 * Summarize the blog: post, tag and author counts, the date range covered,
 * and the average number of posts per 30-day month (one decimal place).
 * The first post in the list is taken as the latest and the last as the earliest.
 * @returns {Object} Statistics about blog posts
 */
export function getBlogStats() {
  const allPosts = getAllBlogPosts();
  const tags = getAllTags();

  const hasPosts = allPosts.length > 0;
  const latestPost = hasPosts ? allPosts[0] : null;
  const earliestPost = hasPosts ? allPosts[allPosts.length - 1] : null;
  const authors = new Set(allPosts.map((post) => post.frontmatter.author));

  let averagePostsPerMonth = 0;
  if (hasPosts) {
    const spanMs =
      new Date(latestPost.frontmatter.date) -
      new Date(earliestPost.frontmatter.date);
    // A span shorter than one month still counts as one month.
    const months = Math.max(1, spanMs / (1000 * 60 * 60 * 24 * 30));
    averagePostsPerMonth = Math.round((allPosts.length / months) * 10) / 10;
  }

  return {
    totalPosts: allPosts.length,
    totalTags: tags.length,
    totalAuthors: authors.size,
    dateRange: {
      earliest: hasPosts ? earliestPost.frontmatter.date : null,
      latest: hasPosts ? latestPost.frontmatter.date : null,
    },
    averagePostsPerMonth,
  };
}
+376
View File
@@ -0,0 +1,376 @@
/**
* Comprehensive content processing system for blog posts
*/
import fs from "fs";
import path from "path";
import matter from "gray-matter";
import {
  processMarkdown,
  generateTableOfContents,
  processFrontmatter,
} from "./mdx.js";
import { validateBlogPost, sanitizeBlogPost } from "./validation.js";
import {
  getCachedBlogPost,
  cacheBlogPost,
  getCachedBlogList,
  cacheBlogList,
  getCachedTags,
  cacheTags,
  warmCache,
  clearAllCaches,
} from "./cache.js";
/**
 * Main content processor class.
 *
 * Reads markdown/MDX files from the content/blog directory, validates and
 * enriches them, and serves posts, lists, tags and statistics through the
 * shared cache helpers.
 */
class ContentProcessor {
  constructor() {
    this.contentDirectory = path.join(process.cwd(), "content/blog");
    // slug -> processed post for every file handled since the last refresh
    this.processedPosts = new Map();
    this.isInitialized = false;
  }

  /**
   * Initialize the content processor by warming the cache. Later calls are
   * no-ops once initialization has succeeded.
   * @returns {Promise<void>}
   * @throws Rethrows any error raised while warming the cache
   */
  async initialize() {
    if (this.isInitialized) return;

    try {
      // Warm up cache
      await warmCache(
        () => this.getAllPosts(),
        () => this.getAllTags()
      );

      this.isInitialized = true;
      console.log("Content processor initialized successfully");
    } catch (error) {
      console.error("Failed to initialize content processor:", error);
      throw error;
    }
  }

  /**
   * Get all blog post files
   * @returns {Array} Names of the .md/.mdx files in the content directory;
   *   empty when the directory cannot be read
   */
  getBlogPostFiles() {
    try {
      const files = fs.readdirSync(this.contentDirectory);
      return files.filter(
        (file) => file.endsWith(".md") || file.endsWith(".mdx")
      );
    } catch (error) {
      console.error("Error reading blog content directory:", error);
      return [];
    }
  }

  /**
   * Process a single blog post file
   * @param {string} filePath - File name relative to the content directory
   * @returns {Object|null} Processed blog post data, or null when the file
   *   cannot be read or its frontmatter fails validation
   */
  processBlogPost(filePath) {
    const fullPath = path.join(this.contentDirectory, filePath);

    try {
      const fileContents = fs.readFileSync(fullPath, "utf8");
      const { data, content } = matter(fileContents);

      // Validate frontmatter
      const validationResult = validateBlogPost(data);
      if (!validationResult.isValid) {
        console.error(
          `Validation errors for ${filePath}:`,
          validationResult.errors
        );
        return null;
      }

      // Sanitize frontmatter
      const sanitizedFrontmatter = sanitizeBlogPost(data);

      // Process markdown content
      const processedContent = processMarkdown(content);

      // Generate slug from the file name without its extension
      const slug = this.generateSlug(filePath.replace(/\.mdx?$/, ""));

      // Get file stats
      const stats = fs.statSync(fullPath);

      // Create processed post object
      const processedPost = {
        slug,
        frontmatter: processFrontmatter(sanitizedFrontmatter),
        content: processedContent.content,
        htmlContent: processedContent.htmlContent,
        wordCount: processedContent.wordCount,
        readingTime: processedContent.readingTime,
        headings: processedContent.headings,
        links: processedContent.links,
        images: processedContent.images,
        tableOfContents: generateTableOfContents(processedContent.headings),
        filePath,
        lastModified: stats.mtime,
        fileSize: stats.size,
        metadata: {
          processedAt: new Date(),
          processorVersion: "1.0.0",
        },
      };

      // Record the post so getStatus() can report how many were processed
      this.processedPosts.set(slug, processedPost);

      // Cache the processed post
      cacheBlogPost(slug, processedPost);

      return processedPost;
    } catch (error) {
      console.error(`Error processing blog post file ${filePath}:`, error);
      return null;
    }
  }

  /**
   * Get all blog posts with caching
   * @returns {Array} Processed blog posts, newest first by frontmatter date
   */
  getAllPosts() {
    // Check cache first
    const cached = getCachedBlogList("all");
    if (cached) return cached;

    const fileNames = this.getBlogPostFiles();
    const allPosts = fileNames
      .map((fileName) => this.processBlogPost(fileName))
      .filter(Boolean) // Drop files that failed to process
      .sort(
        (a, b) => new Date(b.frontmatter.date) - new Date(a.frontmatter.date)
      );

    // Cache the result
    cacheBlogList("all", allPosts);

    return allPosts;
  }

  /**
   * Get a single blog post by slug with caching
   * @param {string} slug - The slug of the blog post
   * @returns {Object|null} The processed blog post data or null if not found
   */
  getBlogPostBySlug(slug) {
    // Check cache first
    const cached = getCachedBlogPost(slug);
    if (cached) return cached;

    // If not in cache, find the post in the full list
    const match = this.getAllPosts().find((post) => post.slug === slug);
    if (!match) return null;

    cacheBlogPost(slug, match);
    return match;
  }

  /**
   * Get recent blog posts
   * @param {number} limit - Maximum number of posts to return
   * @returns {Array} Array of recent blog post objects
   */
  getRecentPosts(limit = 5) {
    const cacheKey = `recent:${limit}`;
    const cached = getCachedBlogList(cacheKey);
    if (cached) return cached;

    const recentPosts = this.getAllPosts().slice(0, limit);

    cacheBlogList(cacheKey, recentPosts);
    return recentPosts;
  }

  /**
   * Get blog posts by tag
   * @param {string} tag - The tag to filter by
   * @returns {Array} Array of blog post objects matching the tag
   */
  getPostsByTag(tag) {
    const cacheKey = `tag:${tag}`;
    const cached = getCachedBlogList(cacheKey);
    if (cached) return cached;

    const taggedPosts = this.getAllPosts().filter(
      (post) => post.frontmatter.tags && post.frontmatter.tags.includes(tag)
    );

    cacheBlogList(cacheKey, taggedPosts);
    return taggedPosts;
  }

  /**
   * Get all unique tags with caching
   * @returns {Array} Sorted array of unique tags
   */
  getAllTags() {
    const cached = getCachedTags();
    if (cached) return cached;

    const tags = new Set();
    for (const post of this.getAllPosts()) {
      if (post.frontmatter.tags) {
        post.frontmatter.tags.forEach((tag) => tags.add(tag));
      }
    }

    const tagsArray = Array.from(tags).sort();
    cacheTags(tagsArray);

    return tagsArray;
  }

  /**
   * Search blog posts with a case-insensitive substring match against the
   * title, description, body and tags. Fields that are missing or not strings
   * are treated as non-matching instead of aborting the search.
   * @param {string} query - Search query
   * @param {number} limit - Maximum number of results
   * @returns {Array} Array of matching blog post objects
   */
  searchPosts(query, limit = 10) {
    if (!query || query.trim() === "") return [];

    const searchTerm = query.toLowerCase().trim();
    const containsTerm = (value) =>
      typeof value === "string" && value.toLowerCase().includes(searchTerm);

    return this.getAllPosts()
      .filter(
        ({ frontmatter, content }) =>
          containsTerm(frontmatter.title) ||
          containsTerm(frontmatter.description) ||
          containsTerm(content) ||
          (Array.isArray(frontmatter.tags) &&
            frontmatter.tags.some((tag) => containsTerm(tag)))
      )
      .slice(0, limit);
  }

  /**
   * Get blog statistics
   * @returns {Object} Statistics about blog posts
   */
  getBlogStats() {
    const allPosts = this.getAllPosts();
    const tags = this.getAllTags();

    return {
      totalPosts: allPosts.length,
      totalTags: tags.length,
      // Count distinct authors: .size belongs on the Set, not on the array
      totalAuthors: new Set(allPosts.map((post) => post.frontmatter.author))
        .size,
      totalWords: allPosts.reduce((sum, post) => sum + post.wordCount, 0),
      averageReadingTime:
        allPosts.length > 0
          ? Math.round(
              allPosts.reduce((sum, post) => sum + post.readingTime, 0) /
                allPosts.length
            )
          : 0,
      dateRange: {
        // Posts are sorted newest first, so the last one is the earliest
        earliest:
          allPosts.length > 0
            ? allPosts[allPosts.length - 1].frontmatter.date
            : null,
        latest: allPosts.length > 0 ? allPosts[0].frontmatter.date : null,
      },
      // Posts per 30-day month, one decimal; spans under a month count as one
      averagePostsPerMonth:
        allPosts.length > 0
          ? Math.round(
              (allPosts.length /
                Math.max(
                  1,
                  (new Date(allPosts[0].frontmatter.date) -
                    new Date(allPosts[allPosts.length - 1].frontmatter.date)) /
                    (1000 * 60 * 60 * 24 * 30)
                )) *
                10
            ) / 10
          : 0,
    };
  }

  /**
   * Generate a URL-friendly slug from a string
   * @param {string} text - Text to convert to slug
   * @returns {string} URL-friendly slug
   */
  generateSlug(text) {
    return text
      .trim() // Must run before whitespace is turned into hyphens
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      .replace(/\s+/g, "-")
      .replace(/-+/g, "-")
      .replace(/^-+|-+$/g, ""); // No hyphens at either end
  }

  /**
   * Refresh content (reprocess all posts)
   * @returns {Array} Array of reprocessed blog post objects
   */
  refreshContent() {
    console.log("Refreshing content...");

    // Clear processed posts
    this.processedPosts.clear();

    // Drop the shared caches too; otherwise getAllPosts() would hand back the
    // cached list and nothing would be reprocessed.
    clearAllCaches();

    // Reprocess all posts
    const allPosts = this.getAllPosts();

    console.log(`Refreshed ${allPosts.length} blog posts`);
    return allPosts;
  }

  /**
   * Get content processing status
   * @returns {Object} Status information
   */
  getStatus() {
    return {
      isInitialized: this.isInitialized,
      totalFiles: this.getBlogPostFiles().length,
      processedPosts: this.processedPosts.size,
      contentDirectory: this.contentDirectory,
      // NOTE(review): this is the time of the status call, not the time of
      // the last refreshContent() run.
      lastRefresh: new Date().toISOString(),
    };
  }
}
// Single shared processor instance used by every helper below
const contentProcessor = new ContentProcessor();

// The instance itself is exported for callers that need direct access
export { contentProcessor };

// Module-level helpers that forward to the shared instance

export function getAllPosts() {
  return contentProcessor.getAllPosts();
}

export function getBlogPostBySlug(slug) {
  return contentProcessor.getBlogPostBySlug(slug);
}

export function getRecentPosts(limit) {
  return contentProcessor.getRecentPosts(limit);
}

export function getPostsByTag(tag) {
  return contentProcessor.getPostsByTag(tag);
}

export function getAllTags() {
  return contentProcessor.getAllTags();
}

export function searchPosts(query, limit) {
  return contentProcessor.searchPosts(query, limit);
}

export function getBlogStats() {
  return contentProcessor.getBlogStats();
}

export function refreshContent() {
  return contentProcessor.refreshContent();
}

export function getStatus() {
  return contentProcessor.getStatus();
}

export function initialize() {
  return contentProcessor.initialize();
}
+262
View File
@@ -0,0 +1,262 @@
/**
* MDX processing utilities for enhanced markdown content
*/
/**
 * Analyse a markdown document and render it.
 *
 * Collects headings, links and images, computes the word count and reading
 * time, and converts the markdown to HTML. Empty input yields an all-empty
 * result.
 * @param {string} markdown - Raw markdown content
 * @returns {Object} Processed content with metadata
 */
export function processMarkdown(markdown) {
  const result = {
    content: "",
    htmlContent: "",
    wordCount: 0,
    readingTime: 0,
    headings: [],
    links: [],
    images: [],
  };
  if (!markdown) {
    return result;
  }

  // Structure extracted from the raw markdown
  result.headings = extractHeadings(markdown);
  result.links = extractLinks(markdown);
  result.images = extractImages(markdown);

  // Size metrics
  result.wordCount = calculateWordCount(markdown);
  result.readingTime = calculateReadingTime(result.wordCount);

  // Original text and its rendered form
  result.content = markdown;
  result.htmlContent = markdownToHtml(markdown);

  return result;
}
/**
 * Extract all ATX headings (# to ######) from markdown content.
 *
 * Lines inside fenced code blocks are ignored, so shell or Python comments
 * such as "# install" are not reported as headings.
 * @param {string} markdown - Raw markdown content
 * @returns {Array} Array of heading objects with level, text, id and the
 *   1-based line number of the heading
 */
function extractHeadings(markdown) {
  // Blank out fenced code while keeping every newline, so match offsets and
  // line numbers still refer to the original document.
  const scannable = markdown.replace(/```[\s\S]*?```/g, (fence) =>
    fence.replace(/[^\n]/g, " ")
  );

  // [ \t]+ (not \s+) keeps a bare "#" line from swallowing the lines after it.
  const headingRegex = /^(#{1,6})[ \t]+(.+)$/gm;
  const headings = [];

  for (const match of scannable.matchAll(headingRegex)) {
    const text = match[2].trim();
    if (text === "") continue; // "#" followed only by whitespace

    headings.push({
      level: match[1].length,
      text,
      id: generateHeadingId(text),
      line: scannable.substring(0, match.index).split("\n").length,
    });
  }

  return headings;
}
/**
 * Extract all inline links ([text](url)) from markdown content.
 *
 * Image syntax (![alt](src)) is skipped; images are collected separately.
 * @param {string} markdown - Raw markdown content
 * @returns {Array} Array of link objects with text, url and the match index
 */
function extractLinks(markdown) {
  // The lookbehind rejects a "[" that is preceded by "!", i.e. an image.
  const linkRegex = /(?<!!)\[([^\]]+)\]\(([^)]+)\)/g;
  const links = [];
  let match;

  while ((match = linkRegex.exec(markdown)) !== null) {
    links.push({
      text: match[1],
      url: match[2],
      index: match.index,
    });
  }

  return links;
}
/**
 * Collect every inline image (![alt](src)) found in the markdown.
 * @param {string} markdown - Raw markdown content
 * @returns {Array} Array of image objects with alt, src and the match index
 */
function extractImages(markdown) {
  const imagePattern = /!\[([^\]]*)\]\(([^)]+)\)/g;
  const found = [];

  for (
    let hit = imagePattern.exec(markdown);
    hit !== null;
    hit = imagePattern.exec(markdown)
  ) {
    const [, alt, src] = hit;
    found.push({ alt, src, index: hit.index });
  }

  return found;
}
/**
 * Generate an anchor ID for a heading.
 *
 * The text is trimmed and lower-cased, characters other than word characters,
 * whitespace and hyphens are dropped, whitespace runs become single hyphens,
 * and leading/trailing hyphens are removed. Headings with identical text
 * produce identical IDs.
 * @param {string} text - Heading text
 * @returns {string} Heading ID
 */
function generateHeadingId(text) {
  return text
    .trim() // Before hyphenation, so "Title" and "Title  " share one ID
    .toLowerCase()
    .replace(/[^\w\s-]/g, "")
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-+|-+$/g, "");
}
/**
 * Count the words in markdown content after removing the markdown
 * punctuation characters # * ` ~ [ ] ( ).
 * @param {string} markdown - Raw markdown content
 * @returns {number} Word count
 */
function calculateWordCount(markdown) {
  const withoutSyntax = markdown.replace(/[#*`~\[\]()]/g, "");
  const singleLine = withoutSyntax.replace(/\n+/g, " ").trim();
  // Each run of non-whitespace characters is one word.
  const words = singleLine.match(/\S+/g);
  return words === null ? 0 : words.length;
}
/**
 * Estimate reading time at 200 words per minute, rounded up to a whole minute.
 * @param {number} wordCount - Number of words
 * @returns {number} Reading time in minutes
 */
function calculateReadingTime(wordCount) {
  const WORDS_PER_MINUTE = 200; // Average reading speed
  const minutes = wordCount / WORDS_PER_MINUTE;
  return Math.ceil(minutes);
}
/**
 * Convert markdown to HTML with enhanced formatting.
 *
 * Handles fenced code blocks, ATX headings (# to ######, each with an id from
 * generateHeadingId), horizontal rules, blockquotes, unordered lists, bold,
 * italic, inline code, images and links. Every other non-empty line becomes
 * its own paragraph.
 *
 * Fenced code blocks are rendered separately from the rest of the text, so
 * their contents are never treated as markdown. The input is not
 * HTML-escaped, and emphasis markers inside inline code are still converted.
 * @param {string} markdown - Raw markdown content
 * @returns {string} HTML content
 */
function markdownToHtml(markdown) {
  if (!markdown) return "";

  // Renders one stretch of markdown that contains no fenced code block.
  const renderText = (text) =>
    text
      // Horizontal rules first: "***" would otherwise be consumed by the
      // italic rule below
      .replace(/^(?:---|\*\*\*)$/gm, "<hr>")
      // Headings with IDs; the text is trimmed so the ID matches the one a
      // table of contents built from the same heading text would link to
      .replace(/^(#{1,6})[ \t]+(.+)$/gm, (match, hashes, rawText) => {
        const level = hashes.length;
        const headingText = rawText.trim();
        const id = generateHeadingId(headingText);
        return `<h${level} id="${id}">${headingText}</h${level}>`;
      })
      // List items go before emphasis so a leading "* " marker is never
      // mistaken for an opening italic delimiter
      .replace(/^[*-] (.*)$/gm, "<li>$1</li>")
      // Blockquotes
      .replace(/^> (.*)$/gm, "<blockquote><p>$1</p></blockquote>")
      // Bold and italic
      .replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>")
      .replace(/\*(.*?)\*/g, "<em>$1</em>")
      // Inline code
      .replace(/`([^`]+)`/g, "<code>$1</code>")
      // Images must be handled before links, which share the bracket syntax
      .replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '<img src="$2" alt="$1">')
      // Links
      .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
      // Wrap each list item; adjacent lists are merged below
      .replace(/(<li>.*<\/li>)/gim, "<ul>$1</ul>")
      // Paragraphs: wrap every line that does not already start with a
      // block-level tag produced above
      .replace(
        /^(?!<(?:h[1-6]|ul|li|blockquote|hr|pre)\b)(.*$)/gim,
        "<p>$1</p>"
      )
      // Clean up the empty paragraphs created from blank lines
      .replace(/<p><\/p>/g, "")
      .replace(/<p>(.*?)<\/p>/g, (match, content) =>
        content.trim() ? match : ""
      )
      .replace(/<\/ul>\s*<ul>/g, "") // Merge consecutive ul elements
      .replace(/<ul>\s*<\/ul>/g, ""); // Remove empty ul elements

  // Renders one fenced block; the language class is omitted when no language
  // was given.
  const renderFence = (fence) => {
    const [, language, code] = /^```(\w+)?\n([\s\S]*)\n```$/.exec(fence);
    const classAttribute = language ? ` class="language-${language}"` : "";
    return `<pre><code${classAttribute}>${code}</code></pre>`;
  };

  // Splitting on a pattern with one capturing group keeps the fences in the
  // result: even indexes are plain text, odd indexes are fenced blocks.
  return markdown
    .split(/(```(?:\w+)?\n[\s\S]*?\n```)/g)
    .map((segment, index) =>
      index % 2 === 1 ? renderFence(segment) : renderText(segment)
    )
    .join("");
}
/**
 * Build an HTML table of contents from a list of headings.
 * Each entry links to the heading's id and is indented 20px per level below 1.
 * @param {Array} headings - Array of heading objects
 * @returns {string} HTML table of contents, or "" when there are no headings
 */
export function generateTableOfContents(headings) {
  if (!headings || headings.length === 0) return "";

  const items = headings
    .map(({ level, id, text }) => {
      const indent = (level - 1) * 20;
      return `<li style="margin-left: ${indent}px"><a href="#${id}">${text}</a></li>`;
    })
    .join("");

  return `<nav class="table-of-contents"><h4>Table of Contents</h4><ul>${items}</ul></nav>`;
}
/**
 * Add computed date and reading-time fields to frontmatter.
 *
 * year, month and day are read in UTC. Date-only values such as "2025-09-04"
 * are parsed as UTC midnight, so local-time getters would report the previous
 * day in time zones behind UTC.
 * NOTE(review): assumes frontmatter dates are date-only or carry an explicit
 * offset; confirm against the validation rules.
 * @param {Object} frontmatter - Raw frontmatter data
 * @returns {Object} Copy of the frontmatter with the computed fields added
 */
export function processFrontmatter(frontmatter) {
  // Parse once and derive every date field from the same instant.
  const publishedDate = new Date(frontmatter.date);

  return {
    ...frontmatter,
    publishedDate,
    year: publishedDate.getUTCFullYear(),
    month: publishedDate.getUTCMonth() + 1, // getUTCMonth() is zero-based
    day: publishedDate.getUTCDate(),
    isRecent: isRecentPost(frontmatter.date),
    // Only computed when the frontmatter itself carries a content field
    readingTime: frontmatter.content
      ? calculateReadingTime(calculateWordCount(frontmatter.content))
      : 0,
  };
}
/**
 * Tell whether a post was published after the moment 30 calendar days ago.
 * @param {string} date - Post date string
 * @returns {boolean} True if post is recent
 */
function isRecentPost(date) {
  const cutoff = new Date();
  cutoff.setDate(cutoff.getDate() - 30);
  return new Date(date) > cutoff;
}
+104
View File
@@ -0,0 +1,104 @@
import { describe, it, expect, beforeAll } from "vitest";
import {
contentProcessor,
getAllPosts,
getBlogStats,
getAllTags,
} from "../../lib/contentProcessor.js";
describe("Content Processor", () => {
  // The per-post checks all look at the first post returned by getAllPosts().
  const loadFirstPost = () => getAllPosts()[0];

  beforeAll(async () => {
    await contentProcessor.initialize();
  });

  describe("Basic Functionality", () => {
    it("should initialize successfully", () => {
      expect(contentProcessor.isInitialized).toBe(true);
    });

    it("should process blog posts", () => {
      const posts = getAllPosts();

      expect(Array.isArray(posts)).toBe(true);
      expect(posts.length).toBeGreaterThan(0);
    });

    it("should extract blog statistics", () => {
      const { totalPosts, totalTags, totalWords } = getBlogStats();

      expect(totalPosts).toBeGreaterThan(0);
      expect(totalTags).toBeGreaterThan(0);
      expect(totalWords).toBeGreaterThan(0);
    });

    it("should extract tags from posts", () => {
      const tags = getAllTags();

      expect(Array.isArray(tags)).toBe(true);
      expect(tags.length).toBeGreaterThan(0);
    });
  });

  describe("Post Processing", () => {
    it("should process markdown content correctly", () => {
      const firstPost = loadFirstPost();

      expect(firstPost).toHaveProperty("frontmatter");
      expect(firstPost).toHaveProperty("content");
      expect(firstPost).toHaveProperty("htmlContent");
      expect(firstPost).toHaveProperty("wordCount");
      expect(firstPost).toHaveProperty("readingTime");
      expect(firstPost).toHaveProperty("headings");
      expect(firstPost).toHaveProperty("tableOfContents");
    });

    it("should generate proper slugs", () => {
      const { slug } = loadFirstPost();

      expect(slug).toBeDefined();
      expect(typeof slug).toBe("string");
      expect(slug.length).toBeGreaterThan(0);
    });

    it("should calculate word count and reading time", () => {
      const { wordCount, readingTime } = loadFirstPost();

      expect(wordCount).toBeGreaterThan(0);
      expect(readingTime).toBeGreaterThan(0);
      expect(typeof wordCount).toBe("number");
      expect(typeof readingTime).toBe("number");
    });
  });

  describe("Content Enhancement", () => {
    it("should extract headings for table of contents", () => {
      const { headings } = loadFirstPost();

      expect(Array.isArray(headings)).toBe(true);
      // Heading shape can only be checked when the post has headings.
      if (headings.length > 0) {
        const [firstHeading] = headings;
        expect(firstHeading).toHaveProperty("level");
        expect(firstHeading).toHaveProperty("text");
        expect(firstHeading).toHaveProperty("id");
      }
    });

    it("should generate HTML content", () => {
      const { htmlContent } = loadFirstPost();

      expect(htmlContent).toBeDefined();
      expect(typeof htmlContent).toBe("string");
      expect(htmlContent.length).toBeGreaterThan(0);
      expect(htmlContent).toContain("<");
    });

    it("should generate table of contents", () => {
      const { tableOfContents } = loadFirstPost();

      expect(tableOfContents).toBeDefined();
      expect(typeof tableOfContents).toBe("string");
    });
  });
});