mirror of
https://github.com/kogakure/website-astro-stefanimhoff.de.git
synced 2026-02-03 12:05:28 +00:00
feat: replace RSS handling
This replaces the RSS hack with a custom helper that replaces MDX components manually.
This commit is contained in:
committed by
Stefan Imhoff
parent
f4e41cb807
commit
ceadc30005
510
src/utils/rss-parser.ts
Normal file
510
src/utils/rss-parser.ts
Normal file
@@ -0,0 +1,510 @@
|
||||
/**
|
||||
* Helper utilities for RSS parsing and content normalization.
|
||||
*
|
||||
* Extracted from src/pages/rss-test.xml.js to centralize helpers.
|
||||
*
|
||||
* Functions:
|
||||
* - stripMarkdown(text): Remove simple markdown formatting.
|
||||
* - makeAbsolute(url, siteUrl): Convert relative URLs to absolute using siteUrl.
|
||||
* - fixImagePaths(html, siteUrl): Replace <img src="..."> with absolute src.
|
||||
* - replaceImageComponent(attributes, siteUrl): Convert an MDX `<Image ... />` component into HTML.
|
||||
* - replaceAmazonBookComponent(attributes): Convert an MDX `<AmazonBook ... />` component into HTML.
|
||||
* - stripMDXComponents(text, siteUrl): Replace `<Image />`, `<AmazonBook />` and strip other MDX component tags.
|
||||
*
|
||||
* These are implemented in TypeScript with minimal dependencies so they can be
|
||||
* used from RSS builder code or other places.
|
||||
*/
|
||||
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
export function stripMarkdown(text: string): string {
|
||||
// Remove markdown links: [text](url) => text
|
||||
// Remove basic markdown formatting characters: *, _, `, ~
|
||||
return text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1').replace(/[*_`~]/g, '');
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a possibly-relative url to an absolute URL using siteUrl as the base.
|
||||
* If the url is already absolute (http/https) it is returned unchanged.
|
||||
*/
|
||||
export function makeAbsolute(url: string, siteUrl: string): string {
|
||||
if (!url) return url;
|
||||
if (/^https?:\/\//i.test(url)) {
|
||||
return url;
|
||||
}
|
||||
|
||||
try {
|
||||
// The URL constructor will resolve relative URLs against the base.
|
||||
return new URL(url, siteUrl).toString();
|
||||
} catch (e) {
|
||||
// Fallback: simple concatenation with a single slash between.
|
||||
const base = siteUrl.endsWith('/') ? siteUrl.slice(0, -1) : siteUrl;
|
||||
const path = url.startsWith('/') ? url : `/${url}`;
|
||||
return `${base}${path}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace img tags in HTML that have relative src attributes with absolute URLs.
|
||||
* This is useful after rendering markdown/html that contains <img src="...">.
|
||||
*/
|
||||
export function fixImagePaths(html: string, siteUrl: string): string {
|
||||
if (!html) return html;
|
||||
return html.replace(
|
||||
/<img([^>]*)\s+src=(?:"|')([^"']*)(?:"|')([^>]*)>/g,
|
||||
(_match, beforeSrc: string, src: string, afterSrc: string) => {
|
||||
const absoluteSrc = makeAbsolute(src, siteUrl);
|
||||
return `<img${beforeSrc} src="${absoluteSrc}"${afterSrc}>`;
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract attribute value from a string like `src="value"` or `src='value'`.
|
||||
* Returns null if attribute is not found.
|
||||
*/
|
||||
function getAttr(str: string, name: string): string | null {
|
||||
const regex = new RegExp(`${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i');
|
||||
const match = str.match(regex);
|
||||
return match ? match[1] || match[2] : null;
|
||||
}
|
||||
|
||||
// Convert an MDX <Image ... />
|
||||
export function replaceImageComponent(attributes: string, siteUrl: string): string {
|
||||
// Extract possible props we care about
|
||||
const src = getAttr(attributes, 'src');
|
||||
const alt = getAttr(attributes, 'alt') || '';
|
||||
const caption = getAttr(attributes, 'caption');
|
||||
const source = getAttr(attributes, 'source');
|
||||
const sourceUrl = getAttr(attributes, 'sourceUrl');
|
||||
const href = getAttr(attributes, 'href');
|
||||
const width = getAttr(attributes, 'width');
|
||||
const height = getAttr(attributes, 'height');
|
||||
|
||||
if (!src) return '';
|
||||
|
||||
const absoluteSrc = makeAbsolute(src, siteUrl);
|
||||
|
||||
// Build <img> tag
|
||||
let imgHtml = `<img src="${absoluteSrc}" alt="${escapeHtmlAttr(alt)}"`;
|
||||
if (width) imgHtml += ` width="${escapeHtmlAttr(width)}"`;
|
||||
if (height) imgHtml += ` height="${escapeHtmlAttr(height)}"`;
|
||||
imgHtml += ` />`;
|
||||
|
||||
// Optionally wrap in <a>
|
||||
if (href) {
|
||||
const safeHref = escapeHtmlAttr(href);
|
||||
imgHtml = `<a href="${safeHref}">${imgHtml}</a>`;
|
||||
}
|
||||
|
||||
// Build figcaption if needed
|
||||
if (caption || source) {
|
||||
let captionContent = '';
|
||||
|
||||
if (caption) captionContent += escapeHtml(caption);
|
||||
|
||||
if (caption && source) captionContent += ' – ';
|
||||
|
||||
if (source) {
|
||||
if (sourceUrl) {
|
||||
captionContent += `<cite><a href="${escapeHtmlAttr(sourceUrl)}">${escapeHtml(source)}</a></cite>`;
|
||||
} else {
|
||||
captionContent += `<cite>${escapeHtml(source)}</cite>`;
|
||||
}
|
||||
}
|
||||
|
||||
return `<figure>${imgHtml}<figcaption>${captionContent}</figcaption></figure>`;
|
||||
}
|
||||
|
||||
return `<figure>${imgHtml}</figure>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <AmazonBook ... />
|
||||
export function replaceAmazonBookComponent(attributes: string): string {
|
||||
const asin = getAttr(attributes, 'asin');
|
||||
const alt = getAttr(attributes, 'alt') || '';
|
||||
|
||||
if (!asin) return '';
|
||||
|
||||
// Construct Amazon Image URL
|
||||
const amazonImageUrl = `https://images-na.ssl-images-amazon.com/images/P/${asin}.01.LZZZZZZZ.jpg`;
|
||||
const amazonProductUrl = `https://www.amazon.de/gp/product/${asin}`;
|
||||
|
||||
// We simplify the output to a standard anchor + img tag
|
||||
return `<a href="${amazonProductUrl}" aria-label="${escapeHtmlAttr(alt)}">
|
||||
<img src="${amazonImageUrl}" alt="${escapeHtmlAttr(alt)}" />
|
||||
</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <AppleTV ... />
|
||||
export function replaceAppleTvComponent(attributes: string): string {
|
||||
const id = getAttr(attributes, 'id');
|
||||
|
||||
if (!id) return '';
|
||||
|
||||
// The URL pattern from the component
|
||||
const url = `https://tv.apple.com/show/umc.cmc.${id}`;
|
||||
|
||||
// We preserve the inner HTML structure (spans and Apple logo)
|
||||
// but strip the complex Tailwind classes for the "pure HTML" output.
|
||||
return `<a href="${url}" title="Apple TV+">[]</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <NetflixFlag ... />
|
||||
export function replaceNetflixComponent(attributes: string): string {
|
||||
const id = getAttr(attributes, 'id');
|
||||
|
||||
if (!id) return '';
|
||||
|
||||
// The URL pattern from the component
|
||||
const url = `https://www.netflix.com/title/${id}`;
|
||||
|
||||
// We preserve the inner HTML structure (spans and Apple logo)
|
||||
// but strip the complex Tailwind classes for the "pure HTML" output.
|
||||
return `<a href="${url}" title="Netflix">[Netflix]</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <PrimeVideoFlag ... />
|
||||
export function replacePrimeVideoComponent(attributes: string): string {
|
||||
const id = getAttr(attributes, 'id');
|
||||
|
||||
if (!id) return '';
|
||||
|
||||
// The URL pattern from the component
|
||||
const url = `https://www.amazon.de/gp/video/detail/${id}`;
|
||||
|
||||
// We preserve the inner HTML structure (spans and Apple logo)
|
||||
// but strip the complex Tailwind classes for the "pure HTML" output.
|
||||
return `<a href="${url}" title="Prime Video">[Prime Video]</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <Flag ... />
|
||||
export function replaceFlagComponent(attributes: string, siteUrl: string): string {
|
||||
const label = getAttr(attributes, 'label');
|
||||
const href = getAttr(attributes, 'href');
|
||||
|
||||
if (!label) return '';
|
||||
|
||||
// Inner content with decorative brackets, mimicking the original component
|
||||
const innerHtml = `[${escapeHtml(label)}]`;
|
||||
|
||||
if (href) {
|
||||
const absoluteHref = makeAbsolute(href, siteUrl);
|
||||
return `<a href="${escapeHtmlAttr(absoluteHref)}" title="${escapeHtmlAttr(label)}">${innerHtml}</a>`;
|
||||
}
|
||||
|
||||
return `<span title="${escapeHtmlAttr(label)}">${innerHtml}</span>`;
|
||||
}
|
||||
|
||||
export function replaceBlockquoteComponent(
|
||||
attributes: string,
|
||||
content: string,
|
||||
siteUrl: string
|
||||
): string {
|
||||
const author = getAttr(attributes, 'author');
|
||||
const source = getAttr(attributes, 'source');
|
||||
const sourceUrl = getAttr(attributes, 'sourceUrl');
|
||||
const lang = getAttr(attributes, 'lang') || 'en';
|
||||
|
||||
let footerHtml = '';
|
||||
|
||||
// Build the footer if we have an author or source
|
||||
if (author || source) {
|
||||
footerHtml += '<footer>—';
|
||||
|
||||
if (author) {
|
||||
footerHtml += ` <b>${escapeHtml(author)}</b>`;
|
||||
}
|
||||
|
||||
if (author && source) {
|
||||
footerHtml += ',';
|
||||
}
|
||||
|
||||
if (source) {
|
||||
const safeSource = escapeHtml(source);
|
||||
// Add space before source
|
||||
footerHtml += ' ';
|
||||
|
||||
if (sourceUrl) {
|
||||
const absoluteUrl = makeAbsolute(sourceUrl, siteUrl);
|
||||
footerHtml += `<cite><a href="${escapeHtmlAttr(absoluteUrl)}">${safeSource}</a></cite>`;
|
||||
} else {
|
||||
footerHtml += `<cite>${safeSource}</cite>`;
|
||||
}
|
||||
}
|
||||
|
||||
footerHtml += '</footer>';
|
||||
}
|
||||
|
||||
return `<blockquote lang="${escapeHtmlAttr(lang)}">${content}${footerHtml}</blockquote>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <Pullquote ... />
|
||||
export function replacePullquoteComponent(attributes: string, siteUrl: string): string {
|
||||
const text = getAttr(attributes, 'text');
|
||||
if (!text) return '';
|
||||
|
||||
const author = getAttr(attributes, 'author');
|
||||
const source = getAttr(attributes, 'source');
|
||||
const sourceUrl = getAttr(attributes, 'sourceUrl');
|
||||
const lang = getAttr(attributes, 'lang') || 'en';
|
||||
const alignment = getAttr(attributes, 'alignment') || 'center';
|
||||
|
||||
// Map alignment to inline styles for RSS compatibility
|
||||
const style = alignment === 'left' ? 'text-align: left;' : 'text-align: center;';
|
||||
|
||||
let footerHtml = '';
|
||||
|
||||
if (author || source) {
|
||||
footerHtml += '<footer>';
|
||||
|
||||
if (author) {
|
||||
footerHtml += `<b>${escapeHtml(author)}</b>`;
|
||||
}
|
||||
|
||||
if (author && source) {
|
||||
footerHtml += ', ';
|
||||
}
|
||||
|
||||
if (source) {
|
||||
const safeSource = escapeHtml(source);
|
||||
if (sourceUrl) {
|
||||
const absoluteUrl = makeAbsolute(sourceUrl, siteUrl);
|
||||
footerHtml += `<cite><a href="${escapeHtmlAttr(absoluteUrl)}">${safeSource}</a></cite>`;
|
||||
} else {
|
||||
footerHtml += `<cite>${safeSource}</cite>`;
|
||||
}
|
||||
}
|
||||
|
||||
footerHtml += '</footer>';
|
||||
}
|
||||
|
||||
return `<blockquote lang="${escapeHtmlAttr(lang)}" style="${style}"><p>${text}</p>${footerHtml}</blockquote>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <ProductLink ... />
|
||||
export function replaceProductLinkComponent(attributes: string): string {
|
||||
const asin = getAttr(attributes, 'asin');
|
||||
const text = getAttr(attributes, 'text');
|
||||
|
||||
if (!asin || !text) return '';
|
||||
|
||||
const url = `https://www.amazon.de/gp/product/${asin}`;
|
||||
return `<a href="${url}">${escapeHtml(text)}</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <DownloadLink ... />
|
||||
export function replaceDownloadLinkComponent(attributes: string, siteUrl: string): string {
|
||||
const href = getAttr(attributes, 'href');
|
||||
const text = getAttr(attributes, 'text');
|
||||
|
||||
if (!href || !text) return '';
|
||||
|
||||
const absoluteHref = makeAbsolute(href, siteUrl);
|
||||
|
||||
return `<a href="${escapeHtmlAttr(absoluteHref)}">${escapeHtml(text)} ↓</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <MoreLink ... />
|
||||
export function replaceMoreLinkComponent(attributes: string, siteUrl: string): string {
|
||||
const href = getAttr(attributes, 'href');
|
||||
const text = getAttr(attributes, 'text');
|
||||
|
||||
if (!href || !text) return '';
|
||||
|
||||
const absoluteHref = makeAbsolute(href, siteUrl);
|
||||
|
||||
return `<a href="${escapeHtmlAttr(absoluteHref)}">${escapeHtml(text)} →</a>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <Ruby ... />
|
||||
export function replaceRubyComponent(attributes: string): string {
|
||||
const base = getAttr(attributes, 'base');
|
||||
const text = getAttr(attributes, 'text');
|
||||
|
||||
if (!base || !text) return '';
|
||||
|
||||
return `<ruby>${escapeHtml(base)}<rp>(</rp><rt>${escapeHtml(text)}</rt><rp>)</rp></ruby>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <Spotify ... />
|
||||
export function replaceSpotifyComponent(attributes: string): string {
|
||||
const id = getAttr(attributes, 'id');
|
||||
|
||||
if (!id) return '';
|
||||
|
||||
// Construct the Spotify embed URL
|
||||
const src = `https://open.spotify.com/embed/show/${id}?utm_source=generator&theme=0`;
|
||||
|
||||
return `<iframe src="${src}" width="100%" height="352" frameBorder="0" allowfullscreen="" allow="autoplay; clipboard-write; encrypted-media; fullscreen; picture-in-picture" loading="lazy"></iframe>`;
|
||||
}
|
||||
|
||||
// Convert an MDX <Figure ...>...</Figure>
|
||||
export function replaceFigureComponent(attributes: string, content: string): string {
|
||||
const caption = getAttr(attributes, 'caption');
|
||||
|
||||
let html = `<figure>${content}`;
|
||||
|
||||
if (caption) {
|
||||
html += `<figcaption>${escapeHtml(caption)}</figcaption>`;
|
||||
}
|
||||
|
||||
html += `</figure>`;
|
||||
return html;
|
||||
}
|
||||
|
||||
// Convert an MDX <Banner ...>...</Banner>
|
||||
export function replaceBannerComponent(attributes: string, content: string): string {
|
||||
const summary = getAttr(attributes, 'summary');
|
||||
// Check for the presence of the 'open' attribute (boolean or explicitly set)
|
||||
const isOpen = /\bopen\b/i.test(attributes);
|
||||
|
||||
let html = '<aside>';
|
||||
|
||||
if (summary) {
|
||||
html += `<details${isOpen ? ' open' : ''}><summary><strong>${escapeHtml(summary)}</strong></summary>${content}</details>`;
|
||||
} else {
|
||||
html += content;
|
||||
}
|
||||
|
||||
html += '</aside>';
|
||||
return html;
|
||||
}
|
||||
|
||||
// Convert an MDX <ColorSwatch ... />
|
||||
export function replaceColorSwatchComponent(attributes: string): string {
|
||||
const color = getAttr(attributes, 'color');
|
||||
|
||||
if (!color) return '';
|
||||
|
||||
return `<div style="width: 100px; height: 100px; background-color: ${escapeHtmlAttr(color)};"></div>`;
|
||||
}
|
||||
|
||||
// Convert wrapper components (like <ColorStack> and <BookShelf>)
|
||||
export function replaceWrapperComponent(content: string): string {
|
||||
return `<div>${content}</div>`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip MDX/JSX-like components from text but preserve/convert specific components.
|
||||
*/
|
||||
export function stripMDXComponents(text: string, siteUrl: string): string {
|
||||
if (!text) return text;
|
||||
|
||||
// <Image ... />
|
||||
let processed = text.replace(/<Image([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceImageComponent(attributes, siteUrl)
|
||||
);
|
||||
|
||||
// AmazonBook ... />
|
||||
processed = processed.replace(/<AmazonBook([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceAmazonBookComponent(attributes)
|
||||
);
|
||||
|
||||
// AppleTV ... />
|
||||
processed = processed.replace(
|
||||
/<Apple(Tv|TV)([\s\S]*?)\/>/g,
|
||||
(_match, _tagSuffix, attributes: string) => replaceAppleTvComponent(attributes)
|
||||
);
|
||||
|
||||
// <Netflix ... />
|
||||
processed = processed.replace(/<Netflix([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceNetflixComponent(attributes)
|
||||
);
|
||||
|
||||
// <PrimeVideo ... />
|
||||
processed = processed.replace(/<PrimeVideo([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replacePrimeVideoComponent(attributes)
|
||||
);
|
||||
|
||||
// <Flag ... />
|
||||
processed = processed.replace(/<Flag([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceFlagComponent(attributes, siteUrl)
|
||||
);
|
||||
|
||||
// <Blockquote ...>...</Blockquote>
|
||||
processed = processed.replace(
|
||||
/<Blockquote\b([^>]*)>([\s\S]*?)<\/Blockquote>/g,
|
||||
(_match, attributes: string, content: string) =>
|
||||
replaceBlockquoteComponent(attributes, content, siteUrl)
|
||||
);
|
||||
|
||||
// <Pullquote ... />
|
||||
processed = processed.replace(/<Pullquote([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replacePullquoteComponent(attributes, siteUrl)
|
||||
);
|
||||
|
||||
// <Figure ...>...</Figure>
|
||||
processed = processed.replace(
|
||||
/<Figure\b([^>]*)>([\s\S]*?)<\/Figure>/g,
|
||||
(_match, attributes: string, content: string) => replaceFigureComponent(attributes, content)
|
||||
);
|
||||
|
||||
// <Banner ...>...</Banner>
|
||||
processed = processed.replace(
|
||||
/<Banner\b([^>]*)>([\s\S]*?)<\/Banner>/g,
|
||||
(_match, attributes: string, content: string) => replaceBannerComponent(attributes, content)
|
||||
);
|
||||
|
||||
// <ProductLink ... />
|
||||
processed = processed.replace(/<ProductLink([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceProductLinkComponent(attributes)
|
||||
);
|
||||
|
||||
// <DownloadLink ... />
|
||||
processed = processed.replace(/<DownloadLink([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceDownloadLinkComponent(attributes, siteUrl)
|
||||
);
|
||||
|
||||
// <MoreLink ... />
|
||||
processed = processed.replace(/<MoreLink([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceMoreLinkComponent(attributes, siteUrl)
|
||||
);
|
||||
|
||||
// <Ruby ... />
|
||||
processed = processed.replace(/<Ruby([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceRubyComponent(attributes)
|
||||
);
|
||||
|
||||
// <Spotify ... />
|
||||
processed = processed.replace(/<Spotify([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceSpotifyComponent(attributes)
|
||||
);
|
||||
|
||||
// <ColorSwatch ... />
|
||||
processed = processed.replace(/<ColorSwatch([\s\S]*?)\/>/g, (_match, attributes: string) =>
|
||||
replaceColorSwatchComponent(attributes)
|
||||
);
|
||||
|
||||
// <ColorStack>...</ColorStack> and <BookShelf>...</BookShelf>
|
||||
processed = processed.replace(
|
||||
/<(ColorStack|Bookshelf)\b[^>]*>([\s\S]*?)<\/\1>/g,
|
||||
(_match, _tag, content: string) => replaceWrapperComponent(content)
|
||||
);
|
||||
|
||||
// Remove any other self-closing components e.g. <Foo bar="baz" />
|
||||
const removedSelfClosing = processed.replace(/<([A-Z][\w\d]*)\b[^>]*?\/>/g, '');
|
||||
|
||||
// Remove paired component tags, including their content, e.g. <Foo>...</Foo>
|
||||
const removedPaired = removedSelfClosing.replace(
|
||||
/<([A-Z][\w\d]*)\b[^>]*?>([\s\S]*?)<\/\1>/g,
|
||||
''
|
||||
);
|
||||
|
||||
return removedPaired;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple helper to escape text for inclusion inside HTML text nodes.
|
||||
*/
|
||||
function escapeHtml(str: string | null | undefined): string {
|
||||
if (str == null) return '';
|
||||
return String(str).replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>');
|
||||
}
|
||||
|
||||
/**
|
||||
* Escape for attribute values (double-quoted).
|
||||
*/
|
||||
function escapeHtmlAttr(str: string | null | undefined): string {
|
||||
if (str == null) return '';
|
||||
return String(str).replace(/&/g, '&').replace(/"/g, '"').replace(/</g, '<');
|
||||
}
|
||||
Reference in New Issue
Block a user