From 630cae1a144bb62b68bb847656ec9aa17780750e Mon Sep 17 00:00:00 2001 From: Ray Blair Date: Sat, 29 Jun 2024 01:40:08 +0100 Subject: [PATCH] feat: video discovery (#301) --- .playground/nuxt.config.ts | 1 + .playground/pages/prerender-video.vue | 44 +++++ src/module.ts | 2 + src/prerender.ts | 1 + src/runtime/nitro/plugins/nuxt-content.ts | 14 +- src/runtime/types.ts | 8 +- src/util/extractSitemapMetaFromHtml.ts | 76 +++++++- test/unit/extractSitemapMetaFromHtml.test.ts | 187 ++++++++++++++++++- 8 files changed, 327 insertions(+), 6 deletions(-) create mode 100644 .playground/pages/prerender-video.vue diff --git a/.playground/nuxt.config.ts b/.playground/nuxt.config.ts index 2765044a..1ad8b228 100644 --- a/.playground/nuxt.config.ts +++ b/.playground/nuxt.config.ts @@ -61,6 +61,7 @@ export default defineNuxtConfig({ routes: [ // '/sitemap_index.xml', '/prerender', + '/prerender-video', '/should-be-in-sitemap', '/foo.bar/', '/test.doc', diff --git a/.playground/pages/prerender-video.vue b/.playground/pages/prerender-video.vue new file mode 100644 index 00000000..2fe3577b --- /dev/null +++ b/.playground/pages/prerender-video.vue @@ -0,0 +1,44 @@ + + + diff --git a/src/module.ts b/src/module.ts index 7fe4705f..d26fd771 100644 --- a/src/module.ts +++ b/src/module.ts @@ -59,6 +59,7 @@ export default defineNuxtModule({ defaultSitemapsChunkSize: 1000, autoLastmod: false, discoverImages: true, + discoverVideos: true, dynamicUrlsApiEndpoint: '/api/_sitemap-urls', urls: [], sortEntries: true, @@ -458,6 +459,7 @@ declare module 'vue-router' { debug: config.debug, // needed for nuxt/content integration and prerendering discoverImages: config.discoverImages, + discoverVideos: config.discoverVideos, /* @nuxt/content */ isNuxtContentDocumentDriven, diff --git a/src/prerender.ts b/src/prerender.ts index 3db89999..556f0eab 100644 --- a/src/prerender.ts +++ b/src/prerender.ts @@ -77,6 +77,7 @@ export function setupPrerenderHandler(options: ModuleRuntimeConfig, nuxt: Nuxt = } 
route._sitemap = defu(extractSitemapMetaFromHtml(html, { images: options.discoverImages, + videos: options.discoverVideos, // TODO configurable? lastmod: true, alternatives: true, diff --git a/src/runtime/nitro/plugins/nuxt-content.ts b/src/runtime/nitro/plugins/nuxt-content.ts index d4f25fec..78d72f0d 100644 --- a/src/runtime/nitro/plugins/nuxt-content.ts +++ b/src/runtime/nitro/plugins/nuxt-content.ts @@ -6,7 +6,7 @@ import { useSimpleSitemapRuntimeConfig } from '../utils' import { defineNitroPlugin } from '#imports' export default defineNitroPlugin((nitroApp: NitroApp) => { - const { discoverImages, isNuxtContentDocumentDriven } = useSimpleSitemapRuntimeConfig() + const { discoverImages, discoverVideos, isNuxtContentDocumentDriven } = useSimpleSitemapRuntimeConfig() // @ts-expect-error untyped nitroApp.hooks.hook('content:file:afterParse', async (content: ParsedContent) => { const validExtensions = ['md', 'mdx'] @@ -23,6 +23,16 @@ export default defineNitroPlugin((nitroApp: NitroApp) => { .map(i => ({ loc: i.props!.src })) || []) } + // add any top level videos + let videos: SitemapUrl['videos'] = [] + if (discoverVideos) { + videos = (content.body?.children + ?.filter(c => + c.tag && c.props?.src && ['video'].includes(c.tag.toLowerCase()), + ) + .map(i => ({ content_loc: i.props!.src })) || []) + } + const sitemapConfig = typeof content.sitemap === 'object' ? 
content.sitemap : {} const lastmod = content.modifiedAt || content.updatedAt const defaults: Partial = {} @@ -32,6 +42,8 @@ export default defineNitroPlugin((nitroApp: NitroApp) => { defaults.loc = content.path if (images.length > 0) defaults.images = images + if (videos.length > 0) + defaults.videos = videos if (lastmod) defaults.lastmod = lastmod const definition = defu(sitemapConfig, defaults) as Partial diff --git a/src/runtime/types.ts b/src/runtime/types.ts index be055be9..4bba7de1 100644 --- a/src/runtime/types.ts +++ b/src/runtime/types.ts @@ -75,6 +75,12 @@ export interface ModuleOptions extends SitemapDefinition { * @default true */ discoverImages: boolean + /** + * When prerendering, should videos automatically be discovered and added to the sitemap. + * + * @default true + */ + discoverVideos: boolean /** * When chunking the sitemaps into multiple files, how many entries should each file contain. * @@ -193,7 +199,7 @@ export interface AutoI18nConfig { strategy: 'prefix' | 'prefix_except_default' | 'prefix_and_default' | 'no_prefix' } -export interface ModuleRuntimeConfig extends Pick { +export interface ModuleRuntimeConfig extends Pick { version: string isNuxtContentDocumentDriven: boolean sitemaps: { index?: Pick & { sitemaps: SitemapIndexEntry[] } } & Record & { _hasSourceChunk?: boolean }> diff --git a/src/util/extractSitemapMetaFromHtml.ts b/src/util/extractSitemapMetaFromHtml.ts index bab9aa7b..f62659b2 100644 --- a/src/util/extractSitemapMetaFromHtml.ts +++ b/src/util/extractSitemapMetaFromHtml.ts @@ -1,9 +1,9 @@ import { withSiteUrl } from 'nuxt-site-config-kit/urls' import { parseURL } from 'ufo' -import type { ResolvedSitemapUrl, SitemapUrl } from '../runtime/types' +import type { ResolvedSitemapUrl, SitemapUrl, VideoEntry } from '../runtime/types' -export function extractSitemapMetaFromHtml(html: string, options?: { images?: boolean, lastmod?: boolean, alternatives?: boolean }) { - options = options || { images: true, lastmod: true,
alternatives: true } +export function extractSitemapMetaFromHtml(html: string, options?: { images?: boolean, videos?: boolean, lastmod?: boolean, alternatives?: boolean }) { + options = options || { images: true, videos: true, lastmod: true, alternatives: true } const payload: Partial = {} if (options?.images) { const images = new Set() @@ -30,6 +30,76 @@ export function extractSitemapMetaFromHtml(html: string, options?: { images?: bo payload.images = [...images].map(i => ({ loc: i })) } + if (options?.videos) { + const videos = [] + const mainRegex = /]*>([\s\S]*?)<\/main>/ + const mainMatch = mainRegex.exec(html) + + if (mainMatch?.[1] && mainMatch[1].includes(']*>([\s\S]*?)<\/video>/g + const videoAttrRegex = /]*\s+src="([^"]+)"(?:[^>]*\s+poster="([^"]+)")?/ + const videoPosterRegex = /]*\s+poster="([^"]+)"/ + const videoTitleRegex = /]*\s+data-title="([^"]+)"/ + const videoDescriptionRegex = /]*\s+data-description="([^"]+)"/ + const sourceRegex = /]*\s+src="([^"]+)"/g + + let videoMatch; + while ((videoMatch = videoRegex.exec(mainMatch[1])) !== null) { + const videoContent = videoMatch[1] + const videoTag = videoMatch[0] + + // Extract src and poster attributes from the