Skip to content

Commit

Permalink
feat: video discovery (#301)
Browse files Browse the repository at this point in the history
  • Loading branch information
rayblair06 authored Jun 29, 2024
1 parent 4e76302 commit 630cae1
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 6 deletions.
1 change: 1 addition & 0 deletions .playground/nuxt.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ export default defineNuxtConfig({
routes: [
// '/sitemap_index.xml',
'/prerender',
'/prerender-video',
'/should-be-in-sitemap',
'/foo.bar/',
'/test.doc',
Expand Down
44 changes: 44 additions & 0 deletions .playground/pages/prerender-video.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<script setup lang="ts"></script>

<template>
<div>
Pre-render Video Discovery Page

<!--
Case 1: the media URL is set through the src attribute on the video element itself.
The poster, data-title and data-description attributes supply the thumbnail,
title and description that sitemap video discovery is expected to pick up.
-->
<video
controls
src="https://archive.org/download/BigBuckBunny_124/Content/big_buck_bunny_720p_surround.mp4"
poster="https://archive.org/download/BigBuckBunny_124/__ia_thumb.jpg"
width="620"
data-title="Big Buck Bunny"
data-description="Big Buck Bunny in DivX 720p."
>
Sorry, your browser doesn't support embedded videos, but don't worry, you
can
<a href="https://archive.org/details/BigBuckBunny_124">download it</a>
and watch it with your favorite video player!
</video>

<!--
Case 2: the video element has no src attribute; the media URLs come from its
nested source children instead. Discovery is expected to produce one entry per
source child, all sharing the parent's poster, data-title and data-description.
-->
<video
controls
poster="https://archive.org/download/DuckAndCover_185/__ia_thumb.jpg"
width="620"
data-title="Duck and Cover"
data-description="This film, a combination of animated cartoon and live action, shows young children what to do in case of an atomic attack."
>
<source
src="https://archive.org/download/DuckAndCover_185/CivilDefenseFilm-DuckAndCoverColdWarNuclearPropaganda_512kb.mp4"
type="video/mp4"
/>
<source
src="https://archive.org/download/DuckAndCover_185/CivilDefenseFilm-DuckAndCoverColdWarNuclearPropaganda.avi"
type="video/x-msvideo"
/>
Sorry, your browser doesn't support embedded videos, but don't worry, you
can
<a href="https://archive.org/details/DuckAndCover_185">download it</a>
and watch it with your favorite video player!
</video>
</div>
</template>
2 changes: 2 additions & 0 deletions src/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export default defineNuxtModule<ModuleOptions>({
defaultSitemapsChunkSize: 1000,
autoLastmod: false,
discoverImages: true,
discoverVideos: true,
dynamicUrlsApiEndpoint: '/api/_sitemap-urls',
urls: [],
sortEntries: true,
Expand Down Expand Up @@ -458,6 +459,7 @@ declare module 'vue-router' {
debug: config.debug,
// needed for nuxt/content integration and prerendering
discoverImages: config.discoverImages,
discoverVideos: config.discoverVideos,

/* @nuxt/content */
isNuxtContentDocumentDriven,
Expand Down
1 change: 1 addition & 0 deletions src/prerender.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export function setupPrerenderHandler(options: ModuleRuntimeConfig, nuxt: Nuxt =
}
route._sitemap = defu(extractSitemapMetaFromHtml(html, {
images: options.discoverImages,
videos: options.discoverVideos,
// TODO configurable?
lastmod: true,
alternatives: true,
Expand Down
14 changes: 13 additions & 1 deletion src/runtime/nitro/plugins/nuxt-content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { useSimpleSitemapRuntimeConfig } from '../utils'
import { defineNitroPlugin } from '#imports'

export default defineNitroPlugin((nitroApp: NitroApp) => {
const { discoverImages, isNuxtContentDocumentDriven } = useSimpleSitemapRuntimeConfig()
const { discoverImages, discoverVideos, isNuxtContentDocumentDriven } = useSimpleSitemapRuntimeConfig()
// @ts-expect-error untyped
nitroApp.hooks.hook('content:file:afterParse', async (content: ParsedContent) => {
const validExtensions = ['md', 'mdx']
Expand All @@ -23,6 +23,16 @@ export default defineNitroPlugin((nitroApp: NitroApp) => {
.map(i => ({ loc: i.props!.src })) || [])
}

// add any top level videos
let videos: SitemapUrl['videos'] = []
if (discoverVideos) {
videos = (content.body?.children
?.filter(c =>
c.tag && c.props?.src && ['video'].includes(c.tag.toLowerCase()),
)
.map(i => ({ content_loc: i.props!.src })) || [])
}

const sitemapConfig = typeof content.sitemap === 'object' ? content.sitemap : {}
const lastmod = content.modifiedAt || content.updatedAt
const defaults: Partial<SitemapUrl> = {}
Expand All @@ -32,6 +42,8 @@ export default defineNitroPlugin((nitroApp: NitroApp) => {
defaults.loc = content.path
if (images.length > 0)
defaults.images = images
if (videos.length > 0)
defaults.videos = videos
if (lastmod)
defaults.lastmod = lastmod
const definition = defu(sitemapConfig, defaults) as Partial<SitemapUrl>
Expand Down
8 changes: 7 additions & 1 deletion src/runtime/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ export interface ModuleOptions extends SitemapDefinition {
* @default true
*/
discoverImages: boolean
/**
* When prerendering, should videos be automatically discovered and added to the sitemap.
*
* @default true
*/
discoverVideos: boolean
/**
* When chunking the sitemaps into multiple files, how many entries should each file contain.
*
Expand Down Expand Up @@ -193,7 +199,7 @@ export interface AutoI18nConfig {
strategy: 'prefix' | 'prefix_except_default' | 'prefix_and_default' | 'no_prefix'
}

export interface ModuleRuntimeConfig extends Pick<ModuleOptions, 'cacheMaxAgeSeconds' | 'sitemapName' | 'excludeAppSources' | 'sortEntries' | 'defaultSitemapsChunkSize' | 'xslColumns' | 'xslTips' | 'debug' | 'discoverImages' | 'autoLastmod' | 'xsl' | 'credits' > {
export interface ModuleRuntimeConfig extends Pick<ModuleOptions, 'cacheMaxAgeSeconds' | 'sitemapName' | 'excludeAppSources' | 'sortEntries' | 'defaultSitemapsChunkSize' | 'xslColumns' | 'xslTips' | 'debug' | 'discoverImages' | 'discoverVideos' | 'autoLastmod' | 'xsl' | 'credits' > {
version: string
isNuxtContentDocumentDriven: boolean
sitemaps: { index?: Pick<SitemapDefinition, 'sitemapName' | '_route'> & { sitemaps: SitemapIndexEntry[] } } & Record<string, Omit<SitemapDefinition, 'urls'> & { _hasSourceChunk?: boolean }>
Expand Down
76 changes: 73 additions & 3 deletions src/util/extractSitemapMetaFromHtml.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { withSiteUrl } from 'nuxt-site-config-kit/urls'
import { parseURL } from 'ufo'
import type { ResolvedSitemapUrl, SitemapUrl } from '../runtime/types'
import type { ResolvedSitemapUrl, SitemapUrl, VideoEntry } from '../runtime/types'

export function extractSitemapMetaFromHtml(html: string, options?: { images?: boolean, lastmod?: boolean, alternatives?: boolean }) {
options = options || { images: true, lastmod: true, alternatives: true }
export function extractSitemapMetaFromHtml(html: string, options?: { images?: boolean, videos?: boolean, lastmod?: boolean, alternatives?: boolean }) {
options = options || { images: true, videos: true, lastmod: true, alternatives: true }
const payload: Partial<SitemapUrl> = {}
if (options?.images) {
const images = new Set<string>()
Expand All @@ -30,6 +30,76 @@ export function extractSitemapMetaFromHtml(html: string, options?: { images?: bo
payload.images = [...images].map(i => ({ loc: i }))
}

if (options?.videos) {
  // Video discovery: scan the markup inside the first <main> element for <video>
  // tags and turn each playable URL (the tag's own `src` and every child
  // <source src>) into a sitemap video entry. Thumbnail, title and description
  // come from the tag's `poster`, `data-title` and `data-description` attributes.
  const discovered: { src: string, poster: string, title: string, description: string }[] = []
  const mainHtml = /<main[^>]*>([\s\S]*?)<\/main>/.exec(html)?.[1]

  if (mainHtml && mainHtml.includes('<video')) {
    // `[^>]*` cannot cross the end of the opening tag, so the attribute regexes
    // below only ever read attributes of the <video> tag itself, even though
    // they are run against the whole element (opening tag + children).
    const videoRegex = /<video[^>]*>([\s\S]*?)<\/video>/g
    const videoSrcRegex = /<video[^>]*\s+src="([^"]+)"/
    const videoPosterRegex = /<video[^>]*\s+poster="([^"]+)"/
    const videoTitleRegex = /<video[^>]*\s+data-title="([^"]+)"/
    const videoDescriptionRegex = /<video[^>]*\s+data-description="([^"]+)"/
    const sourceRegex = /<source[^>]*\s+src="([^"]+)"/g

    for (const videoMatch of mainHtml.matchAll(videoRegex)) {
      const videoElement = videoMatch[0]
      const videoChildren = videoMatch[1]

      const poster = videoPosterRegex.exec(videoElement)?.[1] ?? ''
      const title = videoTitleRegex.exec(videoElement)?.[1] ?? ''
      const description = videoDescriptionRegex.exec(videoElement)?.[1] ?? ''

      // A video sitemap entry needs a thumbnail, a title and a description.
      // Emitting empty strings for them would produce invalid <video:video>
      // nodes, so videos missing any of the three are skipped instead.
      if (!poster || !title || !description)
        continue

      // Keep the original ordering: the tag's own `src` first, then each
      // child <source> in document order.
      const locations: string[] = []
      const ownSrc = videoSrcRegex.exec(videoElement)?.[1]
      if (ownSrc)
        locations.push(ownSrc)
      for (const sourceMatch of videoChildren.matchAll(sourceRegex))
        locations.push(sourceMatch[1])

      for (const src of locations)
        discovered.push({ src, poster, title, description })
    }
  }

  // Only set `payload.videos` when something was found, so an empty array never
  // overrides values merged in from other sources.
  if (discovered.length > 0) {
    // NOTE(review): the assertion is kept from the original because the full
    // shape of VideoEntry is not visible here; confirm and drop it if these
    // four fields satisfy the type on their own.
    payload.videos = discovered.map(video => ({
      content_loc: video.src,
      thumbnail_loc: video.poster,
      title: video.title,
      description: video.description,
    }) as VideoEntry)
  }
}


if (options?.lastmod) {
// let's extract the lastmod from the html using the following tags:
const articleModifiedTime = html.match(/<meta[^>]+property="article:modified_time"[^>]+content="([^"]+)"/)?.[1]
Expand Down
Loading

0 comments on commit 630cae1

Please sign in to comment.