Skip to content

Commit

Permalink
Merge pull request #647 from City-of-Helsinki/UHF-10406
Browse files Browse the repository at this point in the history
UHF-10406: Improve helbit video handling
  • Loading branch information
hyrsky authored Jan 13, 2025
2 parents d76d987 + f0e2358 commit 70f0944
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,18 @@ function helfi_rekry_content_update_9010(): void {
$entityUsageConfig->save();
}
}

/**
 * UHF-10406: Clear video migration map.
 *
 * Drops the 'migrate_map_<migration id>' table of every migration listed
 * below, which currently is only 'helfi_rekry_videos'.
 *
 * NOTE(review): presumably the migrate module recreates the map table on the
 * next migration run — confirm before reusing this pattern elsewhere.
 */
function helfi_rekry_content_update_9011(): void {
  $schema = \Drupal::database()->schema();

  foreach (['helfi_rekry_videos'] as $migration_id) {
    // Map tables are named after the migration id with a fixed prefix.
    $schema->dropTable('migrate_map_' . $migration_id);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ use Drupal\helfi_platform_config\DTO\ParagraphTypeCollection;
use Drupal\helfi_rekry_content\Entity\JobListing;
use Drupal\media\OEmbed\ProviderException;
use Drupal\media\OEmbed\ResourceException;
use Drupal\media\OEmbed\ResourceFetcherInterface;
use Drupal\media\OEmbed\UrlResolverInterface;
use Drupal\migrate\MigrateSkipRowException;
use Drupal\node\NodeInterface;
use Drupal\paragraphs\ParagraphInterface;
Expand Down Expand Up @@ -148,37 +150,37 @@ function _helfi_rekry_content_get_media_image(string|NULL $fid = NULL): ?string
/**
* Validate and return video url, used in migration.
*
* @param string|null $url
* @param string $url
* The video url.
*
* @return string|null
* Valid video url or null
* @return string
* Valid video url
*
* @throws \Drupal\migrate\MigrateSkipRowException
*/
function _helfi_rekry_content_get_video_url(string|NULL $url = NULL): ?string {
function _helfi_rekry_content_get_video_url(string $url): string {
try {
/** @var \Drupal\media\OEmbed\UrlResolverInterface $resolver */
$resolver = \Drupal::service('media.oembed.url_resolver');
$resolver = \Drupal::service(UrlResolverInterface::class);

$provider = $resolver->getProviderByUrl($url);

if (!in_array($provider->getName(), ['YouTube', 'Icareus Suite'])) {
throw new MigrateSkipRowException();
throw new MigrateSkipRowException(save_to_map: FALSE);
}
}
catch (ResourceException | ProviderException $e) {
\Drupal::logger('helfi_rekry_content')
->notice('Video embed url "' . $url . '" failed validation with message: ' . $e->getMessage());

throw new MigrateSkipRowException();
throw new MigrateSkipRowException(save_to_map: FALSE);
}

// Ticket #UHF-9069 prevent migrating bad oembed links.
try {
// Use the same validation used in field validation.
$resource_url = $resolver->getResourceUrl($url);
\Drupal::service('media.oembed.resource_fetcher')
\Drupal::service(ResourceFetcherInterface::class)
->fetchResource($resource_url);
return $url;
}
Expand All @@ -187,7 +189,7 @@ function _helfi_rekry_content_get_video_url(string|NULL $url = NULL): ?string {
\Drupal::logger('helfi_rekry_content')
->error('Bad video url rejected by oembed-validation: ' . $url);

throw new MigrateSkipRowException();
throw new MigrateSkipRowException(save_to_map: FALSE);
}
}

Expand All @@ -205,43 +207,27 @@ function _helfi_rekry_content_sanitize_video_url(string $url): string {
return $url;
}

if (!str_contains($url, "://")) {
$url = "https://$url";
}

// OEmbed does not accept YouTube embed links.
if (preg_match("/youtube\.com\/embed\/([\w\-_]+)$/", $url, $matches)) {
$url = sprintf("https://youtube.com/watch?v=%s", $matches[1]);
// Some valid YouTube links are not recognized by drupal/oembed_providers
// module, which triggers additional network requests that attempt to sniff
// oembed links directly from YouTube. However, YouTube does not like
// automated traffic from datacenters, so these requests often fail in
// production.
//
// This regex tries to pick the video id from the following patterns and
// formats the links to the expected format.
//
// Features:
// - https:// or www. missing.
// - youtube.com/v/[id].
// - youtu.be/[id] short links.
// - youtube.com/embed/[id].
if (preg_match("/youtu(?:.*\/v\/|.*v=|\.be\/|.*\/embed\/)([A-Za-z0-9_\-]{11})/", $url, $matches)) {
$url = sprintf("https://www.youtube.com/watch?v=%s", $matches[1]);
}

return $url;
}

/**
 * Look up the media id (mid) of a remote video by its url.
 *
 * Queries 'remote_video' media entities whose 'field_media_oembed_video'
 * value equals the given url. Only the latest revision is considered, access
 * checks are disabled and at most one result is requested.
 *
 * @param string $url
 *   The video url.
 *
 * @return string|null
 *   The mid of the first match, or NULL when the query returns nothing.
 */
function _helfi_rekry_content_lookup_video_mid(string $url): ?string {
  $matches = \Drupal::entityQuery('media')
    ->condition('bundle', 'remote_video')
    ->condition('field_media_oembed_video', $url)
    ->range(0, 1)
    ->latestRevision()
    ->accessCheck(FALSE)
    ->execute();

  // The query is limited to one row, so the first element is the only one.
  return empty($matches) ? NULL : reset($matches);
}

/**
* Get node id by recruitment id.
*
Expand All @@ -268,7 +254,7 @@ function _helfi_rekry_content_lookup_job_nid(string $id): ?string {
}

/**
* Add http protocol to urls, since api response might not have themm.
* Add http protocol to urls, since api response might not have them.
*
* @param string|null $url
* The url.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,18 @@ migration_tags:
migration_group: helfi_rekry_content
label: 'HELfi Rekry - Job listing videos'
source:
ids:
id:
type: string
video:
type: string
plugin: helbit_open_jobs
track_changes: true
fields:
-
name: id
label: Id
selector: jobAdvertisement/id
-
name: video
label: Video
selector: jobAdvertisement/embedLink
-
name: title
label: Title
selector: jobAdvertisement/title
ids:
video:
type: string
langcode:
type: string
process:
name: title
field_media_oembed_video:
-
plugin: skip_on_empty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,18 +198,15 @@ process:
field_original_language:
plugin: default_value
default_value: null
field_video/target_id:
-
plugin: skip_on_empty
field_video:
- plugin: skip_on_empty
method: process
source: video
-
plugin: callback
callable: _helfi_rekry_content_sanitize_video_url
source: video
-
plugin: callback
callable: _helfi_rekry_content_lookup_video_mid
- plugin: migration_lookup
migration: helfi_rekry_videos
source:
- video
- langcode
field_organization_name: organization_name
field_postal_area: postal_area
field_postal_code: postal_code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ public function getCityDescriptions() : array {
/**
* Get organization taxonomy term.
*
* @return \Drupal\taxonomy\TermInterface|bool
* @return \Drupal\taxonomy\TermInterface|false
* Returns the organization taxonomy term or false if not set.
*
* @throws \Drupal\Core\TypedData\Exception\MissingDataException
*/
public function getOrganization() : TermInterface|bool {
public function getOrganization() : TermInterface|FALSE {
$organization_id = '';

// Get the organization id from the migrated field.
Expand All @@ -153,7 +153,8 @@ public function getOrganization() : TermInterface|bool {
$organization = $this->entityTypeManager()
->getStorage('taxonomy_term')
->load($organization_id);
return $organization;

return $organization ?? FALSE;
}
catch (\Exception $e) {
return FALSE;
Expand Down Expand Up @@ -257,7 +258,10 @@ public function getOrganizationDescription() : FilteredMarkup|string {
}
// If not and the organization description is empty,
// check if the organization taxonomy description is set and use it.
elseif ($organization_description->isEmpty() && !$organization->get('description')->isEmpty()) {
elseif (
$organization_description->isEmpty() &&
$organization && !$organization->get('description')->isEmpty()
) {
$organization_description = $organization->get('description');
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
<?php

declare(strict_types=1);

namespace Drupal\Tests\helfi_rekry_content\Kernel;

use Drupal\KernelTests\KernelTestBase;
use Drupal\media\OEmbed\Provider;
use Drupal\media\OEmbed\ProviderException;
use Drupal\media\OEmbed\UrlResolverInterface;
use Drupal\migrate\MigrateSkipRowException;
use Prophecy\Argument;
use Prophecy\PhpUnit\ProphecyTrait;

/**
 * Job migration tests.
 *
 * Exercises the video url helper functions of the helfi_rekry_content module:
 * url sanitization and oEmbed based url validation.
 *
 * @group helfi_rekry_content
 */
class JobMigrationTest extends KernelTestBase {

  use ProphecyTrait;

  /**
   * {@inheritdoc}
   */
  protected static $modules = [
    'content_lock',
    'helfi_rekry_content',
  ];

  /**
   * Test video URL sanitization.
   *
   * Every url in $videoUrls must sanitize to exactly $expected.
   *
   * @param mixed $expected
   *   The expected sanitized url.
   * @param array $videoUrls
   *   Raw input urls that should all produce the expected value.
   *
   * @dataProvider videoUrlData
   */
  public function testVideoUrlSanitization(mixed $expected, array $videoUrls): void {
    foreach ($videoUrls as $videoUrl) {
      // Strict comparison with a message, so a failure inside the loop names
      // the offending input instead of only showing the expected value.
      $this->assertSame(
        $expected,
        \_helfi_rekry_content_sanitize_video_url($videoUrl),
        sprintf('Unexpected sanitization result for input "%s".', $videoUrl),
      );
    }
  }

  /**
   * Test video URL validation.
   *
   * A provider exception thrown by the url resolver must be converted into a
   * MigrateSkipRowException.
   */
  public function testVideoValidationExceptions(): void {
    $urlResolver = $this->prophesize(UrlResolverInterface::class);
    $urlResolver->getProviderByUrl(Argument::any())
      ->willThrow(ProviderException::class);

    // Replace the real resolver so no provider lookup is performed.
    $this->container->set(UrlResolverInterface::class, $urlResolver->reveal());

    $this->expectException(MigrateSkipRowException::class);
    _helfi_rekry_content_get_video_url('some-url');
  }

  /**
   * Test video URL validation with unknown provider.
   *
   * A url that resolves to the provider named 'Some provider' must be skipped
   * with a MigrateSkipRowException.
   */
  public function testVideoValidationProvider(): void {
    $provider = new Provider('Some provider', 'https://example.com', [
      ['url' => 'https://example.com/oembed'],
    ]);

    $urlResolver = $this->prophesize(UrlResolverInterface::class);
    $urlResolver->getProviderByUrl(Argument::any())
      ->willReturn($provider);

    // Replace the real resolver so the stub provider is always returned.
    $this->container->set(UrlResolverInterface::class, $urlResolver->reveal());

    $this->expectException(MigrateSkipRowException::class);
    _helfi_rekry_content_get_video_url('some-url');
  }

  /**
   * Data provider for testVideoUrlSanitization().
   *
   * @return array
   *   Named data sets of [expected url, list of input urls].
   */
  public static function videoUrlData(): array {
    return [
      'whitespace only' => ['', [' ']],
      'youtube url variants' => [
        'https://www.youtube.com/watch?v=g2eYKMjE8ew',
        [
          'youtube.com/watch?v=g2eYKMjE8ew',
          'youtu.be/g2eYKMjE8ew',
          'youtu.be/?v=g2eYKMjE8ew',
          'https://youtube.com/embed/g2eYKMjE8ew',
          'https://youtube.com/watch?v=g2eYKMjE8ew',
          'https://www.youtube.com/watch?v=g2eYKMjE8ew',
          'https://www.youtube.com/watch?foo=bar&v=g2eYKMjE8ew&bar=foo',
        ],
      ],
    ];
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
</div>
{% endif %}

{% if content.field_video|render %}
{% if content.field_video|render|spaceless %}
<div class="job-listing__video job-listing__item">
{% include '@hdbt/component/remote-video.twig' with {
video: content.field_video,
Expand Down

0 comments on commit 70f0944

Please sign in to comment.