From 7dcadc05dfb5fcb94bcf827067d2570258716507 Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Fri, 10 Jan 2025 08:49:01 +0200 Subject: [PATCH 1/3] UHF-10406: Improve video importing This ensures that removing the video from Helbit also removes the video from rekry. --- .../helfi_rekry_content.install | 15 +++ .../helfi_rekry_content.module | 70 ++++++-------- .../migrations/job_listing_videos.yml | 20 +--- .../migrations/job_listings.yml | 17 ++-- .../tests/src/Kernel/JobMigrationTest.php | 94 +++++++++++++++++++ 5 files changed, 149 insertions(+), 67 deletions(-) create mode 100644 public/modules/custom/helfi_rekry_content/tests/src/Kernel/JobMigrationTest.php diff --git a/public/modules/custom/helfi_rekry_content/helfi_rekry_content.install b/public/modules/custom/helfi_rekry_content/helfi_rekry_content.install index 471f81c5..027c192a 100644 --- a/public/modules/custom/helfi_rekry_content/helfi_rekry_content.install +++ b/public/modules/custom/helfi_rekry_content/helfi_rekry_content.install @@ -431,3 +431,18 @@ function helfi_rekry_content_update_9010(): void { $entityUsageConfig->save(); } } + +/** + * UHF-10406: Clear video migration map. + */ +function helfi_rekry_content_update_9011(): void { + $database = \Drupal::database(); + $migrations = ['helfi_rekry_videos']; + + foreach ($migrations as $migration) { + $table_name = 'migrate_map_' . $migration; + $database + ->schema() + ->dropTable($table_name); + } +} diff --git a/public/modules/custom/helfi_rekry_content/helfi_rekry_content.module b/public/modules/custom/helfi_rekry_content/helfi_rekry_content.module index 11c6efd7..913e72ad 100644 --- a/public/modules/custom/helfi_rekry_content/helfi_rekry_content.module +++ b/public/modules/custom/helfi_rekry_content/helfi_rekry_content.module @@ -15,6 +15,8 @@ use Drupal\helfi_platform_config\DTO\ParagraphTypeCollection; use Drupal\helfi_rekry_content\Entity\JobListing; use Drupal\media\OEmbed\ProviderException; use Drupal\media\OEmbed\ResourceException; +use Drupal\media\OEmbed\ResourceFetcherInterface; +use Drupal\media\OEmbed\UrlResolverInterface; use Drupal\migrate\MigrateSkipRowException; use Drupal\node\NodeInterface; use Drupal\paragraphs\ParagraphInterface; @@ -148,37 +150,37 @@ function _helfi_rekry_content_get_media_image(string|NULL $fid = NULL): ?string /** * Validate and return video url, used in migration. * - * @param string|null $url + * @param string $url * The video url. * - * @return string|null - * Valid video url or null + * @return string + * Valid video url * * @throws \Drupal\migrate\MigrateSkipRowException */ -function _helfi_rekry_content_get_video_url(string|NULL $url = NULL): ?string { +function _helfi_rekry_content_get_video_url(string $url): string { try { /** @var \Drupal\media\OEmbed\UrlResolverInterface $resolver */ - $resolver = \Drupal::service('media.oembed.url_resolver'); + $resolver = \Drupal::service(UrlResolverInterface::class); $provider = $resolver->getProviderByUrl($url); if (!in_array($provider->getName(), ['YouTube', 'Icareus Suite'])) { - throw new MigrateSkipRowException(); + throw new MigrateSkipRowException(save_to_map: FALSE); } } catch (ResourceException | ProviderException $e) { \Drupal::logger('helfi_rekry_content') ->notice('Video embed url "' . $url . '" failed validation with message: ' . $e->getMessage()); - throw new MigrateSkipRowException(); + throw new MigrateSkipRowException(save_to_map: FALSE); } // Ticket #UHF-9069 prevent migrating bad oembed links. try { // Use the same validation used in field validation. $resource_url = $resolver->getResourceUrl($url); - \Drupal::service('media.oembed.resource_fetcher') + \Drupal::service(ResourceFetcherInterface::class) ->fetchResource($resource_url); return $url; } @@ -187,7 +189,7 @@ function _helfi_rekry_content_get_video_url(string|NULL $url = NULL): ?string { \Drupal::logger('helfi_rekry_content') ->error('Bad video url rejected by oembed-validation: ' . $url); - throw new MigrateSkipRowException(); + throw new MigrateSkipRowException(save_to_map: FALSE); } } @@ -205,43 +207,27 @@ function _helfi_rekry_content_sanitize_video_url(string $url): string { return $url; } - if (!str_contains($url, "://")) { - $url = "https://$url"; - } - - // OEmbed does not accept YouTube embed links. - if (preg_match("/youtube\.com\/embed\/([\w\-_]+)$/", $url, $matches)) { - $url = sprintf("https://youtube.com/watch?v=%s", $matches[1]); + // Some valid YouTube links are not recognized by drupal/oembed_providers + // module, which triggers additional network requests that attempt to sniff + // oembed links directly from YouTube. However, YouTube does not like + // automated traffic from datacenters, so these requests often fail in + // production. + // + // This regex tries to pick video id from following patters and + // formats the links to the expected format. + // + // Features: + // - https:// or www. missing. + // - youtube.com/v/[id]. + // - youtu.be/[id] short links. + // - youtube.com/embed/[id]. + if (preg_match("/youtu(?:.*\/v\/|.*v=|\.be\/|.*\/embed\/)([A-Za-z0-9_\-]{11})/", $url, $matches)) { + $url = sprintf("https://www.youtube.com/watch?v=%s", $matches[1]); } return $url; } -/** - * Get video mid by video url. - * - * @param string $url - * The video url. - * - * @return string|null - * The mid or null - */ -function _helfi_rekry_content_lookup_video_mid(string $url): ?string { - $ids = \Drupal::entityQuery('media') - ->condition('bundle', 'remote_video') - ->condition('field_media_oembed_video', $url) - ->range(0, 1) - ->latestRevision() - ->accessCheck(FALSE) - ->execute(); - - if (!empty($ids)) { - return reset($ids); - } - - return NULL; -} - /** * Get node id by recruitment id. * @@ -268,7 +254,7 @@ function _helfi_rekry_content_lookup_job_nid(string $id): ?string { } /** - * Add http protocol to urls, since api response might not have themm. + * Add http protocol to urls, since api response might not have them. * * @param string|null $url * The url. diff --git a/public/modules/custom/helfi_rekry_content/migrations/job_listing_videos.yml b/public/modules/custom/helfi_rekry_content/migrations/job_listing_videos.yml index b654580e..3bc54fdc 100644 --- a/public/modules/custom/helfi_rekry_content/migrations/job_listing_videos.yml +++ b/public/modules/custom/helfi_rekry_content/migrations/job_listing_videos.yml @@ -11,28 +11,18 @@ migration_tags: migration_group: helfi_rekry_content label: 'HELfi Rekry - Job listing videos' source: - ids: - id: - type: string - video: - type: string plugin: helbit_open_jobs - track_changes: true fields: - - - name: id - label: Id - selector: jobAdvertisement/id - name: video label: Video selector: jobAdvertisement/embedLink - - - name: title - label: Title - selector: jobAdvertisement/title + ids: + video: + type: string + langcode: + type: string process: - name: title field_media_oembed_video: - plugin: skip_on_empty diff --git a/public/modules/custom/helfi_rekry_content/migrations/job_listings.yml b/public/modules/custom/helfi_rekry_content/migrations/job_listings.yml index 39345e2b..5dd97fee 100644 --- a/public/modules/custom/helfi_rekry_content/migrations/job_listings.yml +++ b/public/modules/custom/helfi_rekry_content/migrations/job_listings.yml @@ -198,18 +198,15 @@ process: field_original_language: plugin: default_value default_value: null - field_video/target_id: - - - plugin: skip_on_empty + field_video: + - plugin: skip_on_empty method: process source: video - - - plugin: callback - callable: _helfi_rekry_content_sanitize_video_url - source: video - - - plugin: callback - callable: _helfi_rekry_content_lookup_video_mid + - plugin: migration_lookup + migration: helfi_rekry_videos + source: + - video + - langcode field_organization_name: organization_name field_postal_area: postal_area field_postal_code: postal_code diff --git a/public/modules/custom/helfi_rekry_content/tests/src/Kernel/JobMigrationTest.php b/public/modules/custom/helfi_rekry_content/tests/src/Kernel/JobMigrationTest.php new file mode 100644 index 00000000..002b6759 --- /dev/null +++ b/public/modules/custom/helfi_rekry_content/tests/src/Kernel/JobMigrationTest.php @@ -0,0 +1,94 @@ +assertEquals($expected, \_helfi_rekry_content_sanitize_video_url($videoUrl)); + } + } + + /** + * Test video URL validation. + */ + public function testVideoValidationExceptions(): void { + $urlResolver = $this->prophesize(UrlResolverInterface::class); + $urlResolver->getProviderByUrl(Argument::any()) + ->willThrow(ProviderException::class); + + $this->container->set(UrlResolverInterface::class, $urlResolver->reveal()); + + $this->expectException(MigrateSkipRowException::class); + _helfi_rekry_content_get_video_url('some-url'); + } + + /** + * Test video URL validation with unknown provider. + */ + public function testVideoValidationProvider(): void { + $provider = new Provider('Some provider', 'https://example.com', [ + ['url' => 'https://example.com/oembed'], + ]); + + $urlResolver = $this->prophesize(UrlResolverInterface::class); + $urlResolver->getProviderByUrl(Argument::any()) + ->willReturn($provider); + + $this->container->set(UrlResolverInterface::class, $urlResolver->reveal()); + + $this->expectException(MigrateSkipRowException::class); + _helfi_rekry_content_get_video_url('some-url'); + } + + /** + * Data provider for testVideoUrlSanitization(). + */ + public static function videoUrlData(): array { + return [ + ['', [' ']], + [ + 'https://www.youtube.com/watch?v=g2eYKMjE8ew', + [ + 'youtube.com/watch?v=g2eYKMjE8ew', + 'youtu.be/g2eYKMjE8ew', + 'youtu.be/?v=g2eYKMjE8ew', + 'https://youtube.com/embed/g2eYKMjE8ew', + 'https://youtube.com/watch?v=g2eYKMjE8ew', + 'https://www.youtube.com/watch?v=g2eYKMjE8ew', + 'https://www.youtube.com/watch?foo=bar&v=g2eYKMjE8ew&bar=foo', + ], + ], + ]; + } + +} From 7bc63cce8d831740aebb2c23d70121c724255a54 Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Fri, 10 Jan 2025 08:49:02 +0200 Subject: [PATCH 2/3] UHF-10406: Fix crash when organization tag is missing --- .../helfi_rekry_content/src/Entity/JobListing.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php b/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php index e02a3321..b485353b 100644 --- a/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php +++ b/public/modules/custom/helfi_rekry_content/src/Entity/JobListing.php @@ -124,12 +124,12 @@ public function getCityDescriptions() : array { /** * Get organization taxonomy term. * - * @return \Drupal\taxonomy\TermInterface|bool + * @return \Drupal\taxonomy\TermInterface|false * Returns the organization taxonomy term or false if not set. * * @throws \Drupal\Core\TypedData\Exception\MissingDataException */ - public function getOrganization() : TermInterface|bool { + public function getOrganization() : TermInterface|FALSE { $organization_id = ''; // Get the organization id from the migrated field. @@ -153,7 +153,8 @@ public function getOrganization() : TermInterface|bool { $organization = $this->entityTypeManager() ->getStorage('taxonomy_term') ->load($organization_id); - return $organization; + + return $organization ?? FALSE; } catch (\Exception $e) { return FALSE; @@ -257,7 +258,10 @@ public function getOrganizationDescription() : FilteredMarkup|string { } // If not and the organization description is empty, // check if the organization taxonomy description is set and use it. - elseif ($organization_description->isEmpty() && !$organization->get('description')->isEmpty()) { + elseif ( + $organization_description->isEmpty() && + $organization && !$organization->get('description')->isEmpty() + ) { $organization_description = $organization->get('description'); } From f0e2358137a85b825119f266f405ac906f8482c6 Mon Sep 17 00:00:00 2001 From: Santeri Hurnanen Date: Fri, 10 Jan 2025 10:30:01 +0200 Subject: [PATCH 3/3] UHF-10406: Do not render video if oembed provider does not work --- .../hdbt_subtheme/templates/layout/node--job-listing.html.twig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/themes/custom/hdbt_subtheme/templates/layout/node--job-listing.html.twig b/public/themes/custom/hdbt_subtheme/templates/layout/node--job-listing.html.twig index e7f25c09..f7df30b0 100644 --- a/public/themes/custom/hdbt_subtheme/templates/layout/node--job-listing.html.twig +++ b/public/themes/custom/hdbt_subtheme/templates/layout/node--job-listing.html.twig @@ -181,7 +181,7 @@ {% endif %} - {% if content.field_video|render %} + {% if content.field_video|render|spaceless %}
{% include '@hdbt/component/remote-video.twig' with { video: content.field_video,