diff --git a/lib/RoadizCoreBundle/src/SearchEngine/AbstractSearchHandler.php b/lib/RoadizCoreBundle/src/SearchEngine/AbstractSearchHandler.php index 2265673a..8175a7fb 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/AbstractSearchHandler.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/AbstractSearchHandler.php @@ -303,6 +303,11 @@ protected function isQuerySingleWord(string $q): bool return preg_match('#[\s\-\'\"\–\—\’\”\‘\“\/\+\.\,]#', $q) !== 1; } + protected function formatDateTimeToUTC(\DateTimeInterface $dateTime): string + { + return gmdate('Y-m-d\TH:i:s\Z', $dateTime->getTimestamp()); + } + /** * @param array $args * diff --git a/lib/RoadizCoreBundle/src/SearchEngine/AbstractSolarium.php b/lib/RoadizCoreBundle/src/SearchEngine/AbstractSolarium.php index 5b1e50b4..7e5eb218 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/AbstractSolarium.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/AbstractSolarium.php @@ -9,7 +9,6 @@ use RZ\Roadiz\Markdown\MarkdownInterface; use Solarium\Core\Client\Client; use Solarium\Core\Query\DocumentInterface; -use Solarium\Core\Query\Result\Result; use Solarium\Core\Query\Result\ResultInterface; use Solarium\QueryType\Update\Query\Document; use Solarium\QueryType\Update\Query\Query; @@ -134,7 +133,7 @@ public function updateAndCommit(): ?ResultInterface */ public function update(Query $update): void { - $this->clean($update); + // Since Solr IDs are now deterministic and composite, we don't need to remove the document first; we just update it. 
$this->createEmptyDocument($update); $this->index(); // add the document to the update query @@ -197,7 +196,7 @@ public function cleanAndCommit(): void public function index(): bool { if ($this->document instanceof Document) { - $this->document->setKey('id', uniqid('', true)); + $this->document->setKey('id', $this->getCompositeIdentifier()); try { foreach ($this->getFieldsAssoc() as $key => $value) { @@ -310,4 +309,6 @@ public function cleanTextContent(?string $content, bool $stripMarkdown = true): $content = preg_replace('/[\x00-\x1F]/', '', $content); return $content; } + + abstract protected function getCompositeIdentifier(): string; } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/DocumentSearchHandler.php b/lib/RoadizCoreBundle/src/SearchEngine/DocumentSearchHandler.php index a0b9c1eb..0804fbcb 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/DocumentSearchHandler.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/DocumentSearchHandler.php @@ -72,15 +72,15 @@ protected function argFqProcess(array &$args): array } /* - * `all_tags_txt` can store all folders, even technical ones, this fields should not user-searchable. + * `all_tags_slugs_ss` can store all folders, even technical ones; this field should not be user-searchable. 
*/ if (!empty($args['folders'])) { if ($args['folders'] instanceof Folder) { - $args["fq"][] = sprintf('all_tags_txt:"%s"', $args['folders']->getFolderName()); + $args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $args['folders']->getFolderName()); } elseif (is_array($args['folders'])) { foreach ($args['folders'] as $folder) { if ($folder instanceof Folder) { - $args["fq"][] = sprintf('all_tags_txt:"%s"', $folder->getFolderName()); + $args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $folder->getFolderName()); } } } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/GlobalNodeSourceSearchHandler.php b/lib/RoadizCoreBundle/src/SearchEngine/GlobalNodeSourceSearchHandler.php index 73da71f3..f14e4dac 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/GlobalNodeSourceSearchHandler.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/GlobalNodeSourceSearchHandler.php @@ -9,7 +9,6 @@ use Doctrine\Persistence\ObjectManager; use RZ\Roadiz\CoreBundle\Entity\NodesSources; use RZ\Roadiz\CoreBundle\Entity\Translation; -use RZ\Roadiz\CoreBundle\Repository\NodesSourcesRepository; /** * @package RZ\Roadiz\CoreBundle\SearchEngine diff --git a/lib/RoadizCoreBundle/src/SearchEngine/NodeSourceSearchHandler.php b/lib/RoadizCoreBundle/src/SearchEngine/NodeSourceSearchHandler.php index f5916111..3e41672a 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/NodeSourceSearchHandler.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/NodeSourceSearchHandler.php @@ -102,15 +102,15 @@ protected function argFqProcess(array &$args): array /* * filter by tag or tags - * `all_tags_txt` can store all tags, even technical ones, this fields should not user-searchable. + * `all_tags_slugs_ss` can store all tags, even technical ones; this field should not be user-searchable. 
*/ if (!empty($args['tags'])) { if ($args['tags'] instanceof Tag) { - $args["fq"][] = sprintf('all_tags_txt:"%s"', $args['tags']->getTagName()); + $args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $args['tags']->getTagName()); } elseif (is_array($args['tags'])) { foreach ($args['tags'] as $tag) { if ($tag instanceof Tag) { - $args["fq"][] = sprintf('all_tags_txt:"%s"', $tag->getTagName()); + $args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $tag->getTagName()); } } } @@ -163,7 +163,7 @@ protected function argFqProcess(array &$args): array if (isset($args['publishedAt'])) { $tmp = "published_at_dt:"; if (!is_array($args['publishedAt']) && $args['publishedAt'] instanceof \DateTime) { - $tmp .= $args['publishedAt']->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z'); + $tmp .= $this->formatDateTimeToUTC($args['publishedAt']); } elseif ( isset($args['publishedAt'][0]) && $args['publishedAt'][0] === "BETWEEN" && @@ -173,23 +173,23 @@ protected function argFqProcess(array &$args): array $args['publishedAt'][2] instanceof \DateTime ) { $tmp .= "[" . - $args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . + $this->formatDateTimeToUTC($args['publishedAt'][1]) . " TO " . - $args['publishedAt'][2]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . "]"; + $this->formatDateTimeToUTC($args['publishedAt'][2]) . "]"; } elseif ( isset($args['publishedAt'][0]) && $args['publishedAt'][0] === "<=" && isset($args['publishedAt'][1]) && $args['publishedAt'][1] instanceof \DateTime ) { - $tmp .= "[* TO " . $args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . "]"; + $tmp .= "[* TO " . $this->formatDateTimeToUTC($args['publishedAt'][1]) . "]"; } elseif ( isset($args['publishedAt'][0]) && $args['publishedAt'][0] === ">=" && isset($args['publishedAt'][1]) && $args['publishedAt'][1] instanceof \DateTime ) { - $tmp .= "[" . 
$args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . " TO *]"; + $tmp .= "[" . $this->formatDateTimeToUTC($args['publishedAt'][1]) . " TO *]"; } unset($args['publishedAt']); $args["fq"][] = $tmp; diff --git a/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocument.php b/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocument.php index 469fbe2e..319d132f 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocument.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocument.php @@ -5,11 +5,13 @@ namespace RZ\Roadiz\CoreBundle\SearchEngine; use Psr\Log\LoggerInterface; +use RZ\Roadiz\Core\AbstractEntities\PersistableInterface; use RZ\Roadiz\CoreBundle\Entity\Document; use RZ\Roadiz\Markdown\MarkdownInterface; use Solarium\Core\Query\DocumentInterface; use Solarium\Core\Query\Result\ResultInterface; use Solarium\QueryType\Update\Query\Query; +use Symfony\Component\String\Slugger\AsciiSlugger; /** * Wrap a Solarium and a Document’ translations together to ease indexing. @@ -203,4 +205,9 @@ public function index(): bool return true; } + + protected function getCompositeIdentifier(): string + { + throw new \InvalidArgumentException('SolariumDocument should not provide any ID'); + } } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocumentTranslation.php b/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocumentTranslation.php index dc640488..5ad317de 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocumentTranslation.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/SolariumDocumentTranslation.php @@ -10,6 +10,7 @@ use RZ\Roadiz\Markdown\MarkdownInterface; use Solarium\QueryType\Update\Query\Query; use Symfony\Component\EventDispatcher\EventDispatcherInterface; +use Symfony\Component\String\Slugger\AsciiSlugger; /** * Wrap a Solarium and a DocumentTranslation together to ease indexing. 
@@ -65,4 +66,12 @@ public function clean(Query $update): bool return true; } + + protected function getCompositeIdentifier(): string + { + $namespace = explode('\\', get_class($this->documentTranslation)); + // get last 3 parts of namespace + $namespace = array_slice($namespace, -3); + return (new AsciiSlugger())->slug(implode(' ', $namespace))->lower()->snake() . '.' . $this->documentTranslation->getId(); + } } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/SolariumNodeSource.php b/lib/RoadizCoreBundle/src/SearchEngine/SolariumNodeSource.php index 00a8ca93..5ed06134 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/SolariumNodeSource.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/SolariumNodeSource.php @@ -10,6 +10,7 @@ use RZ\Roadiz\Markdown\MarkdownInterface; use Solarium\QueryType\Update\Query\Query; use Symfony\Component\EventDispatcher\EventDispatcherInterface; +use Symfony\Component\String\Slugger\AsciiSlugger; /** * Wrap a Solarium and a NodeSource together to ease indexing. @@ -70,4 +71,12 @@ public function clean(Query $update): bool return true; } + + protected function getCompositeIdentifier(): string + { + $namespace = explode('\\', get_class($this->nodeSource)); + // get last 3 parts of namespace + $namespace = array_slice($namespace, -3); + return (new AsciiSlugger())->slug(implode(' ', $namespace))->lower()->snake() . '.' . 
$this->nodeSource->getId(); + } } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php new file mode 100644 index 00000000..12aa92b0 --- /dev/null +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php @@ -0,0 +1,17 @@ +getTimestamp()); + } +} diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php index ee048e0c..b722fada 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php @@ -9,9 +9,8 @@ use RZ\Roadiz\CoreBundle\Event\Document\DocumentTranslationIndexingEvent; use RZ\Roadiz\CoreBundle\SearchEngine\AbstractSolarium; use RZ\Roadiz\CoreBundle\SearchEngine\SolariumDocumentTranslation; -use Symfony\Component\EventDispatcher\EventSubscriberInterface; -final class DefaultDocumentTranslationIndexingSubscriber implements EventSubscriberInterface +final class DefaultDocumentTranslationIndexingSubscriber extends AbstractIndexingSubscriber { /** * @inheritDoc @@ -34,25 +33,13 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void $assoc[SolariumDocumentTranslation::IDENTIFIER_KEY] = $documentTranslation->getId(); if ($document instanceof Document) { $assoc['document_id_i'] = $document->getId(); - $assoc['created_at_dt'] = $document->getCreatedAt() - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); - ; - $assoc['updated_at_dt'] = $document->getUpdatedAt() - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); - ; + $assoc['created_at_dt'] = $this->formatDateTimeToUTC($document->getCreatedAt()); + $assoc['updated_at_dt'] = 
$this->formatDateTimeToUTC($document->getUpdatedAt()); $copyrightValidSince = $document->getCopyrightValidSince() ?? new \DateTime('1970-01-01 00:00:00'); $copyrightValidUntil = $document->getCopyrightValidUntil() ?? new \DateTime('9999-12-31 23:59:59'); - $assoc['copyright_valid_since_dt'] = $copyrightValidSince - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); - ; - $assoc['copyright_valid_until_dt'] = $copyrightValidUntil - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); - ; + $assoc['copyright_valid_since_dt'] = $this->formatDateTimeToUTC($copyrightValidSince); + $assoc['copyright_valid_until_dt'] = $this->formatDateTimeToUTC($copyrightValidUntil); } $assoc['filename_s'] = $document->getFilename(); $assoc['mime_type_s'] = $document->getMimeType(); @@ -107,7 +94,7 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void $assoc['tags_txt_' . $lang] = implode(' ', $visibleFolderNames); /* - * `all_tags_txt` can store all folders, even technical one, this fields should not user searchable. + * `all_tags_slugs_ss` can store all folders, even technical ones; this field should not be user-searchable. 
*/ $allFolders = $document->getFolders(); $allFolderNames = []; @@ -115,8 +102,8 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void foreach ($allFolders as $folder) { $allFolderNames[] = $folder->getFolderName(); } - // Use all_tags_txt to be compatible with other data types - $assoc['all_tags_txt'] = array_filter(array_unique($allFolderNames)); + // Use all_tags_slugs_ss to be compatible with other data types + $assoc['all_tags_slugs_ss'] = array_filter(array_unique($allFolderNames)); /* * Collect data in a single field diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php index f4de081f..0077e7ed 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php @@ -11,9 +11,8 @@ use RZ\Roadiz\CoreBundle\Entity\Tag; use RZ\Roadiz\CoreBundle\Event\NodesSources\NodesSourcesIndexingEvent; use RZ\Roadiz\CoreBundle\SearchEngine\SolariumNodeSource; -use Symfony\Component\EventDispatcher\EventSubscriberInterface; -final class DefaultNodesSourcesIndexingSubscriber implements EventSubscriberInterface +final class DefaultNodesSourcesIndexingSubscriber extends AbstractIndexingSubscriber { /** * @inheritDoc @@ -58,17 +57,11 @@ public function onIndexing(NodesSourcesIndexingEvent $event): void $assoc['title'] = $title; $assoc['title_txt_' . 
$lang] = $title; - $assoc['created_at_dt'] = $node->getCreatedAt() - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); - $assoc['updated_at_dt'] = $node->getUpdatedAt() - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); + $assoc['created_at_dt'] = $this->formatDateTimeToUTC($node->getCreatedAt()); + $assoc['updated_at_dt'] = $this->formatDateTimeToUTC($node->getUpdatedAt()); if (null !== $nodeSource->getPublishedAt()) { - $assoc['published_at_dt'] = $nodeSource->getPublishedAt() - ->setTimezone(new \DateTimeZone('UTC')) - ->format('Y-m-d\TH:i:s\Z'); + $assoc['published_at_dt'] = $this->formatDateTimeToUTC($nodeSource->getPublishedAt()); } /* @@ -109,7 +102,7 @@ function (Tag $tag) use ($event, $nodeSource) { $assoc['tags_txt_' . $lang] = implode(' ', $out); /* - * `all_tags_txt` can store all tags, even technical one, this fields should not user searchable. + * `all_tags_slugs_ss` can store all tags, even technical ones; this field should not be user-searchable. */ $allOut = array_map( function (Tag $tag) { @@ -118,8 +111,8 @@ function (Tag $tag) { $nodeSource->getNode()->getTags()->toArray() ); $allOut = array_filter(array_unique($allOut)); - // Use all_tags_txt to be compatible with other data types - $assoc['all_tags_txt'] = $allOut; + // Use all_tags_slugs_ss to be compatible with other data types + $assoc['all_tags_slugs_ss'] = $allOut; } $criteria = new Criteria();