From fea4ea02d4d9e7dc38ad8912a585f256c0e72bbc Mon Sep 17 00:00:00 2001 From: Ambroise Maupate Date: Tue, 26 Sep 2023 10:41:51 +0200 Subject: [PATCH] feat(Solr): Added `TreeWalkerIndexingEventSubscriber` to index node children using a TreeWalker, improved `AttributeValueIndexingSubscriber` --- lib/RoadizCoreBundle/config/services.yaml | 1 + .../Subscriber/AbstractIndexingSubscriber.php | 7 +- .../AttributeValueIndexingSubscriber.php | 79 ++++++++++---- ...tDocumentTranslationIndexingSubscriber.php | 2 +- .../DefaultNodesSourcesIndexingSubscriber.php | 33 +++--- .../TreeWalkerIndexingEventSubscriber.php | 100 ++++++++++++++++++ 6 files changed, 179 insertions(+), 43 deletions(-) rename lib/RoadizCoreBundle/src/{EventSubscriber => SearchEngine/Subscriber}/AttributeValueIndexingSubscriber.php (51%) create mode 100644 lib/RoadizCoreBundle/src/SearchEngine/Subscriber/TreeWalkerIndexingEventSubscriber.php diff --git a/lib/RoadizCoreBundle/config/services.yaml b/lib/RoadizCoreBundle/config/services.yaml index a4482a33..0dced9fd 100644 --- a/lib/RoadizCoreBundle/config/services.yaml +++ b/lib/RoadizCoreBundle/config/services.yaml @@ -42,6 +42,7 @@ services: $apiResourcesDir: '%kernel.project_dir%/config/api_resources' $debug: '%kernel.debug%' $defaultControllerClass: '%roadiz_core.default_node_source_controller%' + $defaultLocale: '%kernel.default_locale%' $webhookMessageTypes: '%roadiz_core.webhook.message_types%' $useAcceptLanguageHeader: '%roadiz_core.use_accept_language_header%' $healthCheckToken: '%roadiz_core.health_check_token%' diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php index 92ceead6..a8db300d 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AbstractIndexingSubscriber.php @@ -4,12 +4,15 @@ namespace RZ\Roadiz\CoreBundle\SearchEngine\Subscriber; -use RZ\Roadiz\Core\AbstractEntities\PersistableInterface; use Symfony\Component\EventDispatcher\EventSubscriberInterface; -use Symfony\Component\String\Slugger\AsciiSlugger; abstract class AbstractIndexingSubscriber implements EventSubscriberInterface { + protected function flattenTextCollection(array $collection): string + { + return trim(implode(PHP_EOL, array_filter($collection))); + } + protected function formatDateTimeToUTC(\DateTimeInterface $dateTime): string { return gmdate('Y-m-d\TH:i:s\Z', $dateTime->getTimestamp()); diff --git a/lib/RoadizCoreBundle/src/EventSubscriber/AttributeValueIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AttributeValueIndexingSubscriber.php similarity index 51% rename from lib/RoadizCoreBundle/src/EventSubscriber/AttributeValueIndexingSubscriber.php rename to lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AttributeValueIndexingSubscriber.php index daee6631..929a85e9 100644 --- a/lib/RoadizCoreBundle/src/EventSubscriber/AttributeValueIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/AttributeValueIndexingSubscriber.php @@ -2,15 +2,15 @@ declare(strict_types=1); -namespace RZ\Roadiz\CoreBundle\EventSubscriber; +namespace RZ\Roadiz\CoreBundle\SearchEngine\Subscriber; +use RZ\Roadiz\CoreBundle\Event\NodesSources\NodesSourcesIndexingEvent; use RZ\Roadiz\CoreBundle\Model\AttributeInterface; use RZ\Roadiz\CoreBundle\Model\AttributeValueInterface; -use RZ\Roadiz\CoreBundle\Event\NodesSources\NodesSourcesIndexingEvent; use RZ\Roadiz\CoreBundle\SearchEngine\AbstractSolarium; -use Symfony\Component\EventDispatcher\EventSubscriberInterface; +use Symfony\Component\String\Slugger\AsciiSlugger; -class AttributeValueIndexingSubscriber implements EventSubscriberInterface +final class AttributeValueIndexingSubscriber extends AbstractIndexingSubscriber { /** * {@inheritdoc} @@ -18,13 +18,13 @@ class AttributeValueIndexingSubscriber implements EventSubscriberInterface public static function getSubscribedEvents(): array { return [ - NodesSourcesIndexingEvent::class => 'onNodeSourceIndexing', + NodesSourcesIndexingEvent::class => ['onIndexing', 900], ]; } - public function onNodeSourceIndexing(NodesSourcesIndexingEvent $event): void + public function onIndexing(NodesSourcesIndexingEvent $event): void { - if ($event->getNodeSource()->getNode()->getAttributeValues()->count() === 0) { + if ($event->isSubResource()) { return; } @@ -33,6 +33,20 @@ public function onNodeSourceIndexing(NodesSourcesIndexingEvent $event): void ->getNode() ->getAttributesValuesForTranslation($event->getNodeSource()->getTranslation()); + if ($attributeValues->count() === 0) { + return; + } + + $lang = $event->getNodeSource()->getTranslation()->getLocale(); + if ( + !\in_array( + $lang, + AbstractSolarium::$availableLocalizedTextFields + ) + ) { + $lang = null; + } + /** @var AttributeValueInterface $attributeValue */ foreach ($attributeValues as $attributeValue) { if ($attributeValue->getAttribute()->isSearchable()) { @@ -45,27 +59,43 @@ public function onNodeSourceIndexing(NodesSourcesIndexingEvent $event): void : null; } if (null !== $data) { + $fieldName = (new AsciiSlugger())->slug($attributeValue->getAttribute()->getCode())->snake()->lower()->toString(); switch ($attributeValue->getType()) { + case AttributeInterface::INTEGER_T: + $fieldName .= '_i'; + $associations[$fieldName] = $data; + break; + case AttributeInterface::DECIMAL_T: + case AttributeInterface::PERCENT_T: + $fieldName .= '_f'; + $associations[$fieldName] = $data; + break; + case AttributeInterface::ENUM_T: + case AttributeInterface::COUNTRY_T: + case AttributeInterface::COLOUR_T: + case AttributeInterface::EMAIL_T: + $fieldName .= '_s'; + $content = $event->getSolariumDocument()->cleanTextContent($data); + $associations[$fieldName] = $content; + $associations['collection_txt'][] = $content; + if (null !== $lang) { + // Compile all text content into a single localized text field. + $associations['collection_txt_' . $lang] = $this->flattenTextCollection($associations['collection_txt']); + } + break; case AttributeInterface::DATETIME_T: case AttributeInterface::DATE_T: - if ($data instanceof \DateTime) { - $fieldName = $attributeValue->getAttribute()->getCode() . '_dt'; - $associations[$fieldName] = $data->format('Y-m-d\TH:i:s'); + if ($data instanceof \DateTimeInterface) { + $fieldName .= '_dt'; + $associations[$fieldName] = $this->formatDateTimeToUTC($data); } break; case AttributeInterface::STRING_T: - $fieldName = $attributeValue->getAttribute()->getCode(); /* * Use locale to create field name * with right language */ - if ( - in_array( - $event->getNodeSource()->getTranslation()->getLocale(), - AbstractSolarium::$availableLocalizedTextFields - ) - ) { - $lang = $event->getNodeSource()->getTranslation()->getLocale(); + if (null !== $lang) { $fieldName .= '_txt_' . $lang; } else { $lang = null; @@ -75,11 +105,14 @@ public function onNodeSourceIndexing(NodesSourcesIndexingEvent $event): void * Strip Markdown syntax */ $content = $event->getSolariumDocument()->cleanTextContent($data); - $associations[$fieldName] = $content; - $associations['collection_txt'][] = $content; - if (null !== $lang) { - // Compile all text content into a single localized text field. - $associations['collection_txt_' . $lang] = implode(PHP_EOL, $associations['collection_txt']); + if (null !== $content) { + $content = trim($content); + $associations[$fieldName] = $content; + $associations['collection_txt'][] = $content; + if (null !== $lang) { + // Compile all text content into a single localized text field. + $associations['collection_txt_' . $lang] = $this->flattenTextCollection($associations['collection_txt']); + } } break; } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php index c2940513..a4da9db0 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultDocumentTranslationIndexingSubscriber.php @@ -112,7 +112,7 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void */ $assoc['collection_txt'] = $collection; // Compile all text content into a single localized text field. - $assoc['collection_txt_' . $lang] = trim(implode(PHP_EOL, array_filter($collection))); + $assoc['collection_txt_' . $lang] = $this->flattenTextCollection($collection); $event->setAssociations($assoc); } } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php index 91a84c47..70a7ae00 100644 --- a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/DefaultNodesSourcesIndexingSubscriber.php @@ -4,12 +4,11 @@ namespace RZ\Roadiz\CoreBundle\SearchEngine\Subscriber; -use Doctrine\Common\Collections\Criteria; -use RZ\Roadiz\Core\AbstractEntities\AbstractField; use RZ\Roadiz\CoreBundle\Entity\NodesSources; use RZ\Roadiz\CoreBundle\Entity\NodeTypeField; use RZ\Roadiz\CoreBundle\Entity\Tag; use RZ\Roadiz\CoreBundle\Event\NodesSources\NodesSourcesIndexingEvent; +use RZ\Roadiz\CoreBundle\SearchEngine\AbstractSolarium; use RZ\Roadiz\CoreBundle\SearchEngine\SolariumNodeSource; final class DefaultNodesSourcesIndexingSubscriber extends AbstractIndexingSubscriber @@ -33,15 +32,9 @@ public function onIndexing(NodesSourcesIndexingEvent $event): void $node = $nodeSource->getNode(); // Need a documentType field - $assoc[SolariumNodeSource::TYPE_DISCRIMINATOR] = SolariumNodeSource::DOCUMENT_TYPE; + $assoc[AbstractSolarium::TYPE_DISCRIMINATOR] = SolariumNodeSource::DOCUMENT_TYPE; // Need a nodeSourceId field $assoc[SolariumNodeSource::IDENTIFIER_KEY] = $nodeSource->getId(); - $assoc['node_type_s'] = $nodeSource->getNodeTypeName(); - $assoc['node_name_s'] = $node->getNodeName(); - $assoc['slug_s'] = $node->getNodeName(); - $assoc['node_status_i'] = $node->getStatus(); - $assoc['node_visible_b'] = $node->isVisible(); - $assoc['node_reachable_b'] = $nodeSource->isReachable(); // Need a locale field $locale = $nodeSource->getTranslation()->getLocale(); @@ -55,17 +48,23 @@ public function onIndexing(NodesSourcesIndexingEvent $event): void $assoc['title'] = $title; $assoc['title_txt_' . $lang] = $title; - $assoc['created_at_dt'] = $this->formatDateTimeToUTC($node->getCreatedAt()); - $assoc['updated_at_dt'] = $this->formatDateTimeToUTC($node->getUpdatedAt()); - - if (null !== $nodeSource->getPublishedAt()) { - $assoc['published_at_dt'] = $this->formatDateTimeToUTC($nodeSource->getPublishedAt()); - } - /* * Do not index locale and tags if this is a sub-resource */ if (!$subResource) { + $assoc['node_type_s'] = $nodeSource->getNodeTypeName(); + $assoc['node_name_s'] = $node->getNodeName(); + $assoc['slug_s'] = $node->getNodeName(); + $assoc['node_status_i'] = $node->getStatus(); + $assoc['node_visible_b'] = $node->isVisible(); + $assoc['node_reachable_b'] = $nodeSource->isReachable(); + $assoc['created_at_dt'] = $this->formatDateTimeToUTC($node->getCreatedAt()); + $assoc['updated_at_dt'] = $this->formatDateTimeToUTC($node->getUpdatedAt()); + + if (null !== $nodeSource->getPublishedAt()) { + $assoc['published_at_dt'] = $this->formatDateTimeToUTC($nodeSource->getPublishedAt()); + } + if ($this->canIndexTitleInCollection($nodeSource)) { $collection[] = $title; } @@ -197,7 +196,7 @@ function (Tag $tag) { */ $assoc['collection_txt'] = $collection; // Compile all text content into a single localized text field. - $assoc['collection_txt_' . $lang] = trim(implode(PHP_EOL, array_filter($collection))); + $assoc['collection_txt_' . $lang] = $this->flattenTextCollection($collection); $event->setAssociations($assoc); } diff --git a/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/TreeWalkerIndexingEventSubscriber.php b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/TreeWalkerIndexingEventSubscriber.php new file mode 100644 index 00000000..76ccc9ef --- /dev/null +++ b/lib/RoadizCoreBundle/src/SearchEngine/Subscriber/TreeWalkerIndexingEventSubscriber.php @@ -0,0 +1,100 @@ +walkerContext = $walkerContext; + $this->solariumFactory = $solariumFactory; + $this->maxLevel = $maxLevel; + $this->defaultLocale = $defaultLocale; + } + + /** + * @inheritDoc + */ + public static function getSubscribedEvents(): array + { + return [ + NodesSourcesIndexingEvent::class => ['onIndexing', -99], + ]; + } + + public function onIndexing(NodesSourcesIndexingEvent $event): void + { + $nodeSource = $event->getNodeSource(); + if (!$nodeSource->isReachable() || $event->isSubResource()) { + return; + } + + $assoc = $event->getAssociations(); + + $blockWalker = AutoChildrenNodeSourceWalker::build( + $nodeSource, + $this->walkerContext, + $this->maxLevel + ); + + // Need a locale field + $locale = $nodeSource->getTranslation()->getLocale(); + $lang = \Locale::getPrimaryLanguage($locale) ?? $this->defaultLocale; + + try { + foreach ($blockWalker->getChildren() as $subWalker) { + $this->walkAndIndex($subWalker, $assoc, $lang); + } + } catch (\Exception $e) { + } + + $event->setAssociations($assoc); + } + + /** + * @param WalkerInterface $walker + * @param array $assoc + * @param string $locale + * @throws \Exception + */ + protected function walkAndIndex(WalkerInterface $walker, array &$assoc, string $locale): void + { + $item = $walker->getItem(); + if ($item instanceof NodesSources) { + $solarium = $this->solariumFactory->createWithNodesSources($item); + // Fetch all fields array association AS sub-resources (i.e. do not index their title, and relationships) + $childAssoc = $solarium->getFieldsAssoc(true); + $assoc['collection_txt'] = array_filter(array_merge( + $assoc['collection_txt'], + $childAssoc['collection_txt'] + )); + $assoc['collection_txt_' . $locale] = $this->flattenTextCollection($assoc['collection_txt']); + } + if ($walker->count() > 0) { + foreach ($walker->getChildren() as $subWalker) { + $this->walkAndIndex($subWalker, $assoc, $locale); + } + } + } +}