Skip to content

Commit

Permalink
feat: Updated Solr indexing tags fields for multivalued strings and u…
Browse files Browse the repository at this point in the history
…se composite ID for easy overriding
  • Loading branch information
ambroisemaupate committed Mar 13, 2023
1 parent 6676597 commit 50a04af
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,11 @@ protected function isQuerySingleWord(string $q): bool
return preg_match('#[\s\-\'\"\–\—\’\”\‘\“\/\+\.\,]#', $q) !== 1;
}

/**
 * Render a date as a string in the `Y-m-d\TH:i:s\Z` layout, computed in GMT/UTC.
 *
 * @param \DateTimeInterface $dateTime Date whose Unix timestamp gets formatted.
 *
 * @return string Result of gmdate() for that timestamp.
 */
protected function formatDateTimeToUTC(\DateTimeInterface $dateTime): string
{
    // Going through the Unix timestamp leaves the passed object untouched.
    $unixTimestamp = $dateTime->getTimestamp();

    return gmdate('Y-m-d\TH:i:s\Z', $unixTimestamp);
}

/**
* @param array $args
*
Expand Down
7 changes: 4 additions & 3 deletions lib/RoadizCoreBundle/src/SearchEngine/AbstractSolarium.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
use RZ\Roadiz\Markdown\MarkdownInterface;
use Solarium\Core\Client\Client;
use Solarium\Core\Query\DocumentInterface;
use Solarium\Core\Query\Result\Result;
use Solarium\Core\Query\Result\ResultInterface;
use Solarium\QueryType\Update\Query\Document;
use Solarium\QueryType\Update\Query\Query;
Expand Down Expand Up @@ -134,7 +133,7 @@ public function updateAndCommit(): ?ResultInterface
*/
public function update(Query $update): void
{
$this->clean($update);
// Since Solr IDs are now deterministic and composite, we don't need to remove the document first: just update it.
$this->createEmptyDocument($update);
$this->index();
// add the document to the update query
Expand Down Expand Up @@ -197,7 +196,7 @@ public function cleanAndCommit(): void
public function index(): bool
{
if ($this->document instanceof Document) {
$this->document->setKey('id', uniqid('', true));
$this->document->setKey('id', $this->getCompositeIdentifier());

try {
foreach ($this->getFieldsAssoc() as $key => $value) {
Expand Down Expand Up @@ -310,4 +309,6 @@ public function cleanTextContent(?string $content, bool $stripMarkdown = true):
$content = preg_replace('/[\x00-\x1F]/', '', $content);
return $content;
}

/**
 * Supply the value that index() stores under the Solr document `id` key.
 *
 * @return string
 */
abstract protected function getCompositeIdentifier(): string;
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ protected function argFqProcess(array &$args): array
}

/*
* `all_tags_txt` can store all folders, even technical ones, this fields should not user-searchable.
* `all_tags_slugs_ss` can store all folders, even technical ones; this field should not be user-searchable.
*/
if (!empty($args['folders'])) {
if ($args['folders'] instanceof Folder) {
$args["fq"][] = sprintf('all_tags_txt:"%s"', $args['folders']->getFolderName());
$args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $args['folders']->getFolderName());
} elseif (is_array($args['folders'])) {
foreach ($args['folders'] as $folder) {
if ($folder instanceof Folder) {
$args["fq"][] = sprintf('all_tags_txt:"%s"', $folder->getFolderName());
$args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $folder->getFolderName());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
use Doctrine\Persistence\ObjectManager;
use RZ\Roadiz\CoreBundle\Entity\NodesSources;
use RZ\Roadiz\CoreBundle\Entity\Translation;
use RZ\Roadiz\CoreBundle\Repository\NodesSourcesRepository;

/**
* @package RZ\Roadiz\CoreBundle\SearchEngine
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,15 @@ protected function argFqProcess(array &$args): array

/*
* filter by tag or tags
* `all_tags_txt` can store all tags, even technical ones, this fields should not user-searchable.
* `all_tags_slugs_ss` can store all tags, even technical ones; this field should not be user-searchable.
*/
if (!empty($args['tags'])) {
if ($args['tags'] instanceof Tag) {
$args["fq"][] = sprintf('all_tags_txt:"%s"', $args['tags']->getTagName());
$args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $args['tags']->getTagName());
} elseif (is_array($args['tags'])) {
foreach ($args['tags'] as $tag) {
if ($tag instanceof Tag) {
$args["fq"][] = sprintf('all_tags_txt:"%s"', $tag->getTagName());
$args["fq"][] = sprintf('all_tags_slugs_ss:"%s"', $tag->getTagName());
}
}
}
Expand Down Expand Up @@ -163,7 +163,7 @@ protected function argFqProcess(array &$args): array
if (isset($args['publishedAt'])) {
$tmp = "published_at_dt:";
if (!is_array($args['publishedAt']) && $args['publishedAt'] instanceof \DateTime) {
$tmp .= $args['publishedAt']->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z');
$tmp .= $this->formatDateTimeToUTC($args['publishedAt']);
} elseif (
isset($args['publishedAt'][0]) &&
$args['publishedAt'][0] === "BETWEEN" &&
Expand All @@ -173,23 +173,23 @@ protected function argFqProcess(array &$args): array
$args['publishedAt'][2] instanceof \DateTime
) {
$tmp .= "[" .
$args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') .
$this->formatDateTimeToUTC($args['publishedAt'][1]) .
" TO " .
$args['publishedAt'][2]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . "]";
$this->formatDateTimeToUTC($args['publishedAt'][2]) . "]";
} elseif (
isset($args['publishedAt'][0]) &&
$args['publishedAt'][0] === "<=" &&
isset($args['publishedAt'][1]) &&
$args['publishedAt'][1] instanceof \DateTime
) {
$tmp .= "[* TO " . $args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . "]";
$tmp .= "[* TO " . $this->formatDateTimeToUTC($args['publishedAt'][1]) . "]";
} elseif (
isset($args['publishedAt'][0]) &&
$args['publishedAt'][0] === ">=" &&
isset($args['publishedAt'][1]) &&
$args['publishedAt'][1] instanceof \DateTime
) {
$tmp .= "[" . $args['publishedAt'][1]->setTimezone(new \DateTimeZone('UTC'))->format('Y-m-d\TH:i:s\Z') . " TO *]";
$tmp .= "[" . $this->formatDateTimeToUTC($args['publishedAt'][1]) . " TO *]";
}
unset($args['publishedAt']);
$args["fq"][] = $tmp;
Expand Down
7 changes: 7 additions & 0 deletions lib/RoadizCoreBundle/src/SearchEngine/SolariumDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
namespace RZ\Roadiz\CoreBundle\SearchEngine;

use Psr\Log\LoggerInterface;
use RZ\Roadiz\Core\AbstractEntities\PersistableInterface;
use RZ\Roadiz\CoreBundle\Entity\Document;
use RZ\Roadiz\Markdown\MarkdownInterface;
use Solarium\Core\Query\DocumentInterface;
use Solarium\Core\Query\Result\ResultInterface;
use Solarium\QueryType\Update\Query\Query;
use Symfony\Component\String\Slugger\AsciiSlugger;

/**
* Wrap a Solarium and a Document's translations together to ease indexing.
Expand Down Expand Up @@ -203,4 +205,9 @@ public function index(): bool

return true;
}

/**
 * Always refuses to produce an identifier for this wrapper.
 *
 * @return string Never actually returned: the method always throws.
 *
 * @throws \InvalidArgumentException On every call.
 */
protected function getCompositeIdentifier(): string
{
    $reason = 'SolariumDocument should not provide any ID';

    throw new \InvalidArgumentException($reason);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use RZ\Roadiz\Markdown\MarkdownInterface;
use Solarium\QueryType\Update\Query\Query;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\String\Slugger\AsciiSlugger;

/**
* Wrap a Solarium and a DocumentTranslation together to ease indexing.
Expand Down Expand Up @@ -65,4 +66,12 @@ public function clean(Query $update): bool

return true;
}

/**
 * Build the Solr identifier for the wrapped document translation.
 *
 * The identifier is a prefix derived from the entity class name, a dot,
 * then the entity ID.
 *
 * @return string
 */
protected function getCompositeIdentifier(): string
{
    $classSegments = explode('\\', get_class($this->documentTranslation));
    // Only the three trailing segments of the class name feed the prefix.
    $trailingSegments = array_slice($classSegments, -3);

    $prefix = (new AsciiSlugger())
        ->slug(implode(' ', $trailingSegments))
        ->lower()
        ->snake();

    return $prefix . '.' . $this->documentTranslation->getId();
}
}
9 changes: 9 additions & 0 deletions lib/RoadizCoreBundle/src/SearchEngine/SolariumNodeSource.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use RZ\Roadiz\Markdown\MarkdownInterface;
use Solarium\QueryType\Update\Query\Query;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\String\Slugger\AsciiSlugger;

/**
* Wrap a Solarium and a NodeSource together to ease indexing.
Expand Down Expand Up @@ -70,4 +71,12 @@ public function clean(Query $update): bool

return true;
}

/**
 * Build the Solr identifier for the wrapped node source.
 *
 * The identifier is a prefix derived from the entity class name, a dot,
 * then the entity ID.
 *
 * @return string
 */
protected function getCompositeIdentifier(): string
{
    // Keep the three trailing segments of the fully-qualified class name.
    $classTail = array_slice(
        explode('\\', get_class($this->nodeSource)),
        -3
    );

    $slugger = new AsciiSlugger();
    $prefix = $slugger->slug(implode(' ', $classTail))->lower()->snake();

    return $prefix . '.' . $this->nodeSource->getId();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

declare(strict_types=1);

namespace RZ\Roadiz\CoreBundle\SearchEngine\Subscriber;

use RZ\Roadiz\Core\AbstractEntities\PersistableInterface;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
use Symfony\Component\String\Slugger\AsciiSlugger;

/**
 * Base class for indexing event subscribers that exposes a shared date formatter.
 */
abstract class AbstractIndexingSubscriber implements EventSubscriberInterface
{
    /**
     * Render a date as a string in the `Y-m-d\TH:i:s\Z` layout, computed in GMT/UTC.
     *
     * @param \DateTimeInterface $dateTime Date whose Unix timestamp gets formatted.
     *
     * @return string Result of gmdate() for that timestamp.
     */
    protected function formatDateTimeToUTC(\DateTimeInterface $dateTime): string
    {
        // Going through the Unix timestamp leaves the passed object untouched.
        $unixTimestamp = $dateTime->getTimestamp();

        return gmdate('Y-m-d\TH:i:s\Z', $unixTimestamp);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
use RZ\Roadiz\CoreBundle\Event\Document\DocumentTranslationIndexingEvent;
use RZ\Roadiz\CoreBundle\SearchEngine\AbstractSolarium;
use RZ\Roadiz\CoreBundle\SearchEngine\SolariumDocumentTranslation;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;

final class DefaultDocumentTranslationIndexingSubscriber implements EventSubscriberInterface
final class DefaultDocumentTranslationIndexingSubscriber extends AbstractIndexingSubscriber
{
/**
* @inheritDoc
Expand All @@ -34,25 +33,13 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void
$assoc[SolariumDocumentTranslation::IDENTIFIER_KEY] = $documentTranslation->getId();
if ($document instanceof Document) {
$assoc['document_id_i'] = $document->getId();
$assoc['created_at_dt'] = $document->getCreatedAt()
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
;
$assoc['updated_at_dt'] = $document->getUpdatedAt()
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
;
$assoc['created_at_dt'] = $this->formatDateTimeToUTC($document->getCreatedAt());
$assoc['updated_at_dt'] = $this->formatDateTimeToUTC($document->getUpdatedAt());

$copyrightValidSince = $document->getCopyrightValidSince() ?? new \DateTime('1970-01-01 00:00:00');
$copyrightValidUntil = $document->getCopyrightValidUntil() ?? new \DateTime('9999-12-31 23:59:59');
$assoc['copyright_valid_since_dt'] = $copyrightValidSince
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
;
$assoc['copyright_valid_until_dt'] = $copyrightValidUntil
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
;
$assoc['copyright_valid_since_dt'] = $this->formatDateTimeToUTC($copyrightValidSince);
$assoc['copyright_valid_until_dt'] = $this->formatDateTimeToUTC($copyrightValidUntil);
}
$assoc['filename_s'] = $document->getFilename();
$assoc['mime_type_s'] = $document->getMimeType();
Expand Down Expand Up @@ -107,16 +94,16 @@ public function onIndexing(DocumentTranslationIndexingEvent $event): void
$assoc['tags_txt_' . $lang] = implode(' ', $visibleFolderNames);

/*
* `all_tags_txt` can store all folders, even technical one, this fields should not user searchable.
* `all_tags_slugs_ss` can store all folders, even technical ones; this field should not be user-searchable.
*/
$allFolders = $document->getFolders();
$allFolderNames = [];
/** @var Folder $folder */
foreach ($allFolders as $folder) {
$allFolderNames[] = $folder->getFolderName();
}
// Use all_tags_txt to be compatible with other data types
$assoc['all_tags_txt'] = array_filter(array_unique($allFolderNames));
// Use all_tags_slugs_ss to be compatible with other data types
$assoc['all_tags_slugs_ss'] = array_filter(array_unique($allFolderNames));

/*
* Collect data in a single field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
use RZ\Roadiz\CoreBundle\Entity\Tag;
use RZ\Roadiz\CoreBundle\Event\NodesSources\NodesSourcesIndexingEvent;
use RZ\Roadiz\CoreBundle\SearchEngine\SolariumNodeSource;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;

final class DefaultNodesSourcesIndexingSubscriber implements EventSubscriberInterface
final class DefaultNodesSourcesIndexingSubscriber extends AbstractIndexingSubscriber
{
/**
* @inheritDoc
Expand Down Expand Up @@ -58,17 +57,11 @@ public function onIndexing(NodesSourcesIndexingEvent $event): void
$assoc['title'] = $title;
$assoc['title_txt_' . $lang] = $title;

$assoc['created_at_dt'] = $node->getCreatedAt()
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
$assoc['updated_at_dt'] = $node->getUpdatedAt()
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
$assoc['created_at_dt'] = $this->formatDateTimeToUTC($node->getCreatedAt());
$assoc['updated_at_dt'] = $this->formatDateTimeToUTC($node->getUpdatedAt());

if (null !== $nodeSource->getPublishedAt()) {
$assoc['published_at_dt'] = $nodeSource->getPublishedAt()
->setTimezone(new \DateTimeZone('UTC'))
->format('Y-m-d\TH:i:s\Z');
$assoc['published_at_dt'] = $this->formatDateTimeToUTC($nodeSource->getPublishedAt());
}

/*
Expand Down Expand Up @@ -109,7 +102,7 @@ function (Tag $tag) use ($event, $nodeSource) {
$assoc['tags_txt_' . $lang] = implode(' ', $out);

/*
* `all_tags_txt` can store all tags, even technical one, this fields should not user searchable.
* `all_tags_slugs_ss` can store all tags, even technical ones; this field should not be user-searchable.
*/
$allOut = array_map(
function (Tag $tag) {
Expand All @@ -118,8 +111,8 @@ function (Tag $tag) {
$nodeSource->getNode()->getTags()->toArray()
);
$allOut = array_filter(array_unique($allOut));
// Use all_tags_txt to be compatible with other data types
$assoc['all_tags_txt'] = $allOut;
// Use all_tags_slugs_ss to be compatible with other data types
$assoc['all_tags_slugs_ss'] = $allOut;
}

$criteria = new Criteria();
Expand Down

0 comments on commit 50a04af

Please sign in to comment.