From 0195768abd5401a680c25ad8f809e170ea310b69 Mon Sep 17 00:00:00 2001 From: Dominique Feyer Date: Mon, 11 Jul 2016 12:54:38 +0200 Subject: [PATCH] TASK: Improve performance and memory usage during workspace indexing job creation --- .../NodeIndexQueueCommandController.php | 25 +++++++++++++---- .../Domain/Repository/NodeDataRepository.php | 28 +++++++++++++++++-- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Command/NodeIndexQueueCommandController.php b/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Command/NodeIndexQueueCommandController.php index 573f888..ae2b0d7 100644 --- a/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Command/NodeIndexQueueCommandController.php +++ b/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Command/NodeIndexQueueCommandController.php @@ -16,6 +16,7 @@ use TYPO3\Flow\Exception; use TYPO3\Flow\Persistence\PersistenceManagerInterface; use TYPO3\Jobqueue\Common\Job\JobManager; +use TYPO3\TYPO3CR\Domain\Factory\NodeFactory; use TYPO3\TYPO3CR\Domain\Model\NodeData; use TYPO3\TYPO3CR\Domain\Model\NodeInterface; use TYPO3\TYPO3CR\Domain\Repository\WorkspaceRepository; @@ -34,6 +35,12 @@ class NodeIndexQueueCommandController extends CommandController { */ protected $jobManager; + /** + * @var PersistenceManagerInterface + * @Flow\Inject + */ + protected $persistenceManager; + /** * @Flow\Inject * @var NodeTypeMappingBuilder @@ -65,6 +72,8 @@ class NodeIndexQueueCommandController extends CommandController { protected $logger; /** + * Index all nodes by creating a new index and when everything was completed, switch the index alias. 
+ * * @param string $workspace */ public function buildCommand($workspace = NULL) { @@ -97,22 +106,26 @@ protected function indexWorkspace($workspaceName, $indexPostfix) { $offset = 0; $batchSize = 100; while (TRUE) { - $result = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize); - if ($result === array()) { - break; - } + $iterator = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize); + $jobData = []; - foreach ($result as $data) { + + foreach ($this->nodeDataRepository->iterate($iterator) as $data) { $jobData[] = [ 'nodeIdentifier' => $data['nodeIdentifier'], 'dimensions' => $data['dimensions'] - ]; } + + if ($jobData === []) { + break; + } + $indexingJob = new IndexingJob($indexPostfix, $workspaceName, $jobData); $this->jobManager->queue('Flowpack.ElasticSearch.ContentRepositoryQueueIndexer', $indexingJob); $this->output('.'); $offset += $batchSize; + $this->persistenceManager->clearState(); } } diff --git a/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Domain/Repository/NodeDataRepository.php b/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Domain/Repository/NodeDataRepository.php index 97a2415..56a5442 100644 --- a/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Domain/Repository/NodeDataRepository.php +++ b/Classes/Flowpack/ElasticSearch/ContentRepositoryQueueIndexer/Domain/Repository/NodeDataRepository.php @@ -2,6 +2,7 @@ namespace Flowpack\ElasticSearch\ContentRepositoryQueueIndexer\Domain\Repository; use Doctrine\Common\Persistence\ObjectManager; +use Doctrine\ORM\Internal\Hydration\IterableResult; use Doctrine\ORM\Query; use Doctrine\ORM\QueryBuilder; use TYPO3\Flow\Annotations as Flow; @@ -25,7 +26,7 @@ class NodeDataRepository extends Repository { * @param string $workspaceName * @param integer $firstResult * @param integer $maxResults - * @return array + * @return IterableResult */ public function 
findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $maxResults = 1000) { @@ -42,7 +43,30 @@ public function findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $max ':removed' => FALSE, ]); - return $queryBuilder->getQuery()->getArrayResult(); + return $queryBuilder->getQuery()->iterate(); + } + + /** + * Iterate over an IterableResult and return a Generator + * + * This method is useful for batch processing huge result sets: it yields one row at a time, and the + * optional callback runs after each row so the caller can, e.g., clear the object manager or detach the object. + * + * @param IterableResult $iterator + * @param callable $callback + * @return \Generator + */ + public function iterate(IterableResult $iterator, callable $callback = null) + { + $iteration = 0; + foreach ($iterator as $object) { + $object = current($object); + yield $object; + if ($callback !== null) { + call_user_func($callback, $iteration, $object); + } + ++$iteration; + } } }