Skip to content

Commit

Permalink
TASK: Improve performance and memory usage during workspace indexing …
Browse files Browse the repository at this point in the history
…job creation
  • Loading branch information
dfeyer committed Jul 11, 2016
1 parent e5d1156 commit 0195768
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
use TYPO3\Flow\Exception;
use TYPO3\Flow\Persistence\PersistenceManagerInterface;
use TYPO3\Jobqueue\Common\Job\JobManager;
use TYPO3\TYPO3CR\Domain\Factory\NodeFactory;
use TYPO3\TYPO3CR\Domain\Model\NodeData;
use TYPO3\TYPO3CR\Domain\Model\NodeInterface;
use TYPO3\TYPO3CR\Domain\Repository\WorkspaceRepository;
Expand All @@ -34,6 +35,12 @@ class NodeIndexQueueCommandController extends CommandController {
*/
protected $jobManager;

/**
* @var PersistenceManagerInterface
* @Flow\Inject
*/
protected $persistenceManager;

/**
* @Flow\Inject
* @var NodeTypeMappingBuilder
Expand Down Expand Up @@ -65,6 +72,8 @@ class NodeIndexQueueCommandController extends CommandController {
protected $logger;

/**
* Index all nodes by creating a new index and, once everything has completed, switch the index alias.
*
* @param string $workspace
*/
public function buildCommand($workspace = NULL) {
Expand Down Expand Up @@ -97,22 +106,26 @@ protected function indexWorkspace($workspaceName, $indexPostfix) {
$offset = 0;
$batchSize = 100;
while (TRUE) {
$result = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize);
if ($result === array()) {
break;
}
$iterator = $this->nodeDataRepository->findAllBySiteAndWorkspace($workspaceName, $offset, $batchSize);

$jobData = [];
foreach ($result as $data) {

foreach ($this->nodeDataRepository->iterate($iterator) as $data) {
$jobData[] = [
'nodeIdentifier' => $data['nodeIdentifier'],
'dimensions' => $data['dimensions']

];
}

if ($jobData === []) {
break;
}

$indexingJob = new IndexingJob($indexPostfix, $workspaceName, $jobData);
$this->jobManager->queue('Flowpack.ElasticSearch.ContentRepositoryQueueIndexer', $indexingJob);
$this->output('.');
$offset += $batchSize;
$this->persistenceManager->clearState();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
namespace Flowpack\ElasticSearch\ContentRepositoryQueueIndexer\Domain\Repository;

use Doctrine\Common\Persistence\ObjectManager;
use Doctrine\ORM\Internal\Hydration\IterableResult;
use Doctrine\ORM\Query;
use Doctrine\ORM\QueryBuilder;
use TYPO3\Flow\Annotations as Flow;
Expand All @@ -25,7 +26,7 @@ class NodeDataRepository extends Repository {
* @param string $workspaceName
* @param integer $firstResult
* @param integer $maxResults
* @return array
* @return IterableResult
*/
public function findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $maxResults = 1000) {

Expand All @@ -42,7 +43,30 @@ public function findAllBySiteAndWorkspace($workspaceName, $firstResult = 0, $max
':removed' => FALSE,
]);

return $queryBuilder->getQuery()->getArrayResult();
return $queryBuilder->getQuery()->iterate();
}

/**
 * Iterate over an IterableResult and return a Generator
 *
 * This method is useful for batch processing huge result sets, as items are
 * yielded one at a time instead of being collected into an array first.
 *
 * Note: this method does not clear the object manager or detach anything by
 * itself. Callers that need to release memory have to do so on their own, for
 * example inside the callback or after a batch has been consumed.
 *
 * The optional callback is invoked once the consumer has finished with the
 * yielded item (i.e. when the generator is resumed). It receives the zero-based
 * iteration index and the item that was just yielded. If the consumer stops
 * iterating early, the callback is not invoked for the last yielded item.
 *
 * @param IterableResult $iterator
 * @param callable|null $callback function ($iteration, $object)
 * @return \Generator
 */
public function iterate(IterableResult $iterator, callable $callback = null)
{
    $iteration = 0;
    foreach ($iterator as $row) {
        // Each iterated row is treated as a wrapper array; unwrap its current (first) element.
        $object = current($row);
        yield $object;
        if ($callback !== null) {
            // call_user_func() is kept so that every callable form (including
            // 'Class::method' strings) works on older PHP versions.
            call_user_func($callback, $iteration, $object);
        }
        ++$iteration;
    }
}

}

0 comments on commit 0195768

Please sign in to comment.