Skip to content

Commit 60b0062

Browse files
committed
Adding TabularDataReader::chunkBy method
1 parent 1a7b889 commit 60b0062

File tree

6 files changed

+75
-0
lines changed

6 files changed

+75
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ All Notable changes to `Csv` will be documented in this file
88

99
- `Statement::select`
1010
- `TabularDataReader::getRecordsAsObject`
11+
- `TabularDataReader::chunkBy`
1112

1213
### Deprecated
1314

docs/9.0/reader/tabular-data-reader.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,3 +472,23 @@ $reader->matchingFirstOrFail('row=3-1;4-6'); // will throw
472472

473473
<p class="message-info"> Wraps the functionality of <code>FragmentFinder</code> class.</p>
474474
<p class="message-notice">Added in version <code>9.12.0</code> for <code>Reader</code> and <code>ResultSet</code>.</p>
475+
476+
### chunkBy
477+
478+
<p class="message-notice">Added in version <code>9.15.0</code> for <code>Reader</code> and <code>ResultSet</code>.</p>
479+
480+
If you are dealing with a large CSV and want to split it into smaller pieces for easier handling, you can use
481+
the `chunkBy` method, which breaks the `TabularDataReader` into multiple, smaller instances of a given size. The
482+
last instance may contain fewer records when the total number of records is not a multiple of the chunk size.
483+
484+
```php
485+
use League\Csv\Reader;
486+
487+
$reader = Reader::createFromString($csv);
488+
489+
foreach ($reader->chunkBy(4) as $chunk) {
490+
foreach ($chunk as $record) {
491+
//the actual record will be found here.
492+
}
493+
}
494+
```

src/Reader.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,18 @@ public function reduce(Closure $closure, mixed $initial = null): mixed
319319
return ResultSet::createFromTabularDataReader($this)->reduce($closure, $initial);
320320
}
321321

322+
/**
323+
* @param positive-int $length
324+
*
325+
* @throws InvalidArgument
326+
*
327+
* @return Iterator<TabularDataReader>
328+
*/
329+
public function chunkBy(int $length): Iterator
330+
{
331+
return ResultSet::createFromTabularDataReader($this)->chunkBy($length);
332+
}
333+
322334
/**
323335
* @param Closure(array<mixed>, array-key): bool $closure
324336
*

src/ResultSet.php

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,36 @@ public function reduce(Closure $closure, mixed $initial = null): mixed
160160
return $initial;
161161
}
162162

163+
/**
164+
* @param positive-int $length
165+
*
166+
* @throws InvalidArgument
167+
*
168+
* @return Iterator<TabularDataReader>
169+
*/
170+
public function chunkBy(int $length): Iterator
171+
{
172+
if ($length < 1) {
173+
throw InvalidArgument::dueToInvalidChunkSize($length, __METHOD__);
174+
}
175+
176+
$records = [];
177+
$i = 0;
178+
foreach ($this->getRecords() as $record) {
179+
$records[] = $record;
180+
++$i;
181+
if ($i === $length) {
182+
yield self::createFromRecords($records);
183+
$i = 0;
184+
$records = [];
185+
}
186+
}
187+
188+
if ([] !== $records) {
189+
yield self::createFromRecords($records);
190+
}
191+
}
192+
163193
public function filter(Closure $closure): TabularDataReader
164194
{
165195
return Statement::create()->where($closure)->process($this);

src/TabularDataReader.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
* @method TabularDataReader matchingFirstOrFail(string $expression) extract the first found fragment identifier of the tabular data or fail
4141
* @method TabularDataReader|null matchingFirst(string $expression) extract the first found fragment identifier of the tabular data or return null if none is found
4242
* @method iterable<int, TabularDataReader> matching(string $expression) extract all found fragment identifiers for the tabular data
43+
* @method iterable<int, TabularDataReader> chunkBy(int $length) Chunk the TabularDataReader into smaller TabularDataReader instances of the given size or less.
4344
*/
4445
interface TabularDataReader extends Countable, IteratorAggregate
4546
{

src/TabularDataReaderTestCase.php

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,17 @@ public function __construct(
449449

450450
self::assertInstanceOf($class::class, $this->tabularDataWithHeader()->firstAsObject($class::class, ['observedOn', 'temperature', 'place']));
451451
}
452+
453+
public function testChunkingTabularDataUsingTheRangeMethod(): void
454+
{
455+
self::assertCount(2, [...$this->tabularData()->chunkBy(4)]);
456+
foreach ($this->tabularDataWithHeader()->chunkBy(4) as $offset => $item) {
457+
match ($offset) {
458+
0 => self::assertCount(4, $item),
459+
default => self::assertCount(2, $item),
460+
};
461+
}
462+
}
452463
}
453464

454465
enum Place: string

0 commit comments

Comments
 (0)