Skip to content

Commit

Permalink
Merge pull request #57 from keboola/CM-873-php-csv-may-detect-linebre…
Browse files Browse the repository at this point in the history
…ak-incorrectly-in-edge-case-v3

CM-873 May detect linebreak incorrectly in edge case
  • Loading branch information
ondrajodas authored Jan 11, 2024
2 parents 4d7785e + b2c49ed commit 73ffd7e
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 42 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.clrf.csv eol=crlf
6 changes: 3 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
},
"require-dev": {
"ext-json": "*",
"keboola/coding-standard": "^13.0",
"keboola/coding-standard": "^15.0",
"php-parallel-lint/php-parallel-lint": "^1.3",
"phpstan/phpstan": "^1.4",
"phpstan/phpstan": "^1.10",
"phpunit/phpunit": ">=7.5 <=9.6",
"phpstan/phpdoc-parser": "1.5.*"
"phpstan/phpdoc-parser": "^1.25"
},
"scripts": {
"phpstan": "phpstan analyse ./src ./tests --level=max --no-progress -c phpstan.neon",
Expand Down
5 changes: 4 additions & 1 deletion phpcs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@
<rule ref="SlevomatCodingStandard.TypeHints.DeclareStrictTypes">
<exclude name="SlevomatCodingStandard.TypeHints.DeclareStrictTypes.DeclareStrictTypesMissing"/>
</rule>
</ruleset>
<rule ref="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration">
<exclude name="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration.MissingTrailingComma"/>
</rule>
</ruleset>
7 changes: 6 additions & 1 deletion phpstan-baseline-8+.neon
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ parameters:
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
count: 1
Expand Down Expand Up @@ -493,4 +498,4 @@ parameters:
-
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
count: 1
path: tests/UTF8BOMHelperTest.php
path: tests/UTF8BOMHelperTest.php
7 changes: 6 additions & 1 deletion phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,11 @@ parameters:
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
count: 1
Expand Down Expand Up @@ -498,4 +503,4 @@ parameters:
-
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
count: 1
path: tests/UTF8BOMHelperTest.php
path: tests/UTF8BOMHelperTest.php
6 changes: 3 additions & 3 deletions src/CsvOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ protected function validateEnclosure($enclosure)
if (strlen($enclosure) > 1) {
throw new InvalidArgumentException(
'Enclosure must be a single character. ' . json_encode($enclosure) . ' received',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -64,14 +64,14 @@ protected function validateDelimiter($delimiter)
if (strlen($delimiter) > 1) {
throw new InvalidArgumentException(
'Delimiter must be a single character. ' . json_encode($delimiter) . ' received',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}

if (strlen($delimiter) === 0) {
throw new InvalidArgumentException(
'Delimiter cannot be empty.',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand Down
16 changes: 11 additions & 5 deletions src/CsvReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class CsvReader extends AbstractCsvFile implements Iterator
* @deprecated use Keboola\Csv\CsvOptions::DEFAULT_ENCLOSURE
*/
const DEFAULT_ESCAPED_BY = CsvOptions::DEFAULT_ESCAPED_BY;
const SAMPLE_SIZE = 10000;

/**
* @var int
Expand Down Expand Up @@ -92,7 +93,7 @@ protected function validateSkipLines($skipLines)
if (!is_int($skipLines) || $skipLines < 0) {
throw new InvalidArgumentException(
"Number of lines to skip must be a positive integer. \"$skipLines\" received.",
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -106,14 +107,14 @@ protected function openCsvFile($fileName)
if (!is_file($fileName)) {
throw new Exception(
'Cannot open file ' . $fileName,
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
$this->filePointer = @fopen($fileName, 'r');
if (!$this->filePointer) {
throw new Exception(
"Cannot open file {$fileName} " . error_get_last()['message'],
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
}
Expand All @@ -124,7 +125,12 @@ protected function openCsvFile($fileName)
protected function detectLineBreak()
{
@rewind($this->getFilePointer());
$sample = @fread($this->getFilePointer(), 10000);
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE);
if (substr((string) $sample, -1) === "\r") {
// we might have hit the file in the middle of CR+LF, only getting CR
@rewind($this->getFilePointer());
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE+1);
}

return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy());
}
Expand Down Expand Up @@ -155,7 +161,7 @@ protected function validateLineBreak()

throw new InvalidArgumentException(
"Invalid line break. Please use unix \\n or win \\r\\n line breaks.",
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}

Expand Down
13 changes: 7 additions & 6 deletions src/CsvWriter.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ private function validateLineBreak($lineBreak)
throw new Exception(
'Invalid line break: ' . json_encode($lineBreak) .
' allowed line breaks: ' . json_encode($allowedLineBreaks),
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -72,13 +72,14 @@ protected function openCsvFile($fileName)
throw new Exception(
"Cannot open file {$fileName} " . $e->getMessage(),
Exception::FILE_NOT_EXISTS,
$e
$e,
);
}

if (!$this->filePointer) {
throw new Exception(
"Cannot open file {$fileName} " . error_get_last()['message'],
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
}
Expand All @@ -99,7 +100,7 @@ public function writeRow(array $row)
' Return: false' .
' To write: ' . strlen($str) . ' Written: 0',
Exception::WRITE_ERROR,
$e
$e,
);
}

Expand All @@ -113,7 +114,7 @@ public function writeRow(array $row)
($ret === false && error_get_last() ? 'Error: ' . error_get_last()['message'] : '') .
' Return: ' . json_encode($ret) .
' To write: ' . strlen($str) . ' Written: ' . (int) $ret,
Exception::WRITE_ERROR
Exception::WRITE_ERROR,
);
}
}
Expand All @@ -137,7 +138,7 @@ public function rowToStr(array $row)
)) {
throw new Exception(
'Cannot write data into column: ' . var_export($column, true),
Exception::WRITE_ERROR
Exception::WRITE_ERROR,
);
}

Expand Down
23 changes: 15 additions & 8 deletions tests/CsvReadTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ public function testColumnsCount()
self::assertEquals(9, $csv->getColumnsCount());
}

public function testNewlineDetectionEdgecaseWithCrLf()
{
$this->expectNotToPerformAssertions();
// this used to throw "Invalid line break. Please use unix \n or win \r\n line breaks." before the fix
new CsvReader(__DIR__ . '/data/test-input-edgecase.crlf.csv');
}

/**
* @dataProvider validCsvFiles
* @param string $fileName
Expand Down Expand Up @@ -266,7 +273,7 @@ public function testSkipsHeaders()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
1
1,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -285,7 +292,7 @@ public function testSkipNoLines()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
0
0,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -305,7 +312,7 @@ public function testSkipsMultipleLines()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
3
3,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -322,7 +329,7 @@ public function testSkipsOverflow()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
100
100,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([], iterator_to_array($csvFile));
Expand Down Expand Up @@ -396,7 +403,7 @@ public function testInvalidSkipLines($skipLines, $message)
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ENCLOSURE,
$skipLines
$skipLines,
);
}

Expand Down Expand Up @@ -475,9 +482,9 @@ public function testWriteReadInTheMiddle()
'"1","first"',
'"2","second"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -525,7 +532,7 @@ public function testInvalidFile()
public function testPerformance($fileContent, $expectedRows, $maxDuration)
{
self::markTestSkipped(
'Run this test only manually. Because the duration is very different in local CI environment.'
'Run this test only manually. Because the duration is very different in local CI environment.',
);

try {
Expand Down
26 changes: 13 additions & 13 deletions tests/CsvWriteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ public function testWrite()
"\"columns with\nnew line\",\"columns with\ttab\"",
'"column with \\n \\t \\\\","second col"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -130,9 +130,9 @@ public function testWriteValidObject()
'"col1","col2"' ,
'"1","me string"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -191,9 +191,9 @@ public function testWritePointer()
[
'"col1","col2"' ,
'foo,bar',
]
],
),
$data
$data,
);
}

Expand All @@ -213,15 +213,15 @@ public function testInvalidPointer()
$or = new LogicalOr();
$or->setConstraints([
new StringContains(
'Cannot write to CSV file Return: 0 To write: 14 Written: 0'
'Cannot write to CSV file Return: 0 To write: 14 Written: 0',
),
new StringContains(
'Cannot write to CSV file Error: fwrite(): ' .
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
),
new StringContains(
'Cannot write to CSV file Error: fwrite(): ' .
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
),
]);
self::assertThat($e->getMessage(), $or);
Expand All @@ -238,7 +238,7 @@ public function testInvalidPointer2()
$rows = [['col1', 'col2']];
self::expectException(Exception::class);
self::expectExceptionMessage(
'a valid stream resource Return: false To write: 14 Written: '
'a valid stream resource Return: false To write: 14 Written: ',
);
$csvFile->writeRow($rows[0]);
}
Expand All @@ -258,7 +258,7 @@ public function testWriteLineBreak()
$fileName,
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
"\r\n"
"\r\n",
);
$rows = [
[
Expand All @@ -280,9 +280,9 @@ public function testWriteLineBreak()
'"col1","col2"',
'"val1","val2"',
'',
]
],
),
$data
$data,
);
}
}
2 changes: 1 addition & 1 deletion tests/LineBreaksHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public function testLineBreaksDetection($enclosure, $escapedBy, $input, $expecte
// Test line breaks detection
self::assertSame(
json_encode($expectedLineBreak),
json_encode(LineBreaksHelper::detectLineBreaks($input, $enclosure, $escapedBy))
json_encode(LineBreaksHelper::detectLineBreaks($input, $enclosure, $escapedBy)),
);
}

Expand Down
Loading

0 comments on commit 73ffd7e

Please sign in to comment.