Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CM-873 May detect linebreak incorrectly in edge case #57

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.clrf.csv eol=crlf
6 changes: 3 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
},
"require-dev": {
"ext-json": "*",
"keboola/coding-standard": "^13.0",
"keboola/coding-standard": "^15.0",
"php-parallel-lint/php-parallel-lint": "^1.3",
"phpstan/phpstan": "^1.4",
"phpstan/phpstan": "^1.10",
"phpunit/phpunit": ">=7.5 <=9.6",
"phpstan/phpdoc-parser": "1.5.*"
"phpstan/phpdoc-parser": "^1.25"
},
"scripts": {
"phpstan": "phpstan analyse ./src ./tests --level=max --no-progress -c phpstan.neon",
Expand Down
5 changes: 4 additions & 1 deletion phpcs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@
<rule ref="SlevomatCodingStandard.TypeHints.DeclareStrictTypes">
<exclude name="SlevomatCodingStandard.TypeHints.DeclareStrictTypes.DeclareStrictTypesMissing"/>
</rule>
</ruleset>
<rule ref="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration">
<exclude name="SlevomatCodingStandard.Functions.RequireTrailingCommaInDeclaration.MissingTrailingComma"/>
</rule>
</ruleset>
7 changes: 6 additions & 1 deletion phpstan-baseline-8+.neon
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ parameters:
count: 1
path: tests/CsvReadTest.php

-
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nebylo jednodušší tam ten void dopsat?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to bych teď neřešil, protože ten return type není nikde a byl bych za to udělat v novým PR (někdy)

message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
count: 1
Expand Down Expand Up @@ -493,4 +498,4 @@ parameters:
-
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
count: 1
path: tests/UTF8BOMHelperTest.php
path: tests/UTF8BOMHelperTest.php
7 changes: 6 additions & 1 deletion phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,11 @@ parameters:
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testNewlineDetectionEdgecaseWithCrLf\\(\\) has no return type specified\\.$#"
count: 1
path: tests/CsvReadTest.php

-
message: "#^Method Keboola\\\\Csv\\\\Tests\\\\CsvReadTest\\:\\:testParse\\(\\) has no return type specified\\.$#"
count: 1
Expand Down Expand Up @@ -498,4 +503,4 @@ parameters:
-
message: "#^Parameter \\#1 \\$header of static method Keboola\\\\Csv\\\\UTF8BOMHelper\\:\\:detectAndRemoveBOM\\(\\) expects array, mixed given\\.$#"
count: 1
path: tests/UTF8BOMHelperTest.php
path: tests/UTF8BOMHelperTest.php
6 changes: 3 additions & 3 deletions src/CsvOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ protected function validateEnclosure($enclosure)
if (strlen($enclosure) > 1) {
throw new InvalidArgumentException(
'Enclosure must be a single character. ' . json_encode($enclosure) . ' received',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -64,14 +64,14 @@ protected function validateDelimiter($delimiter)
if (strlen($delimiter) > 1) {
throw new InvalidArgumentException(
'Delimiter must be a single character. ' . json_encode($delimiter) . ' received',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}

if (strlen($delimiter) === 0) {
throw new InvalidArgumentException(
'Delimiter cannot be empty.',
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand Down
16 changes: 11 additions & 5 deletions src/CsvReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class CsvReader extends AbstractCsvFile implements Iterator
* @deprecated use Keboola\Csv\CsvOptions::DEFAULT_ENCLOSURE
*/
const DEFAULT_ESCAPED_BY = CsvOptions::DEFAULT_ESCAPED_BY;
const SAMPLE_SIZE = 10000;

/**
* @var int
Expand Down Expand Up @@ -92,7 +93,7 @@ protected function validateSkipLines($skipLines)
if (!is_int($skipLines) || $skipLines < 0) {
throw new InvalidArgumentException(
"Number of lines to skip must be a positive integer. \"$skipLines\" received.",
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -106,14 +107,14 @@ protected function openCsvFile($fileName)
if (!is_file($fileName)) {
throw new Exception(
'Cannot open file ' . $fileName,
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
$this->filePointer = @fopen($fileName, 'r');
if (!$this->filePointer) {
throw new Exception(
"Cannot open file {$fileName} " . error_get_last()['message'],
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
}
Expand All @@ -124,7 +125,12 @@ protected function openCsvFile($fileName)
protected function detectLineBreak()
{
@rewind($this->getFilePointer());
$sample = @fread($this->getFilePointer(), 10000);
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE);
if (substr((string) $sample, -1) === "\r") {
// we might have hit the file in the middle of CR+LF, only getting CR
@rewind($this->getFilePointer());
$sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE+1);
}

return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy());
}
Expand Down Expand Up @@ -155,7 +161,7 @@ protected function validateLineBreak()

throw new InvalidArgumentException(
"Invalid line break. Please use unix \\n or win \\r\\n line breaks.",
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}

Expand Down
13 changes: 7 additions & 6 deletions src/CsvWriter.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ private function validateLineBreak($lineBreak)
throw new Exception(
'Invalid line break: ' . json_encode($lineBreak) .
' allowed line breaks: ' . json_encode($allowedLineBreaks),
Exception::INVALID_PARAM
Exception::INVALID_PARAM,
);
}
}
Expand All @@ -72,13 +72,14 @@ protected function openCsvFile($fileName)
throw new Exception(
"Cannot open file {$fileName} " . $e->getMessage(),
Exception::FILE_NOT_EXISTS,
$e
$e,
);
}

if (!$this->filePointer) {
throw new Exception(
"Cannot open file {$fileName} " . error_get_last()['message'],
Exception::FILE_NOT_EXISTS
Exception::FILE_NOT_EXISTS,
);
}
}
Expand All @@ -99,7 +100,7 @@ public function writeRow(array $row)
' Return: false' .
' To write: ' . strlen($str) . ' Written: 0',
Exception::WRITE_ERROR,
$e
$e,
);
}

Expand All @@ -113,7 +114,7 @@ public function writeRow(array $row)
($ret === false && error_get_last() ? 'Error: ' . error_get_last()['message'] : '') .
' Return: ' . json_encode($ret) .
' To write: ' . strlen($str) . ' Written: ' . (int) $ret,
Exception::WRITE_ERROR
Exception::WRITE_ERROR,
);
}
}
Expand All @@ -137,7 +138,7 @@ public function rowToStr(array $row)
)) {
throw new Exception(
'Cannot write data into column: ' . var_export($column, true),
Exception::WRITE_ERROR
Exception::WRITE_ERROR,
);
}

Expand Down
23 changes: 15 additions & 8 deletions tests/CsvReadTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ public function testColumnsCount()
self::assertEquals(9, $csv->getColumnsCount());
}

public function testNewlineDetectionEdgecaseWithCrLf()
{
$this->expectNotToPerformAssertions();
// this used to throw "Invalid line break. Please use unix \n or win \r\n line breaks." before the fix
new CsvReader(__DIR__ . '/data/test-input-edgecase.crlf.csv');
}

/**
* @dataProvider validCsvFiles
* @param string $fileName
Expand Down Expand Up @@ -266,7 +273,7 @@ public function testSkipsHeaders()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
1
1,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -285,7 +292,7 @@ public function testSkipNoLines()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
0
0,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -305,7 +312,7 @@ public function testSkipsMultipleLines()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
3
3,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([
Expand All @@ -322,7 +329,7 @@ public function testSkipsOverflow()
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ESCAPED_BY,
100
100,
);
self::assertEquals(['id', 'isImported'], $csvFile->getHeader());
self::assertEquals([], iterator_to_array($csvFile));
Expand Down Expand Up @@ -396,7 +403,7 @@ public function testInvalidSkipLines($skipLines, $message)
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
CsvOptions::DEFAULT_ENCLOSURE,
$skipLines
$skipLines,
);
}

Expand Down Expand Up @@ -475,9 +482,9 @@ public function testWriteReadInTheMiddle()
'"1","first"',
'"2","second"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -525,7 +532,7 @@ public function testInvalidFile()
public function testPerformance($fileContent, $expectedRows, $maxDuration)
{
self::markTestSkipped(
'Run this test only manually. Because the duration is very different in local CI environment.'
'Run this test only manually. Because the duration is very different in local CI environment.',
);

try {
Expand Down
26 changes: 13 additions & 13 deletions tests/CsvWriteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ public function testWrite()
"\"columns with\nnew line\",\"columns with\ttab\"",
'"column with \\n \\t \\\\","second col"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -130,9 +130,9 @@ public function testWriteValidObject()
'"col1","col2"' ,
'"1","me string"',
'',
]
],
),
$data
$data,
);
}

Expand Down Expand Up @@ -191,9 +191,9 @@ public function testWritePointer()
[
'"col1","col2"' ,
'foo,bar',
]
],
),
$data
$data,
);
}

Expand All @@ -213,15 +213,15 @@ public function testInvalidPointer()
$or = new LogicalOr();
$or->setConstraints([
new StringContains(
'Cannot write to CSV file Return: 0 To write: 14 Written: 0'
'Cannot write to CSV file Return: 0 To write: 14 Written: 0',
),
new StringContains(
'Cannot write to CSV file Error: fwrite(): ' .
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
),
new StringContains(
'Cannot write to CSV file Error: fwrite(): ' .
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0'
'Write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0',
),
]);
self::assertThat($e->getMessage(), $or);
Expand All @@ -238,7 +238,7 @@ public function testInvalidPointer2()
$rows = [['col1', 'col2']];
self::expectException(Exception::class);
self::expectExceptionMessage(
'a valid stream resource Return: false To write: 14 Written: '
'a valid stream resource Return: false To write: 14 Written: ',
);
$csvFile->writeRow($rows[0]);
}
Expand All @@ -258,7 +258,7 @@ public function testWriteLineBreak()
$fileName,
CsvOptions::DEFAULT_DELIMITER,
CsvOptions::DEFAULT_ENCLOSURE,
"\r\n"
"\r\n",
);
$rows = [
[
Expand All @@ -280,9 +280,9 @@ public function testWriteLineBreak()
'"col1","col2"',
'"val1","val2"',
'',
]
],
),
$data
$data,
);
}
}
2 changes: 1 addition & 1 deletion tests/LineBreaksHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public function testLineBreaksDetection($enclosure, $escapedBy, $input, $expecte
// Test line breaks detection
self::assertSame(
json_encode($expectedLineBreak),
json_encode(LineBreaksHelper::detectLineBreaks($input, $enclosure, $escapedBy))
json_encode(LineBreaksHelper::detectLineBreaks($input, $enclosure, $escapedBy)),
);
}

Expand Down
Loading