Skip to content

Commit 0c5e8f2

Browse files
Fixing #62625
1 parent 8f3e746 commit 0c5e8f2

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

Crawler.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,11 @@ private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DO
11101110

11111111
$internalErrors = libxml_use_internal_errors(true);
11121112

1113-
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
1113+
try {
1114+
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
1115+
} catch (\ValueError) {
1116+
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS);
1117+
}
11141118

11151119
libxml_use_internal_errors($internalErrors);
11161120

Tests/CrawlerTest.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,7 @@ public function testAddXmlContentWithErrors()
13351335
#[Group('legacy')]
13361336
public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
13371337
{
1338-
// Html who create a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
1338+
// HTML that creates a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
13391339
$html = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html>';
13401340

13411341
$html5Crawler = new Crawler(null, null, null, true);
@@ -1396,6 +1396,14 @@ public function testAlpineJs()
13961396
$this->assertCount(3, $crawler->filterXPath('//div'));
13971397
}
13981398

1399+
public function testInvalidCharset()
1400+
{
1401+
$email = "Content-Type: text/html; charset=foobar\n\n<!DOCTYPE html><html><body>One Two Three</body></html>";
1402+
$crawler = $this->createCrawler($email);
1403+
// Without the fix, this test crashes before reaching the assertion below with: ValueError: Dom\HTMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding. The assertion additionally checks that the text is still extracted despite the invalid charset.
1404+
$this->assertSame('Content-Type: text/html; charset=foobar One Two Three', $crawler->text());
1405+
}
1406+
13991407
protected function createTestCrawler($uri = null)
14001408
{
14011409
$html = $this->getDoctype().'

0 commit comments

Comments
 (0)