Skip to content

Commit

Permalink
Add support for Reversed Chars instruction in BMC blocks (#402)
Browse files Browse the repository at this point in the history
  • Loading branch information
smalot authored Apr 13, 2021
1 parent 3205823 commit b8784eb
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 2 deletions.
Binary file added samples/bugs/Issue398.pdf
Binary file not shown.
21 changes: 19 additions & 2 deletions src/Smalot/PdfParser/PDFObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ private function getDefaultFont(Page $page = null)
*/
public function getText(Page $page = null)
{
$text = '';
$result = '';
$sections = $this->getSectionsText($this->content);
$current_font = $this->getDefaultFont($page);
$clipped_font = $current_font;
Expand All @@ -283,9 +283,17 @@ public function getText(Page $page = null)

foreach ($sections as $section) {
$commands = $this->getCommandsText($section);
$reverse_text = false;
$text = '';

foreach ($commands as $command) {
switch ($command[self::OPERATOR]) {
case 'BMC':
if ('ReversedChars' == $command[self::COMMAND]) {
$reverse_text = true;
}
break;

// set character spacing
case 'Tc':
break;
Expand Down Expand Up @@ -453,11 +461,20 @@ public function getText(Page $page = null)
default:
}
}

// Fix Hebrew and other reverse text oriented languages.
// @see: https://github.com/smalot/pdfparser/issues/398
if ($reverse_text) {
$chars = mb_str_split($text, 1, mb_internal_encoding());
$text = implode('', array_reverse($chars));
}

$result .= $text;
}

array_pop(self::$recursionStack);

return $text.' ';
return $result.' ';
}

/**
Expand Down
16 changes: 16 additions & 0 deletions tests/Integration/PDFObjectTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -231,4 +231,20 @@ public function testGetSectionText()
$sections
);
}

/**
 * Regression test for the reversed chars instruction.
 *
 * Parses the sample document attached to the issue below and checks that
 * the text extracted from its first page contains the expected Hebrew
 * string.
 *
 * @see: https://github.com/smalot/pdfparser/issues/398
 */
public function testReversedChars()
{
    $samplePath = $this->rootDir.'/samples/bugs/Issue398.pdf';

    // Parse the sample and extract the text of its first page only.
    $firstPage = $this->getParserInstance()->parseFile($samplePath)->getPages()[0];

    $this->assertStringContainsString('שלומי טסט', $firstPage->getText());
}
}

0 comments on commit b8784eb

Please sign in to comment.