Skip to content

Commit

Permalink
Implement undefined method in Encoding class (#378)
Browse files Browse the repository at this point in the history
* Implement undefined method in Encoding class. The __toString method was missing/not implemented, even though it is called in some cases. Fixes #364

* Add PHPDoc and fix type error

* Fix style issues

* added test in FontTest to prove the fix works; coding style clean-up in Font.php

* added EncodingTest with 2 tests for new method "getEncodingClass"

Co-authored-by: Konrad Abicht <hi@inspirito.de>
  • Loading branch information
tomlutzenberger and k00ni authored Dec 25, 2020
1 parent 5d37464 commit a4bb6d2
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 9 deletions.
38 changes: 30 additions & 8 deletions src/Smalot/PdfParser/Encoding.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

namespace Smalot\PdfParser;

use Exception;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Encoding\PostScriptGlyphs;

Expand Down Expand Up @@ -60,14 +61,7 @@ public function init()
$this->encoding = [];

if ($this->has('BaseEncoding')) {
// Load reference table charset.
$baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
$className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

if (!class_exists($className)) {
throw new \Exception('Missing encoding data for: "'.$baseEncoding.'".');
}

$className = $this->getEncodingClass();
$class = new $className();
$this->encoding = $class->getTranslations();

Expand Down Expand Up @@ -130,4 +124,32 @@ public function translateChar($dec)

return PostScriptGlyphs::getCodePoint($dec);
}

/**
 * String form of this encoding: the class name resolved by getEncodingClass().
 *
 * @return string
 *
 * @throws \Exception propagated from getEncodingClass() when no matching class exists
 */
public function __toString()
{
    $encodingClass = $this->getEncodingClass();

    return $encodingClass;
}

/**
 * Resolves the name of the class that matches the BaseEncoding entry.
 *
 * Every character of the BaseEncoding content that is not an ASCII letter or digit is
 * dropped; the remainder is appended to the \Smalot\PdfParser\Encoding\ namespace.
 *
 * @return string class name, including the leading backslash
 *
 * @throws \Exception if no class with the resolved name exists
 */
protected function getEncodingClass()
{
    // Load reference table charset.
    $rawContent = $this->get('BaseEncoding')->getContent();
    $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $rawContent);
    $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

    if (class_exists($className)) {
        return $className;
    }

    throw new Exception('Missing encoding data for: "'.$baseEncoding.'".');
}
}
8 changes: 7 additions & 1 deletion src/Smalot/PdfParser/Font.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@

namespace Smalot\PdfParser;

use Smalot\PdfParser\Encoding\WinAnsiEncoding;

/**
* Class Font
*/
Expand Down Expand Up @@ -101,7 +103,11 @@ public function translateChar($char, $use_default = true)

// fallback for decoding single-byte ANSI characters that are not in the lookup table
$fallbackDecoded = $char;
if (\strlen($char) < 2 && $this->has('Encoding') && 'WinAnsiEncoding' === $this->get('Encoding')->__toString()) {
if (
\strlen($char) < 2
&& $this->has('Encoding')
&& WinAnsiEncoding::class === $this->get('Encoding')->__toString()
) {
$fallbackDecoded = self::uchr($dec);
}

Expand Down
70 changes: 70 additions & 0 deletions tests/Integration/EncodingTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php

/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2020-06-01
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/

namespace Tests\Smalot\PdfParser\Integration;

use Exception;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Encoding;
use Smalot\PdfParser\Encoding\StandardEncoding;
use Smalot\PdfParser\Header;
use Tests\Smalot\PdfParser\TestCase;

/**
 * Integration tests for Encoding::__toString and, through it, Encoding::getEncodingClass.
 */
class EncodingTest extends TestCase
{
    /**
     * A known BaseEncoding resolves to the matching class name, prefixed with a backslash.
     */
    public function testGetEncodingClass()
    {
        $header = new Header(['BaseEncoding' => new Element('StandardEncoding')]);

        $encoding = new Encoding(new Document(), $header);
        $encoding->init();

        // assertSame: the result has to be exactly this string, not merely loosely equal to it.
        $this->assertSame('\\'.StandardEncoding::class, $encoding->__toString());
    }

    /**
     * Checks the behavior when the requested Encoding class doesn't exist.
     */
    public function testGetEncodingClassMissingClassException()
    {
        $this->expectException(Exception::class);
        $this->expectExceptionMessage('Missing encoding data for: "invalid"');

        $header = new Header(['BaseEncoding' => new Element('invalid')]);

        $encoding = new Encoding(new Document(), $header);
        // NOTE(review): init() also resolves the encoding class when BaseEncoding is present,
        // so the expected exception may already be raised here and the __toString() call
        // below may never run - confirm which call this test is meant to cover.
        $encoding->init();

        $encoding->__toString();
    }
}
30 changes: 30 additions & 0 deletions tests/Integration/FontTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
namespace Tests\Smalot\PdfParser\Integration;

use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Encoding;
use Smalot\PdfParser\Font;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\PDFObject;
Expand Down Expand Up @@ -124,6 +126,34 @@ public function testTranslateChar()
$this->assertEquals(Font::MISSING, $font->translateChar("\x99"));
}

/**
 * Regression test for issue #364.
 *
 * In some cases Font::translateChar calls Encoding::__toString, which did not exist.
 *
 * Resulting error: Call to undefined method Smalot\PdfParser\Encoding::__toString()
 *
 * @see https://github.com/smalot/pdfparser/issues/364
 */
public function testTranslateCharIssue364()
{
    // Minimal input that provokes the __toString call: a font whose header carries an
    // initialised Encoding object built from a known base encoding.
    $document = new Document();

    $encodingHeader = new Header(['BaseEncoding' => new Element('StandardEncoding')]);
    $encoding = new Encoding($document, $encodingHeader);
    $encoding->init();

    $fontHeader = new Header(['Encoding' => $encoding]);
    $font = new Font($document, $fontHeader);
    $font->init();

    // without the fix from #378, calling translateChar would raise "undefined method" error
    $translated = $font->translateChar('t');
    $this->assertEquals('?', $translated);
}

public function testLoadTranslateTable()
{
$document = new Document();
Expand Down

0 comments on commit a4bb6d2

Please sign in to comment.