/**
 * Calculate text width with data from header 'Widths'.
 *
 * Every character of $text is looked up in the font's translation table to obtain its
 * character code; that code is then mapped to a position in the 'Widths' array through the
 * 'FirstChar'..'LastChar' range. Characters that cannot be resolved at any of these steps
 * are appended to $missing and do not contribute to the result.
 *
 * @param string     $text    text to measure
 * @param array|null $missing filled (by reference) with the characters for which no width was
 *                            found; it is reset to an empty array at the start of every call
 *
 * @return float|null sum of the widths that were found, or null if no character had a width
 */
public function calculateTextWidth(string $text, ?array &$missing = null): ?float
{
    $index_map = array_flip($this->table);
    $details = $this->getDetails();

    // 'Widths', 'FirstChar' and 'LastChar' are not guaranteed to be present in the font
    // details. Fall back to empty lookups so that every character is reported as missing
    // instead of triggering undefined-index warnings or an invalid range() call.
    $widths = $details['Widths'] ?? [];
    $first_char = $details['FirstChar'] ?? null;
    $last_char = $details['LastChar'] ?? null;

    // Widths array is zero indexed but table is not. We must map them based on FirstChar and LastChar
    $width_map = [];
    if (null !== $first_char && null !== $last_char) {
        $width_map = array_flip(range($first_char, $last_char));
    }

    // Stays null until the first width is found, so callers can tell "nothing measurable" from 0.
    $width = null;
    $missing = [];
    $textLength = mb_strlen($text);
    for ($i = 0; $i < $textLength; ++$i) {
        $char = mb_substr($text, $i, 1);

        // Resolve character -> character code -> index in Widths; null means "not resolvable".
        $char_code = $index_map[$char] ?? null;
        $width_index = null === $char_code ? null : ($width_map[$char_code] ?? null);

        if (null === $width_index || !\array_key_exists($width_index, $widths)) {
            $missing[] = $char;
            continue;
        }

        $width += $widths[$width_index];
    }

    return $width;
}
/**
 * Checks Font::calculateTextWidth() against the fonts of a sample document:
 * single characters must match the raw 'Widths' entries, and a word must yield the
 * expected sum together with the list of characters that had no width.
 */
public function testCalculateTextWidth(): void
{
    $samplePath = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';
    $allFonts = $this->getParserInstance()->parseFile($samplePath)->getFonts();

    // Font 7_0: 'D' and 'l' are expected at positions 0 and 10 of its Widths array.
    $partialFont = $allFonts['7_0'];
    $fontDetails = $partialFont->getDetails();
    $glyphWidths = $fontDetails['Widths'];
    $this->assertEquals($glyphWidths[0], $partialFont->calculateTextWidth('D'));
    $this->assertEquals($glyphWidths[10], $partialFont->calculateTextWidth('l'));

    // Font 7_0 has no width for some characters of the word; they must be reported.
    $partialTotal = $partialFont->calculateTextWidth('Calibri', $notFound);
    $this->assertEquals(936, $partialTotal);
    $this->assertEquals(['C', 'a', 'b', 'r'], $notFound);

    // Font 9_0 is expected to have a width for every character of the word.
    $completeFont = $allFonts['9_0'];
    $completeTotal = $completeFont->calculateTextWidth('Calibri', $notFound);
    $this->assertEquals(2573, $completeTotal);
    $this->assertEquals([], $notFound);
}