Skip to content

Commit

Permalink
Handle <sub> and <sup> elements as neighbors. Fixes #134.
Browse files Browse the repository at this point in the history
  • Loading branch information
mundschenk-at committed Mar 24, 2024
1 parent 0156382 commit 71c246c
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 31 deletions.
44 changes: 25 additions & 19 deletions src/class-dom.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ abstract class DOM {
'math',
];

/**
* Elements that are acceptable as neighbor nodes.
*
* @since 7.0.0
*/
private const ACCEPTABLE_NEIGHBOR_ELEMENTS = [
// Keys are element tag names; the `true` values only exist so that
// membership can be tested with isset() in is_acceptable_neighbor_node().
'br' => true, // Reported as a single space by get_adjacent_character().
'sub' => true, // Reported as an empty string by get_adjacent_character().
'sup' => true, // Reported as an empty string by get_adjacent_character().
];

/**
* Retrieves an array of block tags.
*
Expand Down Expand Up @@ -235,12 +246,20 @@ public static function get_next_chr( \DOMNode $node ) {
* @return string The character or an empty string.
*/
private static function get_adjacent_character( \DOMNode $node, $position, $length, callable $get_textnode ) {
$adjacent_node = $get_textnode( [ __CLASS__, 'is_text_or_linebreak' ], $node );
$adjacent_node = $get_textnode( [ __CLASS__, 'is_acceptable_neighbor_node' ], $node );
$character = '';

if ( null !== $adjacent_node ) {
if ( self::is_linebreak( $adjacent_node ) ) {
$character = ' ';
if ( $adjacent_node instanceof \DOMElement ) {
switch ( $adjacent_node->tagName ) {
case 'br':
$character = ' ';
break;
case 'sub':
case 'sup':
default:
$character = '';
}
} elseif ( $adjacent_node instanceof \DOMText ) {
$node_data = $adjacent_node->data;
$character = \preg_replace( '/\p{C}/Su', '', Strings::functions( $node_data )['substr']( $node_data, $position, $length ) );
Expand All @@ -250,19 +269,6 @@ private static function get_adjacent_character( \DOMNode $node, $position, $leng
return $character;
}

/**
* Determines if the node is a <br> element.
*
* @since 7.0.0
*
* @param ?\DOMNode $node The node to test.
*
* @return bool
*/
private static function is_linebreak( ?\DOMNode $node ): bool {
	// Anything that is not an element (including null and text nodes) cannot be a <br>.
	if ( ! $node instanceof \DOMElement ) {
		return false;
	}

	// Fall back to an empty name so that a missing tag name never matches.
	$tag_name = $node->tagName ?? '';

	return 'br' === $tag_name;
}

/**
* Determines if the node is a textnode.
*
Expand All @@ -277,16 +283,16 @@ private static function is_textnode( ?\DOMNode $node ): bool {
}

/**
* Determines if the node is a textnode or <br> element.
* Determines if the node is a textnode or one of the acceptable elements.
*
* @since 7.0.0
*
* @param ?\DOMNode $node The node to test.
*
* @return bool
*/
private static function is_text_or_linebreak( ?\DOMNode $node ): bool {
return $node instanceof \DOMText || ( $node instanceof \DOMElement && 'br' === $node->tagName );
private static function is_acceptable_neighbor_node( ?\DOMNode $node ): bool {
return $node instanceof \DOMText || ( $node instanceof \DOMElement && isset( self::ACCEPTABLE_NEIGHBOR_ELEMENTS[ $node->tagName ] ) );
}


Expand Down
40 changes: 32 additions & 8 deletions tests/class-dom-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,7 @@ public function test_get_block_parent_name( $input_xpath, $parent_xpath, $parent
* @covers ::get_previous_acceptable_node
* @covers ::get_adjacent_node
* @covers ::is_block_tag
* @covers ::is_linebreak
* @covers ::is_text_or_linebreak
* @covers ::is_acceptable_neighbor_node
*
* @uses ::get_last_acceptable_node
* @uses ::get_edge_node
Expand All @@ -330,8 +329,7 @@ public function test_get_prev_chr() {
* @covers ::get_adjacent_character
* @covers ::get_previous_acceptable_node
* @covers ::get_adjacent_node
* @covers ::is_linebreak
* @covers ::is_text_or_linebreak
* @covers ::is_acceptable_neighbor_node
*
* @uses ::is_block_tag
* @uses ::get_last_acceptable_node
Expand Down Expand Up @@ -374,8 +372,7 @@ public function test_get_previous_textnode_null() {
* @covers ::get_next_acceptable_node
* @covers ::get_adjacent_node
* @covers ::is_block_tag
* @covers ::is_linebreak
* @covers ::is_text_or_linebreak
* @covers ::is_acceptable_neighbor_node
*
* @uses ::get_first_acceptable_node
* @uses ::get_edge_node
Expand Down Expand Up @@ -403,8 +400,7 @@ public function test_get_next_chr() {
* @covers ::get_next_acceptable_node
* @covers ::get_adjacent_node
* @covers ::is_block_tag
* @covers ::is_linebreak
* @covers ::is_text_or_linebreak
* @covers ::is_acceptable_neighbor_node
*
* @uses ::get_first_acceptable_node
* @uses ::get_edge_node
Expand All @@ -424,6 +420,34 @@ public function test_get_next_chr_with_br() {
$this->assertSame( ' ', $prev_char );
}

/**
* Test get_next_chr followed by <sub> or <sup>.
*
* @covers ::get_next_chr
* @covers ::get_adjacent_character
* @covers ::get_next_acceptable_node
* @covers ::get_adjacent_node
* @covers ::is_block_tag
* @covers ::is_acceptable_neighbor_node
*
* @uses ::get_first_acceptable_node
* @uses ::get_edge_node
* @uses PHP_Typography\Strings::functions
*/
public function test_get_next_chr_with_sub_sup() {
	// Fixture: the text of #foo is directly followed by a <sup> (first child of #bar),
	// and the text of #bar is directly followed by a <sub>. The markup is well-formed
	// so that the test does not depend on the parser's error recovery.
	$html  = '<p><span id="foo">A</span><span id="bar"><sup>new</sup> hope.</span><sub>x</sub></p><p><span>The empire</span> strikes back.</p>';
	$doc   = $this->load_html( $html );
	$xpath = new \DOMXPath( $doc );

	// Text node followed by <sup>: the element itself is the neighbor, so no character is reported.
	$textnodes = $xpath->query( "//*[@id='foo']/text()" ); // really only one.
	$next_char = DOM::get_next_chr( $textnodes->item( 0 ) );
	$this->assertSame( '', $next_char );

	// Text node followed by <sub>: same expectation.
	$textnodes = $xpath->query( "//*[@id='bar']/text()" ); // really only one.
	$next_char = DOM::get_next_chr( $textnodes->item( 0 ) );
	$this->assertSame( '', $next_char );
}

/**
* Test get_next_textnode.
*
Expand Down
1 change: 1 addition & 0 deletions tests/class-php-typography-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ public function provide_process_data() {
[ 'ein-, zweimal', 'ein&#8209;, zweimal', true ],
[ 'В зависимости от региона, может выращиватся на зерно и силос. После колосовых может выращиватся на второй посев.', 'В зависимости от региона, может выращиватся на зерно и&nbsp;силос. После колосовых может выращиватся на второй посев.', false ],
[ '"Text."<br>Text after.', '<span class="pull-double">&ldquo;</span>Text.&rdquo;<br>Text after.', '&ldquo;Text.&rdquo;<br>Text after.' ],
[ 'à "l’âge"<sup>N112</sup>', '&agrave; <span class="push-double"></span>&#8203;<span class="pull-double">&ldquo;</span>l&rsquo;&acirc;ge&rdquo;<sup><span class="caps">N<span class="numbers">112</span></span></sup>', '&agrave; &ldquo;l&rsquo;&acirc;ge&rdquo;<sup>N112</sup>' ],
];
}

Expand Down
12 changes: 8 additions & 4 deletions tests/fixes/node-fixes/class-smart-quotes-fix-test.php
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,12 @@ public function provide_smart_quotes_special_data() {
'"',
],
[
'à "l’âge"<br>N112',
'&agrave; &laquo;&#8239;l&rsquo;&acirc;ge&#8239;&raquo;<br>N112',
'à "l’âge"',
'&agrave; &laquo;&#8239;l&rsquo;&acirc;ge&#8239;&raquo;',
Quote_Style::DOUBLE_GUILLEMETS_FRENCH,
Quote_Style::SINGLE_GUILLEMETS,
null,
new \DOMElement( 'br' ),
],
[
'à "l’âge" N112',
Expand All @@ -131,10 +133,12 @@ public function provide_smart_quotes_special_data() {
Quote_Style::SINGLE_GUILLEMETS,
],
[
'à "l’âge"<sup>N112</sup>',
'&agrave; &laquo;&#8239;l&rsquo;&acirc;ge&#8239;&raquo;<sup>N112</sup>',
'à "l’âge"',
'&agrave; &laquo;&#8239;l&rsquo;&acirc;ge&#8239;&raquo;',
Quote_Style::DOUBLE_GUILLEMETS_FRENCH,
Quote_Style::SINGLE_GUILLEMETS,
null,
new \DOMElement( 'sup', 'N112' ),
],
];
}
Expand Down

0 comments on commit 71c246c

Please sign in to comment.