diff --git a/CHANGELOG.md b/CHANGELOG.md index 255493f..aa94043 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,7 +83,7 @@ * _Bugfix_: Smart diacritics won't try to "correct" the spelling of `Uber` anymore. ## 6.2.1 - August 27, 2018 -* _Bugfix_: French punctuation is now correctly applied to quotes preceeded or +* _Bugfix_: French punctuation is now correctly applied to quotes preceded or followed by round and square brackets. ## 6.2.0 - August 26, 2018 @@ -93,7 +93,7 @@ - Bulgarian - German - German (Traditional) - - German (Swiss Tradtional) + - German (Swiss Traditional) - Latin (Liturgical) - Thai * _Bugfix_: Smart quotes replacement could result in invalid unicode sequences in rare cases. diff --git a/ROADMAP.md b/ROADMAP.md index 9ef4abb..e74a74f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -7,6 +7,6 @@ PHP-Typography follows [Semantic Versioning 2.0.0](https://semver.org/spec/v2.0. The current stable release at the time of this writing is 6.2.0. The API has been mostly stable for a while, however some parts of it need a bit of polishing (documentation, parameter order etc.). Except for the `Settings` class, which so far remains monolithic, everything has been broken up in to self-contained smaller classes. -Partioning the `Settings` class to be more modular is conceptually difficult, but has to happen sooner or later. When it's done, there will be a 7.0.0 release. All development towards that end happens in the `7.0-dev` branch. +Partitioning the `Settings` class to be more modular is conceptually difficult, but has to happen sooner or later. When it's done, there will be a 7.0.0 release. All development towards that end happens in the `7.0-dev` branch. **Last updated:** 2018-08-26 diff --git a/src/class-dom.php b/src/class-dom.php index c5a9a9c..3513d03 100644 --- a/src/class-dom.php +++ b/src/class-dom.php @@ -180,7 +180,7 @@ public static function nodelist_to_array( DOMNodeList $node_list ): array { /** * Retrieves an array containing all the ancestors of the node. This could be done - * via an XPath query for "ancestor::*", but DOM walking is in all likelyhood faster. + * via an XPath query for "ancestor::*", but DOM walking is in all likelihood faster. * * @param DOMNode $node Required. * @@ -323,7 +323,7 @@ function ( callable $is_node_acceptable, DOMNode &$another_node ): ?DOMNode { } /** - * Retrieves the next accceptable sibling (if there is one). + * Retrieves the next acceptable sibling (if there is one). * * @param callable $is_acceptable Returns true if the \DOMnode is acceptable. * @param ?DOMNode $node Optional. The content node. Default null. diff --git a/src/class-php-typography.php b/src/class-php-typography.php index c29402c..a69b5d1 100644 --- a/src/class-php-typography.php +++ b/src/class-php-typography.php @@ -225,7 +225,7 @@ private function process_textnodes_internal( \DOMDocument $dom, \DOMNode $body_n */ $new = $textnode->data; - // Replace original node (if anthing was changed). + // Replace original node (if anything was changed). if ( $new !== $original ) { $this->replace_node_with_html( $textnode, $settings->apply_character_mapping( $new ) ); } @@ -258,7 +258,7 @@ protected static function arrays_intersect( array $array1, array $array2 ) { * * @since 6.0.0 Parameter $body_classes added. * - * @param HTML5 $parser An intialized parser object. + * @param HTML5 $parser An initialized parser object. * @param string $html The HTML fragment to parse (not a complete document). * @param Settings $settings The settings to apply. * @param string[] $body_classes Optional. CSS classes added to the virtual
diff --git a/src/class-re.php b/src/class-re.php index 6c4bd61..c7b4ebc 100644 --- a/src/class-re.php +++ b/src/class-re.php @@ -131,7 +131,7 @@ private static function get_top_level_domains_from_file( $path ) { if ( ! empty( $domains ) ) { return \implode( '|', $domains ); } else { - return 'ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|com|coop|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw'; + return 'ac|ad|aero|ae|af|ag|ai|al|am|an|ao|aq|arpa|ar|asia|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|biz|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|cat|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|com|coop|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|edu|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gov|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|info|int|in|io|iq|ir|is|it|je|jm|jobs|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mil|mk|ml|mm|mn|mobi|mo|mp|mq|mr|ms|mt|museum|mu|mv|mw|mx|my|mz|name|na|nc|net|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|org|pa|pe|pf|pg|ph|pk|pl|pm|pn|pro|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tel|tf|tg|th|tj|tk|tl|tm|tn|to|tp|travel|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw'; // @spellchecker:disable-line } } diff --git a/src/class-settings.php b/src/class-settings.php index 4e7e848..1758e32 100644 --- a/src/class-settings.php +++ b/src/class-settings.php @@ -1046,7 +1046,7 @@ public function set_style_numbers( $on = true ): void { } /** - * Enables/disables wrapping of punctiation and wide characters in . + * Enables/disables wrapping of punctuation and wide characters in . * * @param bool $on Optional. Default true. */ @@ -1176,7 +1176,7 @@ public function set_hyphenation_exceptions( $exceptions = [] ): void { * @since 5.2.0 The new parameter $raw_output has been added. * * @param int $max_length Optional. The maximum number of bytes returned (0 for unlimited). Default 16. - * @param bool $raw_output Optional. Wether to return raw binary data for the hash. Default true. + * @param bool $raw_output Optional. Whether to return raw binary data for the hash. Default true. * * @return string A binary hash value for the current settings limited to $max_length. */ diff --git a/src/class-text-parser.php b/src/class-text-parser.php index 3b0b01b..78eb028 100644 --- a/src/class-text-parser.php +++ b/src/class-text-parser.php @@ -78,7 +78,7 @@ class Text_Parser { ; ) | - (?: # hexidecimal matches + (?: # hexadecimal matches &\#x (?: 000[9ad]|0020|00a0|1361|1680|200[0-9a]|202f|205f|3000 ) ; @@ -94,7 +94,7 @@ class Text_Parser { private const SPACE = '(?:\s|' . self::HTML_SPACING . ')+'; // required modifiers: x (multiline pattern) i (case insensitive) $utf8. /** - * Find punctuation and symbols before words (to capture preceeding delimiating characters like hyphens or underscores) + * Find punctuation and symbols before words (to capture preceding delimiting characters like hyphens or underscores) * * @see http://www.unicode.org/charts/PDF/U2000.pdf * @@ -112,7 +112,7 @@ class Text_Parser { (?: (?: # alpha matches & - (?:quot|amp|frasl|lt|gt|iexcl|cent|pound|curren|yen|brvbar|sect|uml|pound|ordf|laquo|not|reg|macr|deg|plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|times|divide|circ|tilde|thetasym|upsih|piv|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|bull|hellip|permil|prime|Prime|lsaquo|rsaquo|oline|frasl|euro|trade|alefsym|larr|uarr|rarr|darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|emptyn|abla|isin|notin|ni|prod|sum|minus|lowast|radic|prop|infin|ang|and|orc|ap|cup|int|there4|simc|ong|asymp|ne|equiv|le|ge|sub|supn|sub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|loz|spades|clubs|hearts|diams) + (?:quot|amp|frasl|lt|gt|iexcl|cent|pound|curren|yen|brvbar|sect|uml|pound|ordf|laquo|not|reg|macr|deg|plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|times|divide|circ|tilde|thetasym|upsih|piv|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|bull|hellip|permil|prime|Prime|lsaquo|rsaquo|oline|frasl|euro|trade|alefsym|larr|uarr|rarr|darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|emptyn|abla|isin|notin|ni|prod|sum|minus|lowast|radic|prop|infin|ang|and|orc|ap|cup|int|there4|simc|ong|asymp|ne|equiv|le|ge|sub|supn|sub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|loz|spades|clubs|hearts|diams) # spellchecker:disable-line ; ) | @@ -122,7 +122,7 @@ class Text_Parser { ; ) | - (?: # hexidecimal matches + (?: # hexadecimal matches &\#x (?: 002[1-9a-cef]|003[a-cef]|0040|005[b-e]|0060|007[b-e]|00a[1-9a-cef]|00b[0-9a-f]|00d7|00f7|02c6|02dc|03d[126]|201[3-9a-f]|202[0-7]|20[34][0-9a-f]|205[0-9a-e]|206[1-4]|20[a-c][0-9a-f]|21[0-4][0-9a-f]|219[0-9a-f]|2[23][0-9a-f][0-9a-f]|25[a-f][0-9a-f]|23[0-9a-f][0-9a-f]|2e[0-7][0-9a-f] ) ; @@ -165,7 +165,7 @@ class Text_Parser { ; ) | - (?: # hexidecimal matches + (?: # hexadecimal matches &\#x (?: 002d|005f|00ad|200[b-d]|201[0-2] ) ; @@ -197,7 +197,7 @@ class Text_Parser { ; ) | - (?: # hexidecimal matches + (?: # hexadecimal matches (?: &\#x00 (?: 3[1-9]|4[1-9a-f]|5[0-9a]|6[1-9a-f]|7[0-9a]|c[0-9a-f]|d[0-689]|e[0-9a-f]|f[0-689a-f] ) @@ -302,11 +302,11 @@ protected static function tokenize( array $parts ) { $tokens[ $index ] = new Token( $part, Token::PUNCTUATION ); } elseif ( \preg_match( self::RE_WORD, $part ) ) { // Make sure that things like email addresses and URLs are not broken up - // into words and punctuation not preceeded by an 'other'. + // into words and punctuation not preceded by an 'other'. self::parse_ambiguous_token( Token::WORD, $part, $tokens, $index ); } else { // Make sure that things like email addresses and URLs are not broken up into words - // and punctuation not preceeded by an 'other' or 'word'. + // and punctuation not preceded by an 'other' or 'word'. self::parse_ambiguous_token( Token::OTHER, $part, $tokens, $index ); } @@ -317,7 +317,7 @@ protected static function tokenize( array $parts ) { } /** - * Parse ambigious tokens (that may need to be combined with the predecessors). + * Parse ambiguous tokens (that may need to be combined with the predecessors). * * @param Token::WORD|Token::OTHER $expected_type The expected token type. * @param string $part The string fragment to parse. @@ -327,12 +327,12 @@ protected static function tokenize( array $parts ) { protected static function parse_ambiguous_token( $expected_type, $part, array &$tokens, &$index ): void { // Make sure that things like email addresses and URLs are not broken up incorrectly. - if ( self::is_preceeded_by( Token::OTHER, $tokens, $index ) || ( Token::OTHER === $expected_type && self::is_preceeded_by( Token::WORD, $tokens, $index ) ) ) { + if ( self::is_preceded_by( Token::OTHER, $tokens, $index ) || ( Token::OTHER === $expected_type && self::is_preceded_by( Token::WORD, $tokens, $index ) ) ) { $old_part = $tokens[ --$index ]->value; $tokens[ $index ] = new Token( $old_part . $part, Token::OTHER ); - } elseif ( self::is_preceeded_by( Token::PUNCTUATION, $tokens, $index ) && self::is_not_preceeded_by( Token::SPACE, $tokens, $index, 2 ) ) { - // Not preceeded by a non-space + punctuation. + } elseif ( self::is_preceded_by( Token::PUNCTUATION, $tokens, $index ) && self::is_not_preceded_by( Token::SPACE, $tokens, $index, 2 ) ) { + // Not preceded by a non-space + punctuation. $old_part = $tokens[ $index - 1 ]->value; $older_part = $tokens[ $index - 2 ]->value; $tokens[ $index - 2 ] = new Token( $older_part . $old_part . $part, Token::OTHER ); @@ -348,6 +348,8 @@ protected static function parse_ambiguous_token( $expected_type, $part, array &$ /** * Checks if the predecessor of the current token is of a certain type. * + * @since 7.0.0 Renamed to `is_preceded_by`. + * * @param Token::* $type A valid token type (e.g. Token::WORD). * @param Token[] $tokens An array of tokens. * @param int $index The current token index. @@ -355,13 +357,15 @@ protected static function parse_ambiguous_token( $expected_type, $part, array &$ * * @return bool */ - protected static function is_preceeded_by( $type, array $tokens, $index, $steps = 1 ) { + protected static function is_preceded_by( $type, array $tokens, $index, $steps = 1 ) { return $index - $steps >= 0 && $type === $tokens[ $index - $steps ]->type; } /** * Checks if the predecessor of the current token is not of a certain type. * + * @since 7.0.0 Renamed to `is_not_preceded_by`. + * * @param Token::* $type A valid token type (e.g. Token::WORD). * @param Token[] $tokens An array of tokens. * @param int $index The current token index. @@ -369,7 +373,7 @@ protected static function is_preceeded_by( $type, array $tokens, $index, $steps * * @return bool */ - protected static function is_not_preceeded_by( $type, array $tokens, $index, $steps = 1 ) { + protected static function is_not_preceded_by( $type, array $tokens, $index, $steps = 1 ) { return $index - $steps >= 0 && $type !== $tokens[ $index - $steps ]->type; } diff --git a/src/fixes/node-fixes/class-dewidow-fix.php b/src/fixes/node-fixes/class-dewidow-fix.php index b583511..2112bc4 100644 --- a/src/fixes/node-fixes/class-dewidow-fix.php +++ b/src/fixes/node-fixes/class-dewidow-fix.php @@ -46,7 +46,7 @@ class Dewidow_Fix extends Abstract_Node_Fix { const SPACE_BETWEEN = '[\s]+'; // \s includes all special spaces (but not ZWSP) with the u flag. const WIDOW = '[\w\p{M}\-' . U::HYPHEN . U::ZERO_WIDTH_SPACE . U::SOFT_HYPHEN . ']+?'; // \w includes all alphanumeric Unicode characters but not composed characters. - // Mandatory UTF-8 modifer. + // Mandatory UTF-8 modifier. const REGEX_START = '/ (?: \A diff --git a/src/fixes/node-fixes/class-numbered-abbreviation-spacing-fix.php b/src/fixes/node-fixes/class-numbered-abbreviation-spacing-fix.php index e8579d4..c50c29b 100644 --- a/src/fixes/node-fixes/class-numbered-abbreviation-spacing-fix.php +++ b/src/fixes/node-fixes/class-numbered-abbreviation-spacing-fix.php @@ -2,7 +2,7 @@ /** * This file is part of PHP-Typography. * - * Copyright 2017-2022 Peter Putzer. + * Copyright 2017-2024 Peter Putzer. * * This program is free software; you can redistribute it and/or modify modify * it under the terms of the GNU General Public License as published by @@ -43,7 +43,7 @@ class Numbered_Abbreviation_Spacing_Fix extends Simple_Regex_Replacement_Fix { private const ISO = 'ISO(?:\/(?:IEC|TR|TS))?'; private const ABBREVIATIONS = ' - ### Internationl standards + ### International standards ' . self::ISO . '| ### German standards diff --git a/src/fixes/node-fixes/class-smart-exponents-fix.php b/src/fixes/node-fixes/class-smart-exponents-fix.php index b45077c..341036d 100644 --- a/src/fixes/node-fixes/class-smart-exponents-fix.php +++ b/src/fixes/node-fixes/class-smart-exponents-fix.php @@ -2,7 +2,7 @@ /** * This file is part of PHP-Typography. * - * Copyright 2014-2019 Peter Putzer. + * Copyright 2014-2024 Peter Putzer. * Copyright 2009-2011 KINGdesk, LLC. * * This program is free software; you can redistribute it and/or modify modify @@ -33,7 +33,7 @@ /** * Applies smart exponents (if enabled). - * Purposefully seperated from smart_math because of HTML code injection. + * Purposefully separated from smart_math because of HTML code injection. * * @author Peter Putzer