From 9e3377827a46affbcbe1c6d7d8c563bed756efc6 Mon Sep 17 00:00:00 2001 From: "David E. Wheeler" Date: Wed, 7 Feb 2024 21:35:35 -0500 Subject: [PATCH] Add imports to customize extension matching Let users easily customize the regular expression used to match files for a given parser by passing a regular expression to the `use` statement for each (except for None). Also spell the `recommends` key in Build.PL correctly while moving it. --- .github/workflows/release.yml | 2 +- Build.PL | 6 ++---- Changes | 2 ++ lib/Text/Markup.pm | 30 +++++++++++++++++++++++------- lib/Text/Markup/Asciidoc.pm | 11 +++++++++++ lib/Text/Markup/Asciidoctor.pm | 12 ++++++++++-- lib/Text/Markup/Bbcode.pm | 11 +++++++++++ lib/Text/Markup/CommonMark.pm | 11 +++++++++-- lib/Text/Markup/Creole.pm | 11 +++++++++++ lib/Text/Markup/HTML.pm | 11 +++++++++++ lib/Text/Markup/Markdown.pm | 11 +++++++++++ lib/Text/Markup/Mediawiki.pm | 10 ++++++++++ lib/Text/Markup/Multimarkdown.pm | 11 +++++++++++ lib/Text/Markup/None.pm | 1 + lib/Text/Markup/Pod.pm | 11 +++++++++++ lib/Text/Markup/Rest.pm | 11 +++++++++++ lib/Text/Markup/Textile.pm | 11 +++++++++++ lib/Text/Markup/Trac.pm | 11 +++++++++++ t/formats.t | 12 +++++++++++- 19 files changed, 179 insertions(+), 17 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b6d30bc..0e912c4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,7 +10,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Perl uses: shogo82148/actions-setup-perl@v1 - name: Install Release Dependencies diff --git a/Build.PL b/Build.PL index 206c99a..a161fc3 100644 --- a/Build.PL +++ b/Build.PL @@ -33,7 +33,8 @@ my $build = $class->new( license => 'perl', create_makefile_pl => 'traditional', configure_requires => { 'Module::Build' => '0.4209' }, - build_requires => { + recommends => { 'CommonMark' => '0.290000' }, + test_requires => { 'File::Spec::Functions' => 0, 'Module::Build' => 
'0.4209', 'Test::More' => '0.96', @@ -54,9 +55,6 @@ my $build = $class->new( 'Parse::BBCode' => '0.15', 'Text::WikiCreole' => '0.07', }, - recommmends => { - 'CommonMark' => '0.290000', - }, meta_merge => { "meta-spec" => { version => 2 }, resources => { diff --git a/Changes b/Changes index d848061..30fec09 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,8 @@ Revision history for Perl extension Text-Markup. 0.32 + - Added the ability to change the regular expression for a format by + passing it in the `use` statement. 0.31 2023-09-10T23:24:43Z - Fixed the passing of parameters to `parse()`. diff --git a/lib/Text/Markup.pm b/lib/Text/Markup.pm index aaeaed1..066a7c2 100644 --- a/lib/Text/Markup.pm +++ b/lib/Text/Markup.pm @@ -3,6 +3,7 @@ package Text::Markup; use 5.8.1; use strict; use warnings; +use Text::Markup; use Text::Markup::None; use Carp; @@ -147,6 +148,15 @@ This distribution includes support for a number of markup formats: =back +Modules under the Text::Markup namespace provide these parsers, and Text::Markup +automatically loads them on recognizing file name suffixes documented for each +module. To change the file extensions recognized for a particular parser (except +for L<Text::Markup::None>), load it directly and pass a regular expression. For +example, to have the Mediawiki parser recognize files with the suffixes +C<trac>, C<track>, C<truc>, or C<truck>, load it like so: + + use Text::Markup::Mediawiki qr{tr[au]ck?}; + Adding support for more markup languages is straight-forward, and patches adding them to this distribution are also welcome. See L for step-by-step instructions. @@ -304,6 +314,11 @@ C module, it might look something like this: use Text::FooBar (); use File::BOM qw(open_bom) + sub import { + # Replace the regex if passed one. 
+ Text::Markup->register( foobar => $_[1] ) if $_[1]; + } + sub parser { my ($file, $encoding, $opts) = @_; my $md = Text::FooBar->new(@{ $opts || [] }); @@ -332,9 +347,8 @@ In such a case, read in the file as raw bytes: open my $fh, '<:raw', $file or die "Cannot open $file: $!\n"; The returned HTML, however, B. Please include an -L, such -as a content-type C<< >> element: +L, +such as a content-type C<< >> element: @@ -430,13 +444,15 @@ UI. =back If you don't want to submit your parser, you can still create and use one -independently. Rather than add its information to the C<%REGEX_FOR> hash in -this module, you can just load your parser manually, and have it call the -C method, like so: +independently. Just omit editing the C<%REGEX_FOR> hash in this module and make +sure you C<register> the parser manually with a default regular expression +in the C<import> method, like so: package My::Markup::FooBar; use Text::Markup; - Text::Markup->register(foobar => qr{fb|foob(?:ar)?}); + sub import { + Text::Markup->register( foobar => $_[1] || qr{fb|foob(?:ar)?} ); + } This will be useful for creating private parsers you might not want to contribute, or that you'd want to distribute independently. diff --git a/lib/Text/Markup/Asciidoc.pm b/lib/Text/Markup/Asciidoc.pm index 20378c9..774322c 100644 --- a/lib/Text/Markup/Asciidoc.pm +++ b/lib/Text/Markup/Asciidoc.pm @@ -3,11 +3,17 @@ package Text::Markup::Asciidoc; use 5.8.1; use strict; use warnings; +use Text::Markup; use Text::Markup::Cmd; use utf8; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( asciidoc => $_[1] ) if $_[1]; +} + my $ASCIIDOC = find_cmd([ (map { (WIN32 ? 
("$_.exe", "$_.bat") : ($_)) } qw(asciidoc)), 'asciidoc.py', @@ -87,6 +93,11 @@ Asciidoc: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Asciidoc qr{ski?doc}; + Normally this parser returns the output of C wrapped in a minimal HTML page skeleton. If you would prefer to just get the exact output returned by C, you can pass in a true value for the C option. diff --git a/lib/Text/Markup/Asciidoctor.pm b/lib/Text/Markup/Asciidoctor.pm index d66bea7..aeee2b4 100644 --- a/lib/Text/Markup/Asciidoctor.pm +++ b/lib/Text/Markup/Asciidoctor.pm @@ -3,13 +3,16 @@ package Text::Markup::Asciidoctor; use 5.8.1; use strict; use warnings; +use Text::Markup; use Text::Markup::Cmd; use utf8; our $VERSION = '0.32'; -# Replace Text::Markup::Asciidoc. -Text::Markup->register( asciidoc => qr{a(?:sc(?:iidoc)?|doc)?} ); +sub import { + # Replace Text::Markup::Asciidoc. + Text::Markup->register( asciidoc => $_[1] || qr{a(?:sc(?:iidoc)?|doc)?} ); +} # Find Asciidoc. my $ASCIIDOC = find_cmd([ @@ -98,6 +101,11 @@ Asciidoc: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Asciidoctor qr{ski?doc}; + Normally this parser returns the output of C wrapped in a minimal HTML page skeleton. If you would prefer to just get the exact output returned by C, you can pass in a true value for the C option. diff --git a/lib/Text/Markup/Bbcode.pm b/lib/Text/Markup/Bbcode.pm index ae0363b..03a31bb 100644 --- a/lib/Text/Markup/Bbcode.pm +++ b/lib/Text/Markup/Bbcode.pm @@ -3,11 +3,17 @@ package Text::Markup::Bbcode; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Parse::BBCode; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. 
+ Text::Markup->register( bbcode => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my %params = @{ $opts }; @@ -64,6 +70,11 @@ It recognizes files with the following extensions as Markdown: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Bbcode qr{beebee}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output with the raw skeleton, you can pass the C option to C. diff --git a/lib/Text/Markup/CommonMark.pm b/lib/Text/Markup/CommonMark.pm index c99a30f..a59acb7 100644 --- a/lib/Text/Markup/CommonMark.pm +++ b/lib/Text/Markup/CommonMark.pm @@ -9,8 +9,10 @@ use File::BOM qw(open_bom); our $VERSION = '0.32'; -# Replace Text::Markup::Markdown. -Text::Markup->register( markdown => qr{m(?:d(?:own)?|kdn?|arkdown)} ); +sub import { + # Replace Text::Markup::Markdown. + Text::Markup->register( markdown => $_[1] || qr{m(?:d(?:own)?|kdn?|arkdown)} ); +} sub parser { my ($file, $encoding, $opts) = @_; @@ -83,6 +85,11 @@ It recognizes files with the following extensions as CommonMark Markdown: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::CommonMark qr{markd?}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to C. diff --git a/lib/Text/Markup/Creole.pm b/lib/Text/Markup/Creole.pm index 0976a92..d20b4aa 100644 --- a/lib/Text/Markup/Creole.pm +++ b/lib/Text/Markup/Creole.pm @@ -3,11 +3,17 @@ package Text::Markup::Creole; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::WikiCreole; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. 
+ Text::Markup->register( creole => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; open_bom my $fh, $file, ":encoding($encoding)"; @@ -60,6 +66,11 @@ It recognizes files with the following extensions as Markdown: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Creole qr{cre+ole+}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to C. diff --git a/lib/Text/Markup/HTML.pm b/lib/Text/Markup/HTML.pm index 2b77fd9..734a0f1 100644 --- a/lib/Text/Markup/HTML.pm +++ b/lib/Text/Markup/HTML.pm @@ -3,9 +3,15 @@ package Text::Markup::HTML; use 5.8.1; use strict; use warnings; +use Text::Markup; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( html => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my $html = do { @@ -47,6 +53,11 @@ with no decoding. It recognizes files with the following extensions as HTML: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::HTML qr{hachetml}; + =head1 Author David E. Wheeler diff --git a/lib/Text/Markup/Markdown.pm b/lib/Text/Markup/Markdown.pm index cf20e43..ab0ef29 100644 --- a/lib/Text/Markup/Markdown.pm +++ b/lib/Text/Markup/Markdown.pm @@ -3,11 +3,17 @@ package Text::Markup::Markdown; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::Markdown (); our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. 
+ Text::Markup->register( markdown => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my %params = @{ $opts }; @@ -69,6 +75,11 @@ It recognizes files with the following extensions as Markdown: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Markdown qr{markd?}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to C. diff --git a/lib/Text/Markup/Mediawiki.pm b/lib/Text/Markup/Mediawiki.pm index 5472cfd..61aeebb 100644 --- a/lib/Text/Markup/Mediawiki.pm +++ b/lib/Text/Markup/Mediawiki.pm @@ -3,11 +3,17 @@ package Text::Markup::Mediawiki; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::MediawikiFormat 1.0; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( mediawiki => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; open_bom my $fh, $file, ":encoding($encoding)"; @@ -65,6 +71,10 @@ It recognizes files with the following extensions as MediaWiki: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Mediawiki qr{kwiki?}; Text::Markup::Mediawiki supports the two L, a hash diff --git a/lib/Text/Markup/Multimarkdown.pm b/lib/Text/Markup/Multimarkdown.pm index 064d2cf..cc8df12 100644 --- a/lib/Text/Markup/Multimarkdown.pm +++ b/lib/Text/Markup/Multimarkdown.pm @@ -3,11 +3,17 @@ package Text::Markup::Multimarkdown; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::MultiMarkdown (); our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. 
+ Text::Markup->register( multimarkdown => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my %params = @{ $opts }; @@ -70,6 +76,11 @@ It recognizes files with the following extensions as MultiMarkdown: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Multimarkdown qr{mmm+}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to the format options argument to C. diff --git a/lib/Text/Markup/None.pm b/lib/Text/Markup/None.pm index 8a08af7..0758f7f 100644 --- a/lib/Text/Markup/None.pm +++ b/lib/Text/Markup/None.pm @@ -3,6 +3,7 @@ package Text::Markup::None; use 5.8.1; use strict; use warnings; +use Text::Markup; use HTML::Entities; use File::BOM qw(open_bom); diff --git a/lib/Text/Markup/Pod.pm b/lib/Text/Markup/Pod.pm index cfb8b67..d616b24 100644 --- a/lib/Text/Markup/Pod.pm +++ b/lib/Text/Markup/Pod.pm @@ -3,8 +3,14 @@ package Text::Markup::Pod; use 5.8.1; use strict; use warnings; +use Text::Markup; use Pod::Simple::XHTML 3.15; +sub import { + # Replace the regex if passed one. + Text::Markup->register( pod => $_[1] ) if $_[1]; +} + # Disable the use of HTML::Entities. $Pod::Simple::XHTML::HAS_HTML_ENTITIES = 0; @@ -60,6 +66,11 @@ extensions as Pod: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Pod qr{cgi}; + =head1 Options You may pass an arrayref of settings to this parser which changes the output returned. 
For example, diff --git a/lib/Text/Markup/Rest.pm b/lib/Text/Markup/Rest.pm index b63ea74..827d741 100644 --- a/lib/Text/Markup/Rest.pm +++ b/lib/Text/Markup/Rest.pm @@ -3,11 +3,17 @@ package Text::Markup::Rest; use 5.8.1; use strict; use warnings; +use Text::Markup; use Text::Markup::Cmd; use File::Basename; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( rest => $_[1] ) if $_[1]; +} + # Find Python or die. my $PYTHON = find_cmd( [WIN32 ? 'python3.exe' : 'python3'], @@ -113,6 +119,11 @@ extensions as reST: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Rest qr{re?st(?:aurant)?}; + =head1 Author Daniele Varrazzo diff --git a/lib/Text/Markup/Textile.pm b/lib/Text/Markup/Textile.pm index 4dc805a..91cda71 100644 --- a/lib/Text/Markup/Textile.pm +++ b/lib/Text/Markup/Textile.pm @@ -3,11 +3,17 @@ package Text::Markup::Textile; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::Textile 2.10; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( textile => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my %params = @{ $opts }; @@ -67,6 +73,11 @@ It recognizes files with the following extension as Textile: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Textile qr{text(?:ile)?}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to C. 
diff --git a/lib/Text/Markup/Trac.pm b/lib/Text/Markup/Trac.pm index 6555bec..93af4bb 100644 --- a/lib/Text/Markup/Trac.pm +++ b/lib/Text/Markup/Trac.pm @@ -3,11 +3,17 @@ package Text::Markup::Trac; use 5.8.1; use strict; use warnings; +use Text::Markup; use File::BOM qw(open_bom); use Text::Trac 0.10; our $VERSION = '0.32'; +sub import { + # Replace the regex if passed one. + Text::Markup->register( trac => $_[1] ) if $_[1]; +} + sub parser { my ($file, $encoding, $opts) = @_; my %params = @{ $opts }; @@ -64,6 +70,11 @@ It recognizes files with the following extensions as Trac: =back +To change the files it recognizes, load this module directly and pass a +regular expression matching the desired extension(s), like so: + + use Text::Markup::Trac qr{tr[au]ck?}; + Normally this module returns the output wrapped in a minimal HTML document skeleton. If you would like the raw output without the skeleton, you can pass the C option to C. diff --git a/t/formats.t b/t/formats.t index ca50464..7db5d2a 100644 --- a/t/formats.t +++ b/t/formats.t @@ -43,6 +43,8 @@ my %parsed_filter_for = ( ); my @loaded = Text::Markup->formats; +my %regex_for = Text::Markup->format_matchers; + while (my $data = ) { next if $data =~ /^#/; chomp $data; @@ -58,7 +60,7 @@ while (my $data = ) { } } if $req; - plan tests => @exts + 5; + plan tests => @exts + 7; use_ok $module or next; push @loaded => $format unless grep { $_ eq $format } @loaded; @@ -93,6 +95,14 @@ while (my $data = ) { file => catfile('t', 'empty.txt'), format => $format, ), undef, "Parse empty $name file"; + + # Try recognizing in plain text. + is $parser->guess_format('hello.txt'), undef, + 'guess_format should not match .txt'; + $module->import(qr/txt/); + is $parser->guess_format('hello.txt'), $format, + 'Now guess_format should match .txt'; + $module->import($regex_for{$format}); } }