From 9e3377827a46affbcbe1c6d7d8c563bed756efc6 Mon Sep 17 00:00:00 2001
From: "David E. Wheeler" <david@justatheory.com>
Date: Wed, 7 Feb 2024 21:35:35 -0500
Subject: [PATCH] Add imports to customize extension matching

Let users easily customize the regular expression used to match files
for a given parser by passing a regular expression to the `use`
statement for each (except for None).
---
 .github/workflows/release.yml    |  2 +-
 Build.PL                         |  6 ++----
 Changes                          |  2 ++
 lib/Text/Markup.pm               | 30 +++++++++++++++++++++++-------
 lib/Text/Markup/Asciidoc.pm      | 11 +++++++++++
 lib/Text/Markup/Asciidoctor.pm   | 12 ++++++++++--
 lib/Text/Markup/Bbcode.pm        | 11 +++++++++++
 lib/Text/Markup/CommonMark.pm    | 11 +++++++++--
 lib/Text/Markup/Creole.pm        | 11 +++++++++++
 lib/Text/Markup/HTML.pm          | 11 +++++++++++
 lib/Text/Markup/Markdown.pm      | 11 +++++++++++
 lib/Text/Markup/Mediawiki.pm     | 10 ++++++++++
 lib/Text/Markup/Multimarkdown.pm | 11 +++++++++++
 lib/Text/Markup/None.pm          |  1 +
 lib/Text/Markup/Pod.pm           | 11 +++++++++++
 lib/Text/Markup/Rest.pm          | 11 +++++++++++
 lib/Text/Markup/Textile.pm       | 11 +++++++++++
 lib/Text/Markup/Trac.pm          | 11 +++++++++++
 t/formats.t                      | 12 +++++++++++-
 19 files changed, 179 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b6d30bc..0e912c4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -10,7 +10,7 @@ jobs:
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     steps:
     - name: Check out the repo
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
     - name: Setup Perl
       uses: shogo82148/actions-setup-perl@v1
     - name: Install Release Dependencies
diff --git a/Build.PL b/Build.PL
index 206c99a..a161fc3 100644
--- a/Build.PL
+++ b/Build.PL
@@ -33,7 +33,8 @@ my $build = $class->new(
     license            => 'perl',
     create_makefile_pl => 'traditional',
     configure_requires => { 'Module::Build' => '0.4209' },
-    build_requires     => {
+    recommends         => { 'CommonMark' => '0.290000' },
+    test_requires      => {
         'File::Spec::Functions' => 0,
         'Module::Build'         => '0.4209',
         'Test::More'            => '0.96',
@@ -54,9 +55,6 @@ my $build = $class->new(
         'Parse::BBCode'         => '0.15',
         'Text::WikiCreole'      => '0.07',
     },
-    recommmends => {
-        'CommonMark'            => '0.290000',
-    },
     meta_merge => {
          "meta-spec" => { version => 2 },
         resources => {
diff --git a/Changes b/Changes
index d848061..30fec09 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,8 @@
 Revision history for Perl extension Text-Markup.
 
 0.32
+    - Added the ability to change the regular expression for a format by
+      passing it in the `use` statement.
 
 0.31  2023-09-10T23:24:43Z
     - Fixed the passing of parameters to `parse()`.
diff --git a/lib/Text/Markup.pm b/lib/Text/Markup.pm
index aaeaed1..066a7c2 100644
--- a/lib/Text/Markup.pm
+++ b/lib/Text/Markup.pm
@@ -3,6 +3,7 @@ package Text::Markup;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use Text::Markup::None;
 use Carp;
 
@@ -147,6 +148,15 @@ This distribution includes support for a number of markup formats:
 
 =back
 
+Modules under the Text::Markup namespace provide these parsers, and Text::Markup
+automatically loads them on recognizing file name suffixes documented for each
+module. To change the file extensions recognized for a particular parser (except
+for L<Text::Markup::None>), load it directly and pass a regular expression. For
+example, to have the Mediawiki parser recognize files with the suffixes
+C<truck>, C<truc>, C<track>, or C<trac>, load it like so:
+
+  use Text::Markup::Mediawiki qr{tr[au]ck?};
+
 Adding support for more markup languages is straight-forward, and patches
 adding them to this distribution are also welcome. See L</Add a Parser> for
 step-by-step instructions.
@@ -304,6 +314,11 @@ C<Text::FooBar> module, it might look something like this:
   use Text::FooBar ();
   use File::BOM qw(open_bom)
 
+  sub import {
+      # Replace the regex if passed one.
+      Text::Markup->register( foobar => $_[1] ) if $_[1];
+  }
+
   sub parser {
       my ($file, $encoding, $opts) = @_;
       my $md = Text::FooBar->new(@{ $opts || [] });
@@ -332,9 +347,8 @@ In such a case, read in the file as raw bytes:
       open my $fh, '<:raw', $file or die "Cannot open $file: $!\n";
 
 The returned HTML, however, B<must be encoded in UTF-8>. Please include an
-L<encoding
-declaration|https://en.wikipedia.org/wiki/Character_encodings_in_HTML>, such
-as a content-type C<< <meta> >> element:
+L<encoding declaration|https://en.wikipedia.org/wiki/Character_encodings_in_HTML>,
+such as a content-type C<< <meta> >> element:
 
   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
 
@@ -430,13 +444,15 @@ UI.
 =back
 
 If you don't want to submit your parser, you can still create and use one
-independently. Rather than add its information to the C<%REGEX_FOR> hash in
-this module, you can just load your parser manually, and have it call the
-C<register> method, like so:
+independently. Just omit editing the C<%REGEX_FOR> hash in this module and make
+sure you C<register> the parser manually with a default regular expression
+in the C<import> method, like so:
 
   package My::Markup::FooBar;
   use Text::Markup;
-  Text::Markup->register(foobar => qr{fb|foob(?:ar)?});
+  sub import {
+      Text::Markup->register( foobar => $_[1] || qr{fb|foob(?:ar)?} );
+  }
 
 This will be useful for creating private parsers you might not want to
 contribute, or that you'd want to distribute independently.
diff --git a/lib/Text/Markup/Asciidoc.pm b/lib/Text/Markup/Asciidoc.pm
index 20378c9..774322c 100644
--- a/lib/Text/Markup/Asciidoc.pm
+++ b/lib/Text/Markup/Asciidoc.pm
@@ -3,11 +3,17 @@ package Text::Markup::Asciidoc;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use Text::Markup::Cmd;
 use utf8;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( asciidoc => $_[1] ) if $_[1];
+}
+
 my $ASCIIDOC = find_cmd([
     (map { (WIN32 ? ("$_.exe", "$_.bat") : ($_)) } qw(asciidoc)),
     'asciidoc.py',
@@ -87,6 +93,11 @@ Asciidoc:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Asciidoc qr{ski?doc};
+
 Normally this parser returns the output of C<asciidoc> wrapped in a minimal
 HTML page skeleton. If you would prefer to just get the exact output returned
 by C<asciidoc>, you can pass in a true value for the C<raw> option.
diff --git a/lib/Text/Markup/Asciidoctor.pm b/lib/Text/Markup/Asciidoctor.pm
index d66bea7..aeee2b4 100644
--- a/lib/Text/Markup/Asciidoctor.pm
+++ b/lib/Text/Markup/Asciidoctor.pm
@@ -3,13 +3,16 @@ package Text::Markup::Asciidoctor;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use Text::Markup::Cmd;
 use utf8;
 
 our $VERSION = '0.32';
 
-# Replace Text::Markup::Asciidoc.
-Text::Markup->register( asciidoc => qr{a(?:sc(?:iidoc)?|doc)?} );
+sub import {
+    # Replace Text::Markup::Asciidoc.
+    Text::Markup->register( asciidoc => $_[1] || qr{a(?:sc(?:iidoc)?|doc)?} );
+}
 
 # Find Asciidoc.
 my $ASCIIDOC = find_cmd([
@@ -98,6 +101,11 @@ Asciidoc:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Asciidoctor qr{ski?doc};
+
 Normally this parser returns the output of C<asciidoctor> wrapped in a minimal
 HTML page skeleton. If you would prefer to just get the exact output returned
 by C<asciidoctor>, you can pass in a true value for the C<raw> option.
diff --git a/lib/Text/Markup/Bbcode.pm b/lib/Text/Markup/Bbcode.pm
index ae0363b..03a31bb 100644
--- a/lib/Text/Markup/Bbcode.pm
+++ b/lib/Text/Markup/Bbcode.pm
@@ -3,11 +3,17 @@ package Text::Markup::Bbcode;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Parse::BBCode;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( bbcode => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my %params = @{ $opts };
@@ -64,6 +70,11 @@ It recognizes files with the following extensions as Markdown:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Bbcode qr{beebee};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output with the raw skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/lib/Text/Markup/CommonMark.pm b/lib/Text/Markup/CommonMark.pm
index c99a30f..a59acb7 100644
--- a/lib/Text/Markup/CommonMark.pm
+++ b/lib/Text/Markup/CommonMark.pm
@@ -9,8 +9,10 @@ use File::BOM qw(open_bom);
 
 our $VERSION = '0.32';
 
-# Replace Text::Markup::Markdown.
-Text::Markup->register( markdown => qr{m(?:d(?:own)?|kdn?|arkdown)} );
+sub import {
+    # Replace Text::Markup::Markdown.
+    Text::Markup->register( markdown => $_[1] || qr{m(?:d(?:own)?|kdn?|arkdown)} );
+}
 
 sub parser {
     my ($file, $encoding, $opts) = @_;
@@ -83,6 +85,11 @@ It recognizes files with the following extensions as CommonMark Markdown:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::CommonMark qr{markd?};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/lib/Text/Markup/Creole.pm b/lib/Text/Markup/Creole.pm
index 0976a92..d20b4aa 100644
--- a/lib/Text/Markup/Creole.pm
+++ b/lib/Text/Markup/Creole.pm
@@ -3,11 +3,17 @@ package Text::Markup::Creole;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::WikiCreole;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( creole => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     open_bom my $fh, $file, ":encoding($encoding)";
@@ -60,6 +66,11 @@ It recognizes files with the following extensions as Markdown:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Creole qr{cre+ole+};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/lib/Text/Markup/HTML.pm b/lib/Text/Markup/HTML.pm
index 2b77fd9..734a0f1 100644
--- a/lib/Text/Markup/HTML.pm
+++ b/lib/Text/Markup/HTML.pm
@@ -3,9 +3,15 @@ package Text::Markup::HTML;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( html => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my $html = do {
@@ -47,6 +53,11 @@ with no decoding. It recognizes files with the following extensions as HTML:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::HTML qr{hachetml};
+
 =head1 Author
 
 David E. Wheeler <david@justatheory.com>
diff --git a/lib/Text/Markup/Markdown.pm b/lib/Text/Markup/Markdown.pm
index cf20e43..ab0ef29 100644
--- a/lib/Text/Markup/Markdown.pm
+++ b/lib/Text/Markup/Markdown.pm
@@ -3,11 +3,17 @@ package Text::Markup::Markdown;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::Markdown ();
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( markdown => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my %params = @{ $opts };
@@ -69,6 +75,11 @@ It recognizes files with the following extensions as Markdown:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Markdown qr{markd?};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/lib/Text/Markup/Mediawiki.pm b/lib/Text/Markup/Mediawiki.pm
index 5472cfd..61aeebb 100644
--- a/lib/Text/Markup/Mediawiki.pm
+++ b/lib/Text/Markup/Mediawiki.pm
@@ -3,11 +3,17 @@ package Text::Markup::Mediawiki;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::MediawikiFormat 1.0;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( mediawiki => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     open_bom my $fh, $file, ":encoding($encoding)";
@@ -65,6 +71,10 @@ It recognizes files with the following extensions as MediaWiki:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Mediawiki qr{kwiki?};
 
 Text::Markup::Mediawiki supports the two
 L<Text::MediawikiFormat arguments|Text::MediawikiFormat/format>, a hash
diff --git a/lib/Text/Markup/Multimarkdown.pm b/lib/Text/Markup/Multimarkdown.pm
index 064d2cf..cc8df12 100644
--- a/lib/Text/Markup/Multimarkdown.pm
+++ b/lib/Text/Markup/Multimarkdown.pm
@@ -3,11 +3,17 @@ package Text::Markup::Multimarkdown;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::MultiMarkdown ();
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( multimarkdown => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my %params = @{ $opts };
@@ -70,6 +76,11 @@ It recognizes files with the following extensions as MultiMarkdown:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Multimarkdown qr{mmm+};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to the format options argument to C<parse>.
diff --git a/lib/Text/Markup/None.pm b/lib/Text/Markup/None.pm
index 8a08af7..0758f7f 100644
--- a/lib/Text/Markup/None.pm
+++ b/lib/Text/Markup/None.pm
@@ -3,6 +3,7 @@ package Text::Markup::None;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use HTML::Entities;
 use File::BOM qw(open_bom);
 
diff --git a/lib/Text/Markup/Pod.pm b/lib/Text/Markup/Pod.pm
index cfb8b67..d616b24 100644
--- a/lib/Text/Markup/Pod.pm
+++ b/lib/Text/Markup/Pod.pm
@@ -3,8 +3,14 @@ package Text::Markup::Pod;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use Pod::Simple::XHTML 3.15;
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( pod => $_[1] ) if $_[1];
+}
+
 # Disable the use of HTML::Entities.
 $Pod::Simple::XHTML::HAS_HTML_ENTITIES = 0;
 
@@ -60,6 +66,11 @@ extensions as Pod:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Pod qr{cgi};
+
 =head1 Options
 
 You may pass an arrayref of settings to this parser which changes the output returned.  For example,
diff --git a/lib/Text/Markup/Rest.pm b/lib/Text/Markup/Rest.pm
index b63ea74..827d741 100644
--- a/lib/Text/Markup/Rest.pm
+++ b/lib/Text/Markup/Rest.pm
@@ -3,11 +3,17 @@ package Text::Markup::Rest;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use Text::Markup::Cmd;
 use File::Basename;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( rest => $_[1] ) if $_[1];
+}
+
 # Find Python or die.
 my $PYTHON = find_cmd(
     [WIN32 ? 'python3.exe' : 'python3'],
@@ -113,6 +119,11 @@ extensions as reST:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Rest qr{re?st(?:aurant)?};
+
 =head1 Author
 
 Daniele Varrazzo <daniele.varrazzo@gmail.com>
diff --git a/lib/Text/Markup/Textile.pm b/lib/Text/Markup/Textile.pm
index 4dc805a..91cda71 100644
--- a/lib/Text/Markup/Textile.pm
+++ b/lib/Text/Markup/Textile.pm
@@ -3,11 +3,17 @@ package Text::Markup::Textile;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::Textile 2.10;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( textile => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my %params = @{ $opts };
@@ -67,6 +73,11 @@ It recognizes files with the following extension as Textile:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Textile qr{text(?:ile)?};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/lib/Text/Markup/Trac.pm b/lib/Text/Markup/Trac.pm
index 6555bec..93af4bb 100644
--- a/lib/Text/Markup/Trac.pm
+++ b/lib/Text/Markup/Trac.pm
@@ -3,11 +3,17 @@ package Text::Markup::Trac;
 use 5.8.1;
 use strict;
 use warnings;
+use Text::Markup;
 use File::BOM qw(open_bom);
 use Text::Trac 0.10;
 
 our $VERSION = '0.32';
 
+sub import {
+    # Replace the regex if passed one.
+    Text::Markup->register( trac => $_[1] ) if $_[1];
+}
+
 sub parser {
     my ($file, $encoding, $opts) = @_;
     my %params = @{ $opts };
@@ -64,6 +70,11 @@ It recognizes files with the following extensions as Trac:
 
 =back
 
+To change the files it recognizes, load this module directly and pass a
+regular expression matching the desired extension(s), like so:
+
+  use Text::Markup::Trac qr{tr[au]ck?};
+
 Normally this module returns the output wrapped in a minimal HTML document
 skeleton. If you would like the raw output without the skeleton, you can pass
 the C<raw> option to C<parse>.
diff --git a/t/formats.t b/t/formats.t
index ca50464..7db5d2a 100644
--- a/t/formats.t
+++ b/t/formats.t
@@ -43,6 +43,8 @@ my %parsed_filter_for = (
 );
 
 my @loaded = Text::Markup->formats;
+my %regex_for = Text::Markup->format_matchers;
+
 while (my $data = <DATA>) {
     next if $data =~ /^#/;
     chomp $data;
@@ -58,7 +60,7 @@ while (my $data = <DATA>) {
             }
         } if $req;
 
-        plan tests => @exts + 5;
+        plan tests => @exts + 7;
         use_ok $module or next;
 
         push @loaded => $format unless grep { $_ eq $format } @loaded;
@@ -93,6 +95,14 @@ while (my $data = <DATA>) {
             file   => catfile('t', 'empty.txt'),
             format => $format,
         ), undef, "Parse empty $name file";
+
+        # Try recognizing in plain text.
+        is $parser->guess_format('hello.txt'), undef,
+            'guess_format should not match .txt';
+        $module->import(qr/txt/);
+        is $parser->guess_format('hello.txt'), $format,
+            'Now guess_format should match .txt';
+        $module->import($regex_for{$format});
     }
 }