From 96749bcc79b757fd0cf626c213978d296ca0658b Mon Sep 17 00:00:00 2001
From: Mathias Kende <mathias@kende.fr>
Date: Sun, 29 Sep 2024 23:29:37 +0200
Subject: [PATCH] Add a cheat sheet POD and improve the main documentation.

---
 .aspelldict                 |   9 +-
 .gitignore                  |   3 +
 Changes                     |   3 +
 MANIFEST.SKIP               |   2 +
 Makefile.PL                 |  18 +++
 lib/App/PTP/Cheat_Sheet.pod | 218 ++++++++++++++++++++++++++++++++++++
 script/ptp                  | 139 +++++++++++------------
 7 files changed, 322 insertions(+), 70 deletions(-)
 create mode 100644 lib/App/PTP/Cheat_Sheet.pod
diff --git a/.aspelldict b/.aspelldict
index b36b6af..00d57f1 100644
--- a/.aspelldict
+++ b/.aspelldict
@@ -1,4 +1,4 @@
-personal_ws-1.1 en 112 
+personal_ws-1.1 en 119 
 AliasVar
 CMD
 CPAN
@@ -13,6 +13,7 @@ LF
 MERCHANTABILITY
 MarkersArray
 NONINFRINGEMENT
+NUL
 PCRE
 PODNAME
 PTP
@@ -24,6 +25,8 @@ ProhibitMagicNumbers
 ProhibitNoWarnings
 ProhibitOneArgSelect
 ProhibitStringyEval
+RO
+RW
 ReadOnlyVar
 Readonly
 RequireArgUnpacking
@@ -45,6 +48,8 @@ cmd
 cmp
 coderef
 comparator
+cpanm
+cpanminus
 csv
 dir
 dirs
@@ -60,6 +65,7 @@ expressivity
 fh
 filepath
 fn
+gcc
 globaluniqstr
 gu
 guniq
@@ -99,6 +105,7 @@ sublicense
 subprocess
 subst
 substr
+sudo
 tac
 tempfile
 tsv
diff --git a/.gitignore b/.gitignore
index d5cf41f..06bd7be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,6 @@
 /App-PTP-*/
 
 # End of the template. You can add custom content below this line.
+
+/ptp_cheat_sheet.html
+/ptp_cheat_sheet.pdf
diff --git a/Changes b/Changes
index 450b702..418e6e3 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,8 @@
 Revision history for Perl distribution App-PTP
 
+1.16 - ??
+ - Add a "cheat sheet" POD to the distribution.
+
 1.15 - 2024-09-29
  - Remove a remaining smartmatch usage from our benchmarks.
 
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
index e09576f..9802e09 100644
--- a/MANIFEST.SKIP
+++ b/MANIFEST.SKIP
@@ -32,3 +32,5 @@
 .*\.bak
 
 # End of the template. You can add custom content below this line.
+
+^ptp_cheat_sheet\..*$
diff --git a/Makefile.PL b/Makefile.PL
index db23740..2125e66 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -134,3 +134,21 @@ MAKE_FRAGMENT
 # add to the generated Makefile.
 
 # End of the template. You can add custom content below this line.
+
+sub postamble {  # Returns extra Makefile rules that build and clean the PTP cheat sheet (PDF and HTML); presumably picked up by ExtUtils::MakeMaker as the postamble section -- confirm the enclosing package.
+  return <<"MAKE_FRAGMENT";  # Interpolating heredoc: "\t" becomes the tab required by make recipes, "\$" a literal "$" for make automatic variables.
+cheat_sheet: ptp_cheat_sheet.html ptp_cheat_sheet.pdf
+
+ptp_cheat_sheet.pdf: lib/App/PTP/Cheat_Sheet.pod
+\tpod2pdf --left-margin 30 --right-margin 30 --top-margin 36 --bottom-margin 36 --title="PTP Cheat Sheet" --noheader --nofooter \$< | pdfjam --landscape --nup 2x1 --outfile \$@ 2>/dev/null
+
+ptp_cheat_sheet.html: lib/App/PTP/Cheat_Sheet.pod
+\tpod2html --noindex --title="PTP Cheat Sheet" \$< > \$@
+
+clean_cheat_sheet:
+\trm -f ptp_cheat_sheet.pdf ptp_cheat_sheet.html
+
+clean:: clean_cheat_sheet
+
+MAKE_FRAGMENT
+}
diff --git a/lib/App/PTP/Cheat_Sheet.pod b/lib/App/PTP/Cheat_Sheet.pod
new file mode 100644
index 0000000..d05f4a9
--- /dev/null
+++ b/lib/App/PTP/Cheat_Sheet.pod
@@ -0,0 +1,218 @@
+=pod
+
+=head1 PTP Cheat Sheet
+
+=head2 SYNOPSIS
+
+Install with:
+
+  sudo apt-get install perl cpanminus perl-doc build-essential
+  # or
+  sudo yum install perl Perl-App-cpanminus perl-doc gcc make
+
+  sudo cpanm App::PTP -n -L /usr/local --man-pages --install-args \
+    'DESTINSTALLBIN=/usr/local/bin'
+
+Run with:
+
+  ptp file1 file2 ... [--grep re] [--substitute re subst] ... [-o out]
+
+=head2 INPUT FILES
+
+Input files can appear anywhere on the command line and are processed in the
+order in which they are given.
+
+=over 8
+
+=item C<F<filename>> (anywhere in the command line, not starting with a C<->)
+
+=item C<-> (reads from stdin)
+
+=item C<-- F<filename> ...> (for any filename)
+
+=back
+
+=head2 PIPELINE COMMANDS
+
+Pipeline commands are applied, in order, to all the input files.
+
+=over 8
+
+=item B<--g> I<regex> (B<--grep>), B<-s> I<regex> I<string> (B<--substitute>)
+
+=item B<--p> I<code> (B<--perl>): read and write C<$_> to modify the file
+
+=item B<-n> I<code>: read from C<$_>, write the return values
+
+=item B<-f> I<code> (B<--filter>): return I<true> to keep the line
+
+=item B<-e> I<code> (B<--execute>): execute once per input file
+
+=item B<-l> I<path> (B<--load>): execute the given file, once per input file
+
+=item B<-M> I<module>: load the given module
+
+=item B<--sort>, B<--ns> (B<--numeric-sort>), B<--ls> (B<--locale-sort>),
+B<--cs> I<code> (B<--custom-sort>)
+
+=item B<-u> (B<--unique>), B<--gu> (B<--global-unique>)
+
+=item B<--head> [I<n>], B<--tail> [I<n>], B<--reverse> (B<--tac>), B<--shuffle>
+
+=item B<--eat>: discard the content of the file
+
+=item B<--ml> I<code> (B<--mark-line>): set the marker for the line with the
+return value
+
+=item B<--clear-markers>, B<--set-all-markers>
+
+=item B<--delete-marked>, B<--delete-before>, B<--delete-after>,
+B<--delete-at-offset> I<offset>
+
+=item B<--insert-before> I<string>, B<--insert-after> I<string>,
+B<--insert-at-offset> I<offset> I<string>: insert interpolated text next to marked
+lines (offset I<0> is just after)
+
+=item B<--cut> I<N>,I<N>,...: select fields according to B<-F> and concatenate
+them with B<-P>
+
+=item B<--paste> I<file>: paste with B<-P> line by line with the current content
+
+=item B<--pivot>: turn the file into a single line with B<-P>
+
+=item B<--anti-pivot>: split all lines according to B<-F>
+
+=item B<--transpose>: transpose lines and columns using B<-F> and B<-P>
+
+=item B<--nl> (B<--number-lines>), B<--pfn> (B<--prefix-file-name>)
+
+=item B<--fn> (B<--file-name>), B<--lc> (B<--line-count>): replace the content of
+the file
+
+=item B<-m> (B<--merge>): merge all the files in a single one
+
+=item B<--tee> I<filename>: duplicate the output
+
+=item B<--shell> I<command>: sends the content as input to the command
+
+=back
+
+=head2 PROGRAM BEHAVIOR
+
+Global options for the program execution.
+
+=over 8
+
+=item B<-o> I<output_file> (B<--output>), B<-a> I<output_file> (B<--append>),
+B<-i> (B<--in-place>): by default output to standard output
+
+=item B<-R> (B<--recursive>), B<--input-filter> I<code>: expand directories,
+optionally filter input files
+
+=item B<--input-encoding> I<encoding>, B<--output-encoding> I<encoding>: default
+is UTF-8
+
+=item B<--input-separator> I<separator>, B<--output-separator> I<separator>:
+default is C<\n>
+
+=item B<--eol> (B<--preserve-input-separator>), B<--fix-final-separator>
+
+=item B<-0>: set B<--input-separator> to C<NUL> and B<--output-separator> to the
+empty string.
+
+=item B<--00>: set B<--output-separator> to C<NUL>, useful with C<xargs -0>
+
+=item B<-h> (B<--help>), B<--version>: remember to have B<perldoc> installed
+
+=item B<-d> (B<--debug>), B<--abort>
+
+=item B<--preserve-perl-env>: keep environment across files
+
+=item B<--safe> [I<n>]: default is I<0>, strictest is I<2>
+
+=back
+
+=head2 PIPELINE MODES
+
+Options for the pipeline commands coming after them. Most modes have a reverse
+mode to return to the default.
+
+=over 8
+
+=item B<-I> (B<--case-insensitive>), B<-S> (B<--case-sensitive>): mode for any
+I<regex> argument, B<-S> is the default
+
+=item B<-Q> (B<--quote-regexp>), B<-E> (B<--end-quote-regexp>): disable
+interpolation in any I<regex>, I<string>, I<filename> or I<command> argument,
+B<-E> is the default
+
+=item B<-V> (B<--inverse-match>), B<-N> (B<--normal-match>): inverse behavior of
+B<--grep> and B<--filter>
+
+=item B<-L> (B<--local-match>), B<-G> (B<--global-match>): apply B<--substitute>
+once per line or as much as possible (this is the default)
+
+=item B<-C> I<code> (B<--comparator>): for B<--sort>, default is C<$a cmp $b>
+
+=item B<-F> I<regex> (B<--input-field-spec>): how to split fields, default is
+B<\s*,\s*|\t>
+
+=item B<-P> I<string> (B<--output-field-spec>): how to paste fields, default is
+a tab
+
+=item B<--default>, B<--bytes>, B<--csv>, B<--tsv>, B<--none>: set the B<-F> and
+B<-P> flags
+
+=item B<--sq> I<string> (B<--single-quote-replacement>),
+B<--dq> I<string> (B<--double-quote-replacement>), B<--ds> I<string>
+(B<--dollar-sigil-replacement>): replace the given character or string by C<'>,
+C<">, or C<$> in all I<code> arguments
+
+=item B<--re> I<engine> (B<--regex-engine>): use the specified regex engine (e.g.
+I<RE2>, I<PCRE>, I<TRE>, I<GNU>, etc.) if installed
+
+=item B<-X> (B<--fatal-error>), B<--ignore-error>: dies on error in B<--perl>,
+B<-n> and B<--filter>
+
+=back
+
+=head2 PERL ENVIRONMENT
+
+Variables and functions available to I<code> arguments as well as I<regex>,
+I<string>, I<filename>, and I<command> ones (unless B<-Q> has been passed).
+
+=over 8
+
+=item B<$_>: current line content (I<RW>)
+
+=item B<$f>, B<$F>: current file name, current absolute file name (I<RO>)
+
+=item B<$n>: current line number (same as standard B<$.>) (I<RO>)
+
+=item B<$N>: number of lines in the current file (I<RO>)
+
+=item B<$m>: marker of the current line (I<RW>)
+
+=item B<@m>: markers for all the lines, current line is at index 0 (I<RW>)
+
+=item B<$I>: 1-based index of the file being processed (I<RW>)
+
+=item B<ss> I<start>[, I<len>[, I<$var>]]: like C<substr> but returns C<''>
+instead of C<undef>.
+
+=item B<pf> I<format>[, I<args...>]: like C<$_ = sprintf I<format>, I<args...>>
+
+=item B<spf> I<format>[, I<args...>]: like C<sprintf>
+
+=back
+
+=head2 AUTHOR AND LICENCE
+
+Copyright 2019-2024 Mathias Kende (L<mailto:mathias@cpan.org>).
+
+This program is distributed under the MIT (X11) License:
+L<http://www.opensource.org/licenses/mit-license.php>
+
+See more in the full documentation at L<https://metacpan.org/pod/ptp>.
+
+=cut
diff --git a/script/ptp b/script/ptp
index a7b2c1d..42cca4c 100755
--- a/script/ptp
+++ b/script/ptp
@@ -39,10 +39,10 @@ are described (and in more details) below, in the L</OPTIONS> section.
 
 =over 4
 
-=item B<-g> I<pattern>, B<-s> I<pattern> I<subst>
+=item B<-g> I<regex>, B<-s> I<regex> I<string>
 
-Filter all the lines using the given pattern (inverted with B<-V> before the
-B<-g> option), or replace all the match of the pattern by the given substitution
+Filter all the lines using the given regex (inverted with B<-V> before the
+B<-g> option), or replace all the matches of the regex by the given substitution
 string.
 
 =item B<-p> I<perl code>
@@ -178,7 +178,7 @@ description of the affected commands.
 
 =over 8
 
-=item B<--g> I<pattern>, B<--grep>
+=item B<--g> I<regex>, B<--grep>
 
 Filter each input to keep only the lines that match the given regular
 expression. That expression cannot have delimiters (e.g. /foo/) so, if you
@@ -192,12 +192,12 @@ This command is much faster then manually giving a match operation to the
 B<--filter> command, because the code does not need to be escaped.
 
 This operation can be made case-insensitive with the B<-I> flag, inverted with
-B<-V> and the pattern can be interpreted as an exact string with B<-Q>.
+B<-V> and the regex can be interpreted as an exact string with B<-Q>.
 
-=item B<-s> I<pattern> I<subst>, B<--substitute>
+=item B<-s> I<regex> I<string>, B<--substitute>
 
 Replace all matches of the given regular expression by the given substitution
-pattern on each line of the input. The substitution string is evaluated like a
+text on each line of the input. The substitution string is evaluated like a
 Perl string, so it can contain references to capture group in the regular
 expression using the B<$1>, B<$2>, etc. syntax.
 
@@ -245,15 +245,6 @@ doing it.
 An error in the Perl code will result in a message printed to the standard
 output but the processing will continue. The current line will not be removed.
 
-=item B<--ml> I<code>, B<--mark-line>
-
-Execute the given code for each line of input (the current line is in the B<$_>
-variable) and store the return value (usually a boolean) in the I<marker> of
-the current line.
-
-The marker can then be accessed by other commands through the B<$m> variable or
-used directly by the commands that operate on marked lines.
-
 =item B<-e> I<code>, B<--execute>
 
 Execute the given code. As other command, this will be executed once per input
@@ -262,11 +253,6 @@ functions used in B<--perl> or B<-n> commands.
 
 Any error in the Perl code will terminate the execution of the program.
 
-=item B<-M> I<module>
-
-Load the given Perl module in the Perl environment. This option cannot be used
-when B<--safe> is specified with level strictly greater than 0.
-
 =item B<-l> I<path>, B<--load>
 
 Same as B<--execute> except that it takes the code to execute from the given
@@ -274,6 +260,11 @@ file.
 
 Any error in the Perl code will terminate the execution of the program.
 
+=item B<-M> I<module>
+
+Load the given Perl module in the Perl environment. This option cannot be used
+when B<--safe> is specified with level strictly greater than 0.
+
 =item B<--sort>
 
 Sort the content of the input using the default lexicographic order. Or the
@@ -349,31 +340,48 @@ the content any-more (maybe you have sent it to another command with B<--shell>)
 but you cannot redirect the output (typically to get the output of that shell
 command).
 
+=item B<--ml> I<code>, B<--mark-line>
+
+Execute the given code for each line of input (the current line is in the B<$_>
+variable) and store the return value (usually a boolean) in the I<marker> of
+the current line.
+
+The marker can then be accessed by other commands through the B<$m> variable or
+used directly by the commands that operate on marked lines.
+
+=item B<--clear-markers>
+
+Clear the marker of all the input lines.
+
+=item B<--set-all-markers>
+
+Set the marker of all the input lines.
+
 =item B<--delete-marked>
 
-Delete every line whose marker is currently set. See the B<--mark-line> command
-for details on how to set the marker of a line.
+Delete every line whose marker is currently set to a true value. See the
+B<--mark-line> command for details on how to set the marker of a line.
 
 After this operation, no line has a marker set (they were all deleted).
 
 =item B<--delete-before>
 
-Delete all the lines immediately preceding a line whose marker is set. The
-markers of the lines that are not deleted are not changed.
+Delete all the lines immediately preceding a line whose marker is set to a true
+value. The markers of the lines that are not deleted are not changed.
 
 =item B<--delete-after>
 
-Delete all the lines immediately following a line whose marker is set. The
-markers of the lines that are not deleted are not changed.
+Delete all the lines immediately following a line whose marker is set to a true
+value. The markers of the lines that are not deleted are not changed.
 
 =item B<--delete-at-offset> I<offset>
 
 Delete all the lines situated at the given offset from a marked line. A positive offset means lines that are after the marked lines.
 
-=item B<--insert-before> I<text>
+=item B<--insert-before> I<string>
 
 Insert the given line of text immediately before each marked line. The given
-I<text> is treated as a quoted Perl string, so it can use any of the variable
+I<string> is treated as a quoted Perl string, so it can use any of the variable
 described in L</PERL ENVIRONMENT>. In particular, the B<$_> variable is set to
 the marked line before which the insertion is taking place. However this text is
 not a general Perl expression, so you may have to post-process with an other
@@ -386,24 +394,16 @@ your shell before the argument is read by the program).
 The newly inserted lines have their markers unset. Other lines' markers are not
 changed.
 
-=item B<--insert-after> I<text>
+=item B<--insert-after> I<string>
 
 Same as B<--insert-before>, but the new line is inserted after the marked line.
 
-=item B<--insert-at-offset> I<offset> I<text>
+=item B<--insert-at-offset> I<offset> I<string>
 
 Generalized version of the B<--insert-before> and <--insert-after> commands.
 This commands insert the given text at the given offset relative to the marked
 line. Offset I<0> means inserting the line immediately after the marked line.
 
-=item B<--clear-markers>
-
-Clear the marker of all the input lines.
-
-=item B<--set-all-markers>
-
-Set the marker of all the input lines.
-
 =item B<--cut> I<field>,I<field>,...
 
 Select specific fields of each input line and replace the line content with
@@ -566,7 +566,7 @@ returns a true value are kept. The complete file name is passed to the code in
 the default B<$_> variable. You can view this option in action in the
 L</EXAMPLES> sections
 
-This option applies only of files recursively expended from a directory passed
+This option only applies to files recursively expanded from a directory passed
 on the command line. It does not apply on files that are explicitly listed. In
 particular, this option does not apply on files that are expended by a shell
 glob. It derives that this option is useless unless B<-R> is specified too.
@@ -719,16 +719,25 @@ of regular expressions.
 
 This is the default mode when B<--quote-regexp> is not specified.
 
-=item B<-G>, B<--global-match>
+=item B<-V>, B<--inverse-match>
 
-Apply the substitution given to the B<--substitute> command as many times as
-possible (this is the default).
+Invert the behavior of the B<--grep> and B<--filter> commands (lines that
+would normally be dropped are kept and vice versa).
+
+=item B<-N>, B<--normal-match>
+
+Give the default behavior to the B<--grep> and B<--filter> commands.
 
 =item B<-L>, B<--local-match>
 
 Apply the substitution given to the B<--substitute> command at most once per
 line.
 
+=item B<-G>, B<--global-match>
+
+Apply the substitution given to the B<--substitute> command as many times as
+possible (this is the default).
+
 =item B<-C> I<code>, B<--comparator>
 
 Specify a custom comparator to use with the B<--sort> command. This flag
@@ -814,7 +823,7 @@ commands. The default value I<perl> uses Perl built-in engine. Other values are
 (e.g. I<RE2>, I<PCRE>, I<TRE>, I<GNU>, etc.). The matching Perl module needs to
 be installed. Note that the name of the engine is case-sensitive.
 
-For the B<--substitute> command, only the pattern is affected by this option.
+For the B<--substitute> command, only the regex is affected by this option.
 The substitution still uses the Perl syntax to refer to matched group (e.g.
 B<$1>, etc.).
 
@@ -833,15 +842,6 @@ Print an error to the standard output when an error occurs in the Perl code
 provided to the B<--perl>, B<-n> and B<--filter> commands and continue the
 processing (this is the default).
 
-=item B<-V>, B<--inverse-match>
-
-Inverse the behavior of the B<--grep> and B<--filter> commands (lines that
-would normally be dropped are kept and inversely).
-
-=item B<-N>, B<--normal-match>
-
-Give the default behavior to the B<--grep> and B<--filter> commands.
-
 =back
 
 =head2 PERL ENVIRONMENT
@@ -855,6 +855,10 @@ While not directly executing Perl code, the B<--grep> and B<--substitute>
 commands also have access to the variables described below and those that are
 created by user supplied code.
 
+Note that the Perl environment is entirely reset between each file (and when
+encountering the B<--merge> command) unless the B<--preserve-perl-env> option
+was passed.
+
 =head3 B<$_>
 
 This variable is set to the current line being processed. In most context (but
@@ -939,16 +943,13 @@ files:
 
   ptp file1 file2 file3
 
-This example is similar to the built-in B<--nl> commands. It replaces each line
-with the output of the B<sprintf> function which, here, will prefix the line
-number to each line.
-
-That example also demonstrates that a variable can be re-used across the lines
-of an input (the B<$i> variable), but that it is reset between each input. Using
-the variables and functions described in L</PERL ENVIRONMENT> the argument to
-the B<-n> command could be rewritten C<spf "% 5d  %s", $n, $_>:
+This next example replaces each line with the output of the B<sprintf> function
+which, here, will prefix the line number to each line (similar to the B<--nl>
+command). This example also demonstrates that a variable can be re-used across
+the lines of an input (the B<$i> variable), but that it is reset between each
+input. It uses the B<spf> function described in L</PERL ENVIRONMENT>:
 
-  ptp file1 file2 -n 'sprintf("%5d  %s", ++$i, $_)'
+  ptp file1 file2 -n 'spf("% 5d  %s", ++$i, $_)'
 
 Same as the example above, but does not number empty lines (this is the default
 behavior of the GNU B<nl> util). Also this uses the B<pf> function that modifies
@@ -1011,6 +1012,12 @@ Setting this variable to B<1> means the the B<-Q> flag is in effect at the
 beginning of the parsing of the command line arguments. Setting the variable to
 B<0> gives the default behavior (as if B<-E> was passed).
 
+=item PTP_DEFAULT_INVERSE_MATCH
+
+Setting this variable to B<1> means that the B<-V> flag is in effect at the
+beginning of the parsing of the command line arguments. Setting the variable to
+B<0> gives the default behavior (as if B<-N> was passed).
+
 =item PTP_DEFAULT_LOCAL_MATCH
 
 Setting this variable to B<1> means the the B<-L> flag is in effect at the
@@ -1028,12 +1035,6 @@ Setting this variable to B<1> means that the B<-X> flag is in effect at the
 beginning of the parsing of the command line arguments. Setting the variable to
 B<0> gives the default behavior (as if B<-ignore-error> was passed).
 
-=item PTP_DEFAULT_INVERSE_MATCH
-
-Setting this variable to B<1> means that the B<-V> flag is in effect at the
-beginning of the parsing of the command line arguments. Setting the variable to
-B<0> gives the default behavior (as if B<-N> was passed).
-
 =item PTP_DEFAULT_SAFE
 
 Setting this variable to an integer value will set the default mode of executing
@@ -1060,7 +1061,7 @@ This program has been written by L<Mathias Kende|mailto:mathias@cpan.org>.
 
 =head1 LICENCE
 
-Copyright 2019 Mathias Kende
+Copyright 2019-2024 Mathias Kende
 
 This program is distributed under the MIT (X11) License:
 L<http://www.opensource.org/licenses/mit-license.php>