From 96749bcc79b757fd0cf626c213978d296ca0658b Mon Sep 17 00:00:00 2001 From: Mathias Kende Date: Sun, 29 Sep 2024 23:29:37 +0200 Subject: [PATCH] Add a cheat sheet POD and improve the main documentation. --- .aspelldict | 9 +- .gitignore | 3 + Changes | 3 + MANIFEST.SKIP | 2 + Makefile.PL | 18 +++ lib/App/PTP/Cheat_Sheet.pod | 218 ++++++++++++++++++++++++++++++++++++ script/ptp | 139 +++++++++++------------ 7 files changed, 322 insertions(+), 70 deletions(-) create mode 100644 lib/App/PTP/Cheat_Sheet.pod diff --git a/.aspelldict b/.aspelldict index b36b6af..00d57f1 100644 --- a/.aspelldict +++ b/.aspelldict @@ -1,4 +1,4 @@ -personal_ws-1.1 en 112 +personal_ws-1.1 en 119 AliasVar CMD CPAN @@ -13,6 +13,7 @@ LF MERCHANTABILITY MarkersArray NONINFRINGEMENT +NUL PCRE PODNAME PTP @@ -24,6 +25,8 @@ ProhibitMagicNumbers ProhibitNoWarnings ProhibitOneArgSelect ProhibitStringyEval +RO +RW ReadOnlyVar Readonly RequireArgUnpacking @@ -45,6 +48,8 @@ cmd cmp coderef comparator +cpanm +cpanminus csv dir dirs @@ -60,6 +65,7 @@ expressivity fh filepath fn +gcc globaluniqstr gu guniq @@ -99,6 +105,7 @@ sublicense subprocess subst substr +sudo tac tempfile tsv diff --git a/.gitignore b/.gitignore index d5cf41f..06bd7be 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ /App-PTP-*/ # End of the template. You can add custom content below this line. + +/ptp_cheat_sheet.html +/ptp_cheat_sheet.pdf diff --git a/Changes b/Changes index 450b702..418e6e3 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,8 @@ Revision history for Perl distribution App-PTP +1.16 - ?? + - Add a "cheat sheet" POD to the distribution. + 1.15 - 2024-09-29 - Remove a remaining smartmatch usage from our benchmarks. diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index e09576f..9802e09 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -32,3 +32,5 @@ .*\.bak # End of the template. You can add custom content below this line. 
+ +^ptp_cheat_sheet\..*$ diff --git a/Makefile.PL b/Makefile.PL index db23740..2125e66 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -134,3 +134,21 @@ MAKE_FRAGMENT # add to the generated Makefile. # End of the template. You can add custom content below this line. + +sub postamble { + return <<"MAKE_FRAGMENT"; +cheat_sheet: ptp_cheat_sheet.html ptp_cheat_sheet.pdf + +ptp_cheat_sheet.pdf: lib/App/PTP/Cheat_Sheet.pod +\tpod2pdf --left-margin 30 --right-margin 30 --top-margin 36 --bottom-margin 36 --title="PTP Cheat Sheet" --noheader --nofooter \$< | pdfjam --landscape --nup 2x1 --outfile \$@ 2>/dev/null + +ptp_cheat_sheet.html: lib/App/PTP/Cheat_Sheet.pod +\tpod2html --noindex --title="PTP Cheat Sheet" \$< > \$@ + +clean_cheat_sheet: +\trm -f ptp_cheat_sheet.pdf ptp_cheat_sheet.html + +clean:: clean_cheat_sheet + +MAKE_FRAGMENT +} diff --git a/lib/App/PTP/Cheat_Sheet.pod b/lib/App/PTP/Cheat_Sheet.pod new file mode 100644 index 0000000..d05f4a9 --- /dev/null +++ b/lib/App/PTP/Cheat_Sheet.pod @@ -0,0 +1,218 @@ +=pod + +=head1 PTP Cheat Sheet + +=head2 SYNOPSIS + +Install with: + + sudo apt-get install perl cpanminus perl-doc build-essential + # or + sudo yum install perl Perl-App-cpanminus perl-doc gcc make + + sudo cpanm App::PTP -n -L /usr/local --man-pages --install-args \ + 'DESTINSTALLBIN=/usr/local/bin' + +Run with: + + ptp file1 file2 ... [--grep re] [--substitute re subst] ... [-o out] + +=head2 INPUT FILES + +Input files can appear anywhere on the command line and are processed in the +order in which they are given. + +=over 8 + +=item C> (anywhere in the command line, not starting with a C<->) + +=item C<-> (reads from stdin) + +=item C<-- F ...> (for any filename) + +=back + +=head2 PIPELINE COMMANDS + +Pipeline commands are applied, in order, to all the input files. 
+ +=over 8 + +=item B<-g> I (B<--grep>), B<-s> I I (B<--substitute>) + +=item B<-p> I (B<--perl>): read and write C<$_> to modify the file + +=item B<-n> I: read from C<$_>, write the return values + +=item B<-f> I (B<--filter>): return I to keep the line + +=item B<-e> I (B<--execute>): execute once per input file + +=item B<-l> I (B<--load>): execute the given file, once per input file + +=item B<-M> I: load the given module + +=item B<--sort>, B<--ns> (B<--numeric-sort>), B<--ls> (B<--locale-sort>), +B<--cs> I (B<--custom-sort>) + +=item B<-u> (B<--unique>), B<--gu> (B<--global-unique>) + +=item B<--head> [I], B<--tail> [I], B<--reverse> (B<--tac>), B<--shuffle> + +=item B<--eat>: discard the content of the file + +=item B<--ml> I (B<--mark-line>): set the marker for the line with the +return value + +=item B<--clear-markers>, B<--set-all-markers> + +=item B<--delete-marked>, B<--delete-before>, B<--delete-after>, +B<--delete-at-offset> I + +=item B<--insert-before> I, B<--insert-after> I, +B<--insert-at-offset> I I: insert interpolated text next to marked +lines (offset I<0> is just after) + +=item B<--cut> I,I,...: select fields according to B<-F> and concatenate +them with B<-P> + +=item B<--paste> I: paste with B<-P> line by line with the current content + +=item B<--pivot>: turn the file into a single line with B<-P> + +=item B<--anti-pivot>: split all lines according to B<-F> + +=item B<--transpose>: transpose lines and columns using B<-F> and B<-P> + +=item B<--nl> (B<--number-lines>), B<--pfn> (B<--prefix-file-name>) + +=item B<--fn> (B<--file-name>), B<--lc> (B<--line-count>): replace the content of +the file + +=item B<-m> (B<--merge>): merge all the files into a single one + +=item B<--tee> I: duplicate the output + +=item B<--shell> I: send the content as input to the command + +=back + +=head2 PROGRAM BEHAVIOR + +Global options for the program execution. 
+ +=over 8 + +=item B<-o> I (B<--output>), B<-a> I (B<--append>), +B<-i> (B<--in-place>): by default output to standard output + +=item B<-R> (B<--recursive>), B<--input-filter> I: expand directories, +optionally filter input files + +=item B<--input-encoding> I, B<--output-encoding> I: default +is UTF-8 + +=item B<--input-separator> I, B<--output-separator> I: +default is C<\n> + +=item B<--eol> (B<--preserve-input-separator>), B<--fix-final-separator> + +=item B<-0>: set B<--input-separator> to C and B<--output-separator> to the +empty string. + +=item B<--00>: set B<--output-separator> to the C, useful with C + +=item B<-h> (B<--help>), B<--version>: remember to have B installed + +=item B<-d> (B<--debug>), B<--abort> + +=item B<--preserve-perl-env>: keep environment across files + +=item B<--safe> [I]: default is I<0>, strictest is I<2> + +=back + +=head2 PIPELINE MODES + +Options for the pipeline commands coming after them. Most modes have a reverse +mode to return to the default. + +=over 8 + +=item B<-I> (B<--case-insensitive>), B<-S> (B<--case-sensitive>): mode for any +I argument, B<-S> is the default + +=item B<-Q> (B<--quote-regexp>), B<-E> (B<--end-quote-regexp>): disable +interpolation in any I, I, I or I argument, +B<-E> is the default + +=item B<-V> (B<--inverse-match>), B<-N> (B<--normal-match>): invert the behavior of +B<--grep> and B<--filter> + +=item B<-L> (B<--local-match>), B<-G> (B<--global-match>): apply B<--substitute> +once per line or as much as possible (this is the default) + +=item B<-C> I (B<--comparator>): for B<--sort>, default is C<$a cmp $b> + +=item B<-F> I (B<--input-field-spec>): how to split fields, default is +B<\s*,\s*|\t> + +=item B<-P> I (B<--output-field-spec>): how to paste fields, default is +a tab + +=item B<--default>, B<--bytes>, B<--csv>, B<--tsv>, B<--none>: set the B<-F> and +B<-P> flags + +=item B<--sq> I (B<--single-quote-replacement>), +B<--dq> I (B<--double-quote-replacement>), B<--ds> I 
+(B<--dollar-sigil-replacement>): replace the given character or string by C<'>, +C<">, or C<$> in all I arguments + +=item B<--re> I, B<--regex-engine>: use the specified regex engine (e.g. +I, I, I, I, etc.) if installed + +=item B<-X> (B<--fatal-error>), B<--ignore-error>: dies on error in B<--perl>, +B<-n> and B<--filter> + +=back + +=head2 PERL ENVIRONMENT + +Variables and functions available to I arguments as well as I, +I, I, and I ones (unless B<-Q> has been passed). + +=over 8 + +=item B<$_>: current line content (I) + +=item B<$f>, B<$F>: current file name, current absolute file name (I) + +=item B<$n>: current line number (same as standard B<$.>) (I) + +=item B<$N>: number of lines in the current file (I) + +=item B<$m>: marker of the current line (I) + +=item B<@m>: markers for all the lines, current line is at index 0 (I) + +=item B<$I>: 1-based index of the file being processed (I) + +=item B I[, I[, I<$var>]]: like C but returns C<''> +instead of C. + +=item B I[, I]: like C<$_ = sprintf I, I> + +=item B I[, I]: like C + +=back + +=head2 AUTHOR AND LICENCE + +Copyright 2019-2024 Mathias Kende (L). + +This program is distributed under the MIT (X11) License: +L + +See more in the full documentation at L. + +=cut diff --git a/script/ptp b/script/ptp index a7b2c1d..42cca4c 100755 --- a/script/ptp +++ b/script/ptp @@ -39,10 +39,10 @@ are described (and in more details) below, in the L section. =over 4 -=item B<-g> I, B<-s> I I +=item B<-g> I, B<-s> I I -Filter all the lines using the given pattern (inverted with B<-V> before the -B<-g> option), or replace all the match of the pattern by the given substitution +Filter all the lines using the given regex (inverted with B<-V> before the +B<-g> option), or replace all the match of the regex by the given substitution string. =item B<-p> I @@ -178,7 +178,7 @@ description of the affected commands. 
=over 8 -=item B<--g> I, B<--grep> +=item B<--g> I, B<--grep> Filter each input to keep only the lines that match the given regular expression. That expression cannot have delimiters (e.g. /foo/) so, if you @@ -192,12 +192,12 @@ This command is much faster then manually giving a match operation to the B<--filter> command, because the code does not need to be escaped. This operation can be made case-insensitive with the B<-I> flag, inverted with -B<-V> and the pattern can be interpreted as an exact string with B<-Q>. +B<-V> and the regex can be interpreted as an exact string with B<-Q>. -=item B<-s> I I, B<--substitute> +=item B<-s> I I, B<--substitute> Replace all matches of the given regular expression by the given substitution -pattern on each line of the input. The substitution string is evaluated like a +text on each line of the input. The substitution string is evaluated like a Perl string, so it can contain references to capture group in the regular expression using the B<$1>, B<$2>, etc. syntax. @@ -245,15 +245,6 @@ doing it. An error in the Perl code will result in a message printed to the standard output but the processing will continue. The current line will not be removed. -=item B<--ml> I, B<--mark-line> - -Execute the given code for each line of input (the current line is in the B<$_> -variable) and store the return value (usually a boolean) in the I of -the current line. - -The marker can then be accessed by other commands through the B<$m> variable or -used directly by the commands that operate on marked lines. - =item B<-e> I, B<--execute> Execute the given code. As other command, this will be executed once per input @@ -262,11 +253,6 @@ functions used in B<--perl> or B<-n> commands. Any error in the Perl code will terminate the execution of the program. -=item B<-M> I - -Load the given Perl module in the Perl environment. This option cannot be used -when B<--safe> is specified with level strictly greater than 0. 
- =item B<-l> I, B<--load> Same as B<--execute> except that it takes the code to execute from the given @@ -274,6 +260,11 @@ file. Any error in the Perl code will terminate the execution of the program. +=item B<-M> I + +Load the given Perl module in the Perl environment. This option cannot be used +when B<--safe> is specified with level strictly greater than 0. + =item B<--sort> Sort the content of the input using the default lexicographic order. Or the @@ -349,31 +340,48 @@ the content any-more (maybe you have sent it to another command with B<--shell>) but you cannot redirect the output (typically to get the output of that shell command). +=item B<--ml> I, B<--mark-line> + +Execute the given code for each line of input (the current line is in the B<$_> +variable) and store the return value (usually a boolean) in the I of +the current line. + +The marker can then be accessed by other commands through the B<$m> variable or +used directly by the commands that operate on marked lines. + +=item B<--clear-markers> + +Clear the marker of all the input lines. + +=item B<--set-all-markers> + +Set the marker of all the input lines. + =item B<--delete-marked> -Delete every line whose marker is currently set. See the B<--mark-line> command -for details on how to set the marker of a line. +Delete every line whose marker is currently set to a true value. See the +B<--mark-line> command for details on how to set the marker of a line. After this operation, no line has a marker set (they were all deleted). =item B<--delete-before> -Delete all the lines immediately preceding a line whose marker is set. The -markers of the lines that are not deleted are not changed. +Delete all the lines immediately preceding a line whose marker is set to a true +value. The markers of the lines that are not deleted are not changed. =item B<--delete-after> -Delete all the lines immediately following a line whose marker is set. The -markers of the lines that are not deleted are not changed. 
+Delete all the lines immediately following a line whose marker is set to a true +value. The markers of the lines that are not deleted are not changed. =item B<--delete-at-offset> I Delete all the lines situated at the given offset from a marked line. A positive offset means lines that are after the marked lines. -=item B<--insert-before> I +=item B<--insert-before> I Insert the given line of text immediately before each marked line. The given -I is treated as a quoted Perl string, so it can use any of the variable +I is treated as a quoted Perl string, so it can use any of the variable described in L. In particular, the B<$_> variable is set to the marked line before which the insertion is taking place. However this text is not a general Perl expression, so you may have to post-process with an other @@ -386,24 +394,16 @@ your shell before the argument is read by the program). The newly inserted lines have their markers unset. Other lines' markers are not changed. -=item B<--insert-after> I +=item B<--insert-after> I Same as B<--insert-before>, but the new line is inserted after the marked line. -=item B<--insert-at-offset> I I +=item B<--insert-at-offset> I I Generalized version of the B<--insert-before> and <--insert-after> commands. This commands insert the given text at the given offset relative to the marked line. Offset I<0> means inserting the line immediately after the marked line. -=item B<--clear-markers> - -Clear the marker of all the input lines. - -=item B<--set-all-markers> - -Set the marker of all the input lines. - =item B<--cut> I,I,... Select specific fields of each input line and replace the line content with @@ -566,7 +566,7 @@ returns a true value are kept. The complete file name is passed to the code in the default B<$_> variable. 
You can view this option in action in the L sections -This option applies only of files recursively expended from a directory passed +This option only applies to files recursively expanded from a directory passed on the command line. It does not apply on files that are explicitly listed. In particular, this option does not apply on files that are expended by a shell glob. It derives that this option is useless unless B<-R> is specified too. @@ -719,16 +719,25 @@ of regular expressions. This is the default mode when B<--quote-regexp> is not specified. -=item B<-G>, B<--global-match> +=item B<-V>, B<--inverse-match> -Apply the substitution given to the B<--substitute> command as many times as -possible (this is the default). +Inverse the behavior of the B<--grep> and B<--filter> commands (lines that +would normally be dropped are kept and inversely). + +=item B<-N>, B<--normal-match> + +Give the default behavior to the B<--grep> and B<--filter> commands. =item B<-L>, B<--local-match> Apply the substitution given to the B<--substitute> command at most once per line. +=item B<-G>, B<--global-match> + +Apply the substitution given to the B<--substitute> command as many times as +possible (this is the default). + =item B<-C> I, B<--comparator> Specify a custom comparator to use with the B<--sort> command. This flag @@ -814,7 +823,7 @@ commands. The default value I uses Perl built-in engine. Other values are (e.g. I, I, I, I, etc.). The matching Perl module needs to be installed. Note that the name of the engine is case-sensitive. -For the B<--substitute> command, only the pattern is affected by this option. +For the B<--substitute> command, only the regex is affected by this option. The substitution still uses the Perl syntax to refer to matched group (e.g. B<$1>, etc.). 
@@ -833,15 +842,6 @@ Print an error to the standard output when an error occurs in the Perl code provided to the B<--perl>, B<-n> and B<--filter> commands and continue the processing (this is the default). -=item B<-V>, B<--inverse-match> - -Inverse the behavior of the B<--grep> and B<--filter> commands (lines that -would normally be dropped are kept and inversely). - -=item B<-N>, B<--normal-match> - -Give the default behavior to the B<--grep> and B<--filter> commands. - =back =head2 PERL ENVIRONMENT @@ -855,6 +855,10 @@ While not directly executing Perl code, the B<--grep> and B<--substitute> commands also have access to the variables described below and those that are created by user supplied code. +Note that the Perl environment is entirely reset between each file (and when +encountering the B<--merge> command) unless the B<--preserve-perl-env> option +was passed. + =head3 B<$_> This variable is set to the current line being processed. In most context (but @@ -939,16 +943,13 @@ files: ptp file1 file2 file3 -This example is similar to the built-in B<--nl> commands. It replaces each line -with the output of the B function which, here, will prefix the line -number to each line. - -That example also demonstrates that a variable can be re-used across the lines -of an input (the B<$i> variable), but that it is reset between each input. Using -the variables and functions described in L the argument to -the B<-n> command could be rewritten C: +This next example replaces each line with the output of the B function +which, here, will prefix the line number to each line (similar to the B<--nl> +command). This example also demonstrates that a variable can be re-used across +the lines of an input (the B<$i> variable), but that it is reset between each +input. 
Using the variables and functions described in L: - ptp file1 file2 -n 'sprintf("%5d %s", ++$i, $_)' + ptp file1 file2 -n 'spf("% 5d %s", ++$i, $_)' Same as the example above, but does not number empty lines (this is the default behavior of the GNU B util). Also this uses the B function that modifies @@ -1011,6 +1012,12 @@ Setting this variable to B<1> means the the B<-Q> flag is in effect at the beginning of the parsing of the command line arguments. Setting the variable to B<0> gives the default behavior (as if B<-E> was passed). +=item PTP_DEFAULT_INVERSE_MATCH + +Setting this variable to B<1> means that the B<-V> flag is in effect at the +beginning of the parsing of the command line arguments. Setting the variable to +B<0> gives the default behavior (as if B<-N> was passed). + =item PTP_DEFAULT_LOCAL_MATCH Setting this variable to B<1> means the the B<-L> flag is in effect at the @@ -1028,12 +1035,6 @@ Setting this variable to B<1> means that the B<-X> flag is in effect at the beginning of the parsing of the command line arguments. Setting the variable to B<0> gives the default behavior (as if B<-ignore-error> was passed). -=item PTP_DEFAULT_INVERSE_MATCH - -Setting this variable to B<1> means that the B<-V> flag is in effect at the -beginning of the parsing of the command line arguments. Setting the variable to -B<0> gives the default behavior (as if B<-N> was passed). - =item PTP_DEFAULT_SAFE Setting this variable to an integer value will set the default mode of executing @@ -1060,7 +1061,7 @@ This program has been written by L. =head1 LICENCE -Copyright 2019 Mathias Kende +Copyright 2019-2024 Mathias Kende This program is distributed under the MIT (X11) License: L