Skip to content

Commit e32d240

Browse files
committed
new switch --ignore-regex, #862, #865, #868
1 parent e35e611 commit e32d240

File tree

4 files changed

+214
-37
lines changed

4 files changed

+214
-37
lines changed

Unix/cloc

Lines changed: 99 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ my (
247247
$opt_ignore_whitespace ,
248248
$opt_ignore_case ,
249249
$opt_ignore_case_ext ,
250+
@opt_ignore_regex ,
250251
$opt_follow_links ,
251252
$opt_autoconf ,
252253
$opt_sum_one ,
@@ -352,6 +353,7 @@ my $getopt_success = GetOptions( # {{{1
352353
"ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
353354
"ignore_case|ignore-case" => \$opt_ignore_case ,
354355
"ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext ,
356+
"ignore_regex|ignore-regex=s" => \@opt_ignore_regex ,
355357
"follow_links|follow-links" => \$opt_follow_links ,
356358
"autoconf" => \$opt_autoconf ,
357359
"sum_one|sum-one" => \$opt_sum_one ,
@@ -456,6 +458,7 @@ load_from_config_file($config_file, # {{{2
456458
\$opt_ignore_whitespace ,
457459
\$opt_ignore_case ,
458460
\$opt_ignore_case_ext ,
461+
\@opt_ignore_regex ,
459462
\$opt_follow_links ,
460463
\$opt_autoconf ,
461464
\$opt_sum_one ,
@@ -540,6 +543,7 @@ $opt_exclude_ext = "" unless $opt_exclude_ext;
540543
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
541544
$opt_ignore_case = 0 unless $opt_ignore_case;
542545
$opt_ignore_case_ext = 0 unless $opt_ignore_case_ext;
546+
my %ignore_regex = ();
543547
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
544548
$opt_follow_links = 0 unless $opt_follow_links;
545549
if (defined $opt_diff_timeout) {
@@ -841,6 +845,8 @@ if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
841845
}
842846
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
843847
\%Scale_Factor);
848+
parse_ignore_regex(\@opt_ignore_regex, \%Filters_by_Language, \%ignore_regex)
849+
if @opt_ignore_regex;
844850

845851
my $nCounted = 0;
846852

@@ -1416,7 +1422,7 @@ if ( $max_processes == 0) {
14161422
# Multiprocessing is disabled
14171423
my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
14181424
\@files_removed_tot, \@file_pairs_tot,
1419-
0, \%Language, \%Ignored);
1425+
0, \%Language, \%ignore_regex, \%Ignored);
14201426
%Results_by_File = %{$part->{'results_by_file'}};
14211427
%Results_by_Language= %{$part->{'results_by_language'}};
14221428
%Delta_by_File = %{$part->{'delta_by_file'}};
@@ -1504,7 +1510,7 @@ if ( $max_processes == 0) {
15041510
$pm->start() and next;
15051511
my $count_result = count_filesets ( $fset_a, $fset_b,
15061512
\@files_added_part, \@files_removed_part,
1507-
\@filepairs_part, 1, \%Language, \%Ignored );
1513+
\@filepairs_part, 1, \%Language, \%ignore_regex, \%Ignored );
15081514
$pm->finish(0 , $count_result);
15091515
}
15101516
# Wait for processes to finish
@@ -1634,7 +1640,7 @@ my @sorted_files = sort keys %unique_source_file;
16341640

16351641
if ( $max_processes == 0) {
16361642
# Multiprocessing is disabled
1637-
my $part = count_files ( \@sorted_files , 0, \%Language);
1643+
my $part = count_files ( \@sorted_files , 0, \%ignore_regex, \%Language);
16381644
%Results_by_File = %{$part->{'results_by_file'}};
16391645
%Results_by_Language= %{$part->{'results_by_language'}};
16401646
%Ignored = ( %Ignored, %{$part->{'ignored'}});
@@ -1677,7 +1683,7 @@ if ( $max_processes == 0) {
16771683
my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
16781684
while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
16791685
$pm->start() and next;
1680-
my $count_result = count_files ( \@part, 1, \%Language );
1686+
my $count_result = count_files ( \@part, 1, \%ignore_regex, \%Language );
16811687
$pm->finish(0 , $count_result);
16821688
}
16831689
# Wait for processes to finish
@@ -1975,6 +1981,18 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
19751981
C++; this switch would count .C files as C rather
19761982
than C++ on *nix operating systems). File name
19771983
case insensitivity is always true on Windows.
1984+
--ignore-regex Ignore lines in source files that match the given
1985+
Perl regular expression for the given language(s).
1986+
This option can be specified multiple times.
1987+
Language names are comma separated and are followed
1988+
by the pipe character and the regular expression.
1989+
Use * to match all languages.
1990+
Examples:
1991+
--ignore-regex=\"C,Java,C++|^\\s*[{};]\\s*\$\"
1992+
--ignore-regex=\"*|DEBUG|TEST\\s+ONLY\"
1993+
These filters are applied after comments are
1994+
removed. Use --strip-comments=EXT to create
1995+
new files that show these filters applied.
19781996
--lang-no-ext=<lang> Count files without extensions using the <lang>
19791997
counter. This option overrides internal logic
19801998
for files without extensions (where such files
@@ -2188,6 +2206,7 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
21882206
--categorized=<file> Save file sizes in bytes, identified languages
21892207
and names of categorized files to <file>.
21902208
--counted=<file> Save names of processed source files to <file>.
2209+
See also --found, --ignored, --unique.
21912210
--diff-alignment=<file> Write to <file> a list of files and file pairs
21922211
showing which files were added, removed, and/or
21932212
compared during a run with --diff. This switch
@@ -2198,9 +2217,11 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
21982217
regular expressions. An examination of the
21992218
source code may be needed for further explanation.
22002219
--help Print this usage information and exit.
2201-
--found=<file> Save names of every file found to <file>.
2220+
--found=<file> Save names of every file found to <file>. See
2221+
also --counted, --ignored, --unique.
22022222
--ignored=<file> Save names of ignored files and the reason they
2203-
were ignored to <file>.
2223+
were ignored to <file>. See also --counted,
2224+
--found, --unique.
22042225
--print-filter-stages Print processed source code before and after
22052226
each filter is applied.
22062227
--show-ext[=<ext>] Print information about all known (or just the
@@ -2209,6 +2230,8 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
22092230
given) languages and exit.
22102231
--show-os Print the value of the operating system mode
22112232
and exit. See also --unix, --windows.
2233+
--unique=<file> Save names of unique files found to <file>. See
2234+
also --counted, --found, --ignored.
22122235
-v[=<n>] Verbose switch (optional numeric value).
22132236
-verbose[=<n>] Long form of -v.
22142237
--version Print the version of this program and exit.
@@ -2558,7 +2581,7 @@ sub file_extension { # {{{1
25582581
}
25592582
} # 1}}}
25602583
sub count_files { # {{{1
2561-
my ($filelist, $counter_type, $language_hash) = @_;
2584+
my ($filelist, $counter_type, $rha_ignore_regex, $language_hash) = @_;
25622585
print "-> count_files()\n" if $opt_v > 2;
25632586
my @p_errors = ();
25642587
my %p_ignored = ();
@@ -2629,7 +2652,8 @@ sub count_files { # {{{1
26292652
} else {
26302653
($all_line_count,
26312654
$blank_count ,
2632-
$comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
2655+
$comment_count ,) = call_counter($file, $Language{$file},
2656+
$rha_ignore_regex, \@Errors);
26332657
$code_count = $all_line_count - $blank_count - $comment_count;
26342658
}
26352659
}
@@ -2666,6 +2690,7 @@ sub count_filesets { # {{{1
26662690
$file_pairs,
26672691
$counter_type,
26682692
$language_hash,
2693+
$rha_ignore_regex,
26692694
$rh_Ignored) = @_;
26702695
print "-> count_filesets()\n" if $opt_v > 2;
26712696
my @p_errors = ();
@@ -2695,7 +2720,7 @@ sub count_filesets { # {{{1
26952720
my ($all_line_count,
26962721
$blank_count ,
26972722
$comment_count ,
2698-
) = call_counter($file, $Lang, \@p_errors);
2723+
) = call_counter($file, $Lang, $rha_ignore_regex, \@p_errors);
26992724
$already_counted{$file} = 1;
27002725
my $code_count = $all_line_count-$blank_count-$comment_count;
27012726
if ($opt_by_file) {
@@ -2755,7 +2780,7 @@ sub count_filesets { # {{{1
27552780
my ($all_line_count,
27562781
$blank_count ,
27572782
$comment_count ,
2758-
) = call_counter($f, $this_lang, \@p_errors);
2783+
) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
27592784
$p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
27602785
$p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count;
27612786
$p_dbl{ $this_lang }{'code'}{'added'} +=
@@ -2795,7 +2820,7 @@ sub count_filesets { # {{{1
27952820
my ($all_line_count,
27962821
$blank_count ,
27972822
$comment_count ,
2798-
) = call_counter($f, $this_lang, \@p_errors);
2823+
) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
27992824
$p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
28002825
$p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count;
28012826
$p_dbl{ $this_lang}{'code'}{'removed'} +=
@@ -3057,18 +3082,18 @@ sub count_filesets { # {{{1
30573082
($all_line_count_L,
30583083
$blank_count_L ,
30593084
$comment_count_L ,
3060-
) = call_counter($file_L, $Lang_L, \@Errors);
3085+
) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);
30613086

30623087
($all_line_count_R,
30633088
$blank_count_R ,
30643089
$comment_count_R ,
3065-
) = call_counter($file_R, $Lang_R, \@Errors);
3090+
) = call_counter($file_R, $Lang_R, $rha_ignore_regex, \@Errors);
30663091
} else {
30673092
# L and R file contents are identical, no need to diff
30683093
($all_line_count_L,
30693094
$blank_count_L ,
30703095
$comment_count_L ,
3071-
) = call_counter($file_L, $Lang_L, \@Errors);
3096+
) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);
30723097
$all_line_count_R = $all_line_count_L;
30733098
$blank_count_R = $blank_count_L ;
30743099
$comment_count_R = $comment_count_L ;
@@ -6872,9 +6897,10 @@ sub different_files { # {{{1
68726897
return @unique;
68736898
} # 1}}}
68746899
sub call_counter { # {{{1
6875-
my ($file , # in
6876-
$language , # in
6877-
$ra_Errors, # out
6900+
my ($file , # in
6901+
$language , # in
6902+
$rha_ignore_regex , # in
6903+
$ra_Errors , # out
68786904
) = @_;
68796905

68806906
# Logic: pass the file through the following filters:
@@ -6883,7 +6909,9 @@ sub call_counter { # {{{1
68836909
# 3. remove comments using each filter defined for this language
68846910
# (example: SQL has two, remove_starts_with(--) and
68856911
# remove_c_comments() )
6886-
# 4. compute comment lines as
6912+
# 4. if ignore regex filters are defined, remove lines that
6913+
# match any of them
6914+
# 5. compute comment lines as
68876915
# total lines - blank lines - lines left over after all
68886916
# comment filters have been applied
68896917

@@ -6938,6 +6966,22 @@ sub call_counter { # {{{1
69386966
@lines = rm_comments(\@lines, $language, $file,
69396967
\%EOL_Continuation_re, $ra_Errors);
69406968

6969+
if (%{$rha_ignore_regex} and defined($rha_ignore_regex->{$language})) {
6970+
my @keep_lines = ();
6971+
foreach my $line (@lines) {
6972+
my $keep = 1;
6973+
foreach my $regex (@{$rha_ignore_regex->{$language}}) {
6974+
if ($line =~ m{$regex}) {
6975+
print "reject '$line' in $file because of '$regex'\n" if $opt_v > 4;
6976+
$keep = 0;
6977+
last;
6978+
}
6979+
}
6980+
push @keep_lines, $line if $keep;
6981+
}
6982+
@lines = @keep_lines;
6983+
}
6984+
69416985
my $comment_lines = $total_lines - $blank_lines - scalar @lines;
69426986
if ($opt_strip_comments) {
69436987
my $stripped_file = "";
@@ -14693,6 +14737,7 @@ sub load_from_config_file { # {{{1
1469314737
$rs_ignore_whitespace ,
1469414738
$rs_ignore_case ,
1469514739
$rs_ignore_case_ext ,
14740+
$ra_ignore_regex ,
1469614741
$rs_follow_links ,
1469714742
$rs_autoconf ,
1469814743
$rs_sum_one ,
@@ -14801,6 +14846,7 @@ sub load_from_config_file { # {{{1
1480114846
} elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1;
1480214847
} elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1;
1480314848
} elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1;
14849+
} elsif (! @{$ra_ignore_regex} and /^(?:ignore_regex|ignore-regex)(=|\s+)['"]?(.*?)['"]?$/) { push @{$ra_ignore_regex}, $2;
1480414850
} elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1;
1480514851
} elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1;
1480614852
} elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1;
@@ -15095,6 +15141,41 @@ sub print_format_n { # {{{1
1509515141
return @prt_lines;
1509615142
print "<- print_format_n()\n" if $opt_v > 2;
1509715143
} # 1}}}
15144+
sub parse_ignore_regex { # {{{1
    #
    # Convert the list of "language(s)|regex" strings given with
    # --ignore-regex into a hash of the form
    #   $ignore_regex{language} = [list of regex]
    #
    # Each entry is split on its first '|' only, so the regex itself
    # may contain '|'.  Language names are comma separated; '*' means
    # every language that is a key of %{$rhaa_Filters_by_Language}.
    # Whitespace around each language name is ignored.
    #
    # Dies with a message naming the offending entry when
    #   - the entry has no '|'
    #   - the language list or the regex is empty
    #   - the regex does not compile
    #   - a language is not a key of %{$rhaa_Filters_by_Language}
    #
    # The regex strings (not compiled objects) are stored so that
    # consumers see exactly the text the user supplied.

    my ($ra_lang_regex           , # in, as given on command line
        $rhaa_Filters_by_Language, # in, hash of filters by language
        $rha_ignore_regex        , # out, regex lists keyed by language
       ) = @_;
    print "-> parse_ignore_regex()\n" if $opt_v > 2;

    foreach my $lang_regex (@{$ra_lang_regex}) {
        die "Missing '|' character in --ignore-regex '$lang_regex'\n"
            unless $lang_regex =~ /\|/;

        # Limit of 2 keeps any further '|' characters inside the regex.
        my ($lang_list, $regex) = split(/\|/, $lang_regex, 2);

        # An empty pattern must be rejected:  matching against an empty
        # interpolated pattern reuses the last successfully matched
        # regex instead of matching the empty string.
        die "Empty regular expression in --ignore-regex '$lang_regex'\n"
            unless defined $regex and length $regex;

        # Compile once here so that a malformed pattern is reported up
        # front rather than in the middle of counting.
        my $compiled = eval { qr/$regex/ };
        die "Invalid regular expression in --ignore-regex '$lang_regex': $@"
            unless defined $compiled;

        my @languages = ();
        foreach my $field (split(/,/, $lang_list)) {
            (my $name = $field) =~ s/^\s+|\s+$//g;
            push @languages, $name;
        }
        die "No language given in --ignore-regex '$lang_regex'\n"
            unless @languages;

        foreach my $name (@languages) {
            if ($name eq '*') {
                foreach my $known (keys %{$rhaa_Filters_by_Language}) {
                    push @{$rha_ignore_regex->{$known}}, $regex;
                }
            } else {
                die "Unknown language '$name' in --ignore-regex '$lang_regex'\n"
                    unless defined $rhaa_Filters_by_Language->{$name};
                push @{$rha_ignore_regex->{$name}}, $regex;
            }
        }
    }
    print "<- parse_ignore_regex()\n" if $opt_v > 2;
}
# 1}}}
1509815179
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
1509915180
my %php_files = (); # really_is_php()
1510015181
sub really_is_pascal { # {{{1

Unix/cloc.1.pod

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,7 @@ names of categorized files to FILE.
614614
=item B<--counted=FILE>
615615

616616
Save names of processed source files to FILE.
617+
See also B<--found>, B<--ignored>, B<--unique>.
617618

618619
=item B<--diff-alignment=FILE>
619620

@@ -636,11 +637,13 @@ Print cloc's internal usage information and exit.
636637

637638
=item B<--found=FILE>
638639

639-
Save names of every file found to FILE.
640+
Save names of every file found to FILE. See also B<--counted>,
641+
B<--ignored>, B<--unique>.
640642

641643
=item B<--ignored=FILE>
642644

643645
Save names of ignored files and the reason they were ignored to FILE.
646+
See also B<--counted>, B<--found>, B<--unique>.
644647

645648
=item B<--print-filter-stages>
646649

@@ -662,6 +665,11 @@ exit.
662665
Print the value of the operating system mode and exit. See also
663666
B<--unix>, B<--windows>.
664667

668+
=item B<--unique=FILE>
669+
670+
Save names of unique files found to FILE.
671+
See also B<--counted>, B<--found>, B<--ignored>.
672+
665673
=item B<-v[=N]>
666674

667675
Turn on verbose with optional numeric value.

Unix/t/01_opts.t

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,13 @@ my @Tests = (
865865
'ref' => '../tests/outputs/issues/851/results.yaml',
866866
},
867867

868+
{
869+
'name' => '--ignore-regex (github issues #862, #865, #868)',
870+
'cd' => '../tests/inputs/issues/862',
871+
'args' => '--ignore-regex="C,Fortran 77|^\\s*([{};]|END)\\s*\$" *.f *.c',
872+
'ref' => '../tests/outputs/issues/862/results.yaml',
873+
},
874+
868875
);
869876

870877
# Special cases:

0 commit comments

Comments
 (0)