247
247
$opt_ignore_whitespace ,
248
248
$opt_ignore_case ,
249
249
$opt_ignore_case_ext ,
250
+ @opt_ignore_regex ,
250
251
$opt_follow_links ,
251
252
$opt_autoconf ,
252
253
$opt_sum_one ,
@@ -352,6 +353,7 @@ my $getopt_success = GetOptions( # {{{1
352
353
"ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
353
354
"ignore_case|ignore-case" => \$opt_ignore_case ,
354
355
"ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext ,
356
+ "ignore_regex|ignore-regex=s" => \@opt_ignore_regex ,
355
357
"follow_links|follow-links" => \$opt_follow_links ,
356
358
"autoconf" => \$opt_autoconf ,
357
359
"sum_one|sum-one" => \$opt_sum_one ,
@@ -456,6 +458,7 @@ load_from_config_file($config_file, # {{{2
456
458
\$opt_ignore_whitespace ,
457
459
\$opt_ignore_case ,
458
460
\$opt_ignore_case_ext ,
461
+ \@opt_ignore_regex ,
459
462
\$opt_follow_links ,
460
463
\$opt_autoconf ,
461
464
\$opt_sum_one ,
@@ -540,6 +543,7 @@ $opt_exclude_ext = "" unless $opt_exclude_ext;
540
543
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
541
544
$opt_ignore_case = 0 unless $opt_ignore_case;
542
545
$opt_ignore_case_ext = 0 unless $opt_ignore_case_ext;
546
+ my %ignore_regex = ();
543
547
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
544
548
$opt_follow_links = 0 unless $opt_follow_links;
545
549
if (defined $opt_diff_timeout) {
@@ -841,6 +845,8 @@ if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
841
845
}
842
846
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
843
847
\%Scale_Factor);
848
+ parse_ignore_regex(\@opt_ignore_regex, \%Filters_by_Language, \%ignore_regex)
849
+ if @opt_ignore_regex;
844
850
845
851
my $nCounted = 0;
846
852
@@ -1416,7 +1422,7 @@ if ( $max_processes == 0) {
1416
1422
# Multiprocessing is disabled
1417
1423
my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
1418
1424
\@files_removed_tot, \@file_pairs_tot,
1419
- 0, \%Language, \%Ignored);
1425
+ 0, \%Language, \%ignore_regex, \% Ignored);
1420
1426
%Results_by_File = %{$part->{'results_by_file'}};
1421
1427
%Results_by_Language= %{$part->{'results_by_language'}};
1422
1428
%Delta_by_File = %{$part->{'delta_by_file'}};
@@ -1504,7 +1510,7 @@ if ( $max_processes == 0) {
1504
1510
$pm->start() and next;
1505
1511
my $count_result = count_filesets ( $fset_a, $fset_b,
1506
1512
\@files_added_part, \@files_removed_part,
1507
- \@filepairs_part, 1, \%Language, \%Ignored );
1513
+ \@filepairs_part, 1, \%Language, \%ignore_regex, \% Ignored );
1508
1514
$pm->finish(0 , $count_result);
1509
1515
}
1510
1516
# Wait for processes to finish
@@ -1634,7 +1640,7 @@ my @sorted_files = sort keys %unique_source_file;
1634
1640
1635
1641
if ( $max_processes == 0) {
1636
1642
# Multiprocessing is disabled
1637
- my $part = count_files ( \@sorted_files , 0, \%Language);
1643
+ my $part = count_files ( \@sorted_files , 0, \%ignore_regex, \% Language);
1638
1644
%Results_by_File = %{$part->{'results_by_file'}};
1639
1645
%Results_by_Language= %{$part->{'results_by_language'}};
1640
1646
%Ignored = ( %Ignored, %{$part->{'ignored'}});
@@ -1677,7 +1683,7 @@ if ( $max_processes == 0) {
1677
1683
my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
1678
1684
while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
1679
1685
$pm->start() and next;
1680
- my $count_result = count_files ( \@part, 1, \%Language );
1686
+ my $count_result = count_files ( \@part, 1, \%ignore_regex, \% Language );
1681
1687
$pm->finish(0 , $count_result);
1682
1688
}
1683
1689
# Wait for processes to finish
@@ -1975,6 +1981,18 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
1975
1981
C++; this switch would count .C files as C rather
1976
1982
than C++ on *nix operating systems). File name
1977
1983
case insensitivity is always true on Windows.
1984
+ --ignore-regex Ignore lines in source files that match the given
1985
+ Perl regular expression for the given language(s).
1986
+ This option can be specified multiple times.
1987
+ Language names are comma separated and are followed
1988
+ by the pipe character and the regular expression.
1989
+ Use * to match all languages.
1990
+ Examples:
1991
+ --ignore-regex=\"C,Java,C++|^\\s*[{};]\\s*\$\"
1992
+ --ignore-regex=\"*|DEBUG|TEST\\s+ONLY\"
1993
+ These filters are applied after comments are
1994
+ removed. Use --strip-comments=EXT to create
1995
+ new files that show these filters applied.
1978
1996
--lang-no-ext=<lang> Count files without extensions using the <lang>
1979
1997
counter. This option overrides internal logic
1980
1998
for files without extensions (where such files
@@ -2188,6 +2206,7 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
2188
2206
--categorized=<file> Save file sizes in bytes, identified languages
2189
2207
and names of categorized files to <file>.
2190
2208
--counted=<file> Save names of processed source files to <file>.
2209
+ See also --found, --ignored, --unique.
2191
2210
--diff-alignment=<file> Write to <file> a list of files and file pairs
2192
2211
showing which files were added, removed, and/or
2193
2212
compared during a run with --diff. This switch
@@ -2198,9 +2217,11 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
2198
2217
regular expressions. An examination of the
2199
2218
source code may be needed for further explanation.
2200
2219
--help Print this usage information and exit.
2201
- --found=<file> Save names of every file found to <file>.
2220
+ --found=<file> Save names of every file found to <file>. See
2221
+ also --counted, --ignored, --unique.
2202
2222
--ignored=<file> Save names of ignored files and the reason they
2203
- were ignored to <file>.
2223
+ were ignored to <file>. See also --counted,
2224
+ --found, --unique.
2204
2225
--print-filter-stages Print processed source code before and after
2205
2226
each filter is applied.
2206
2227
--show-ext[=<ext>] Print information about all known (or just the
@@ -2209,6 +2230,8 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
2209
2230
given) languages and exit.
2210
2231
--show-os Print the value of the operating system mode
2211
2232
and exit. See also --unix, --windows.
2233
+ --unique=<file> Save names of unique files found to <file>. See
2234
+ also --counted, --found, --ignored.
2212
2235
-v[=<n>] Verbose switch (optional numeric value).
2213
2236
-verbose[=<n>] Long form of -v.
2214
2237
--version Print the version of this program and exit.
@@ -2558,7 +2581,7 @@ sub file_extension { # {{{1
2558
2581
}
2559
2582
} # 1}}}
2560
2583
sub count_files { # {{{1
2561
- my ($filelist, $counter_type, $language_hash) = @_;
2584
+ my ($filelist, $counter_type, $rha_ignore_regex, $ language_hash) = @_;
2562
2585
print "-> count_files()\n" if $opt_v > 2;
2563
2586
my @p_errors = ();
2564
2587
my %p_ignored = ();
@@ -2629,7 +2652,8 @@ sub count_files { # {{{1
2629
2652
} else {
2630
2653
($all_line_count,
2631
2654
$blank_count ,
2632
- $comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
2655
+ $comment_count ,) = call_counter($file, $Language{$file},
2656
+ $rha_ignore_regex, \@Errors);
2633
2657
$code_count = $all_line_count - $blank_count - $comment_count;
2634
2658
}
2635
2659
}
@@ -2666,6 +2690,7 @@ sub count_filesets { # {{{1
2666
2690
$file_pairs,
2667
2691
$counter_type,
2668
2692
$language_hash,
2693
+ $rha_ignore_regex,
2669
2694
$rh_Ignored) = @_;
2670
2695
print "-> count_filesets()\n" if $opt_v > 2;
2671
2696
my @p_errors = ();
@@ -2695,7 +2720,7 @@ sub count_filesets { # {{{1
2695
2720
my ($all_line_count,
2696
2721
$blank_count ,
2697
2722
$comment_count ,
2698
- ) = call_counter($file, $Lang, \@p_errors);
2723
+ ) = call_counter($file, $Lang, $rha_ignore_regex, \@p_errors);
2699
2724
$already_counted{$file} = 1;
2700
2725
my $code_count = $all_line_count-$blank_count-$comment_count;
2701
2726
if ($opt_by_file) {
@@ -2755,7 +2780,7 @@ sub count_filesets { # {{{1
2755
2780
my ($all_line_count,
2756
2781
$blank_count ,
2757
2782
$comment_count ,
2758
- ) = call_counter($f, $this_lang, \@p_errors);
2783
+ ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
2759
2784
$p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
2760
2785
$p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count;
2761
2786
$p_dbl{ $this_lang }{'code'}{'added'} +=
@@ -2795,7 +2820,7 @@ sub count_filesets { # {{{1
2795
2820
my ($all_line_count,
2796
2821
$blank_count ,
2797
2822
$comment_count ,
2798
- ) = call_counter($f, $this_lang, \@p_errors);
2823
+ ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
2799
2824
$p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
2800
2825
$p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count;
2801
2826
$p_dbl{ $this_lang}{'code'}{'removed'} +=
@@ -3057,18 +3082,18 @@ sub count_filesets { # {{{1
3057
3082
($all_line_count_L,
3058
3083
$blank_count_L ,
3059
3084
$comment_count_L ,
3060
- ) = call_counter($file_L, $Lang_L, \@Errors);
3085
+ ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);
3061
3086
3062
3087
($all_line_count_R,
3063
3088
$blank_count_R ,
3064
3089
$comment_count_R ,
3065
- ) = call_counter($file_R, $Lang_R, \@Errors);
3090
+ ) = call_counter($file_R, $Lang_R, $rha_ignore_regex, \@Errors);
3066
3091
} else {
3067
3092
# L and R file contents are identical, no need to diff
3068
3093
($all_line_count_L,
3069
3094
$blank_count_L ,
3070
3095
$comment_count_L ,
3071
- ) = call_counter($file_L, $Lang_L, \@Errors);
3096
+ ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);
3072
3097
$all_line_count_R = $all_line_count_L;
3073
3098
$blank_count_R = $blank_count_L ;
3074
3099
$comment_count_R = $comment_count_L ;
@@ -6872,9 +6897,10 @@ sub different_files { # {{{1
6872
6897
return @unique;
6873
6898
} # 1}}}
6874
6899
sub call_counter { # {{{1
6875
- my ($file , # in
6876
- $language , # in
6877
- $ra_Errors, # out
6900
+ my ($file , # in
6901
+ $language , # in
6902
+ $rha_ignore_regex , # in
6903
+ $ra_Errors , # out
6878
6904
) = @_;
6879
6905
6880
6906
# Logic: pass the file through the following filters:
@@ -6883,7 +6909,9 @@ sub call_counter { # {{{1
6883
6909
# 3. remove comments using each filter defined for this language
6884
6910
# (example: SQL has two, remove_starts_with(--) and
6885
6911
# remove_c_comments() )
6886
- # 4. compute comment lines as
6912
+ # 4. if ignore regex filters are defined, remove lines that
6913
+ # match any of them
6914
+ # 5. compute comment lines as
6887
6915
# total lines - blank lines - lines left over after all
6888
6916
# comment filters have been applied
6889
6917
@@ -6938,6 +6966,22 @@ sub call_counter { # {{{1
6938
6966
@lines = rm_comments(\@lines, $language, $file,
6939
6967
\%EOL_Continuation_re, $ra_Errors);
6940
6968
6969
+ if (%{$rha_ignore_regex} and defined($rha_ignore_regex->{$language})) {
6970
+ my @keep_lines = ();
6971
+ foreach my $line (@lines) {
6972
+ my $keep = 1;
6973
+ foreach my $regex (@{$rha_ignore_regex->{$language}}) {
6974
+ if ($line =~ m{$regex}) {
6975
+ print "reject '$line' in $file because of '$regex'\n" if $opt_v > 4;
6976
+ $keep = 0;
6977
+ last;
6978
+ }
6979
+ }
6980
+ push @keep_lines, $line if $keep;
6981
+ }
6982
+ @lines = @keep_lines;
6983
+ }
6984
+
6941
6985
my $comment_lines = $total_lines - $blank_lines - scalar @lines;
6942
6986
if ($opt_strip_comments) {
6943
6987
my $stripped_file = "";
@@ -14693,6 +14737,7 @@ sub load_from_config_file { # {{{1
14693
14737
$rs_ignore_whitespace ,
14694
14738
$rs_ignore_case ,
14695
14739
$rs_ignore_case_ext ,
14740
+ $ra_ignore_regex ,
14696
14741
$rs_follow_links ,
14697
14742
$rs_autoconf ,
14698
14743
$rs_sum_one ,
@@ -14801,6 +14846,7 @@ sub load_from_config_file { # {{{1
14801
14846
} elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1;
14802
14847
} elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1;
14803
14848
} elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1;
14849
+ } elsif (! @{$ra_ignore_regex} and /^(?:ignore_regex|ignore-regex)(=|\s+)['"]?(.*?)['"]?$/) { push @{$ra_ignore_regex}, $2;
14804
14850
} elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1;
14805
14851
} elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1;
14806
14852
} elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1;
@@ -15095,6 +15141,41 @@ sub print_format_n { # {{{1
15095
15141
return @prt_lines;
15096
15142
print "<- print_format_n()\n" if $opt_v > 2;
15097
15143
} # 1}}}
15144
sub parse_ignore_regex { # {{{1
    #
    # Convert the list of "language(s)|regex" strings into a hash of
    # regex lists keyed by language name:
    #     $ignore_regex{language} = [list of regex]
    #
    # Each entry has the form  LANG[,LANG...]|REGEX  where a LANG of '*'
    # means every language that is a key of %{$rhaa_Filters_by_Language}.
    # Only the first '|' separates languages from the regex, so the regex
    # itself may contain '|' (alternation).
    #
    # Dies with a message naming the offending entry when the entry
    #   - has no '|' separator,
    #   - has no language before the '|',
    #   - has an empty regex,
    #   - has a regex that does not compile, or
    #   - names a language that is not a key of the filters hash.

    my ($ra_lang_regex           , # in,  list of "language(s)|regex" strings
        $rhaa_Filters_by_Language, # in,  hash of filters by language
        $rha_ignore_regex        , # out, $ignore_regex{language} = [regex, ...]
       ) = @_;
    print "-> parse_ignore_regex()\n" if $opt_v > 2;

    foreach my $lang_regex (@{$ra_lang_regex}) {
        die "Missing '|' character in --ignore-regex '$lang_regex'\n"
            unless $lang_regex =~ /\|/;

        # limit of 2:  everything after the first '|' is the regex
        my ($lang_list, $regex) = split(/\|/, $lang_regex, 2);

        # split() on an empty language list returns nothing, which would
        # otherwise make the whole entry a silent no-op
        die "Missing language in --ignore-regex '$lang_regex'\n"
            unless $lang_list =~ /\S/;

        # An empty pattern must never reach a match operator:  Perl then
        # reuses the last successfully matched pattern instead of
        # matching the empty string.
        die "Missing regular expression in --ignore-regex '$lang_regex'\n"
            unless length $regex;

        # Compile once here so a malformed pattern is reported against
        # the option that supplied it.  The original string (not the
        # compiled object) is what gets stored.
        if (!defined eval { qr{$regex} }) {
            die "Invalid regular expression in --ignore-regex " .
                "'$lang_regex': $@";
        }

        foreach my $name (split(/,/, $lang_list)) {
            # tolerate blanks around the commas, e.g. "C, Java|..."
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;

            if ($name eq '*') {
                foreach my $known (keys %{$rhaa_Filters_by_Language}) {
                    push @{$rha_ignore_regex->{$known}}, $regex;
                }
                next;
            }

            die "Unknown language '$name' in --ignore-regex '$lang_regex'\n"
                unless defined $rhaa_Filters_by_Language->{$name};
            push @{$rha_ignore_regex->{$name}}, $regex;
        }
    }
    print "<- parse_ignore_regex()\n" if $opt_v > 2;
}
# 1}}}
15098
15179
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
15099
15180
my %php_files = (); # really_is_php()
15100
15181
sub really_is_pascal { # {{{1
0 commit comments