-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.sh
executable file
·106 lines (95 loc) · 2.37 KB
/
stats.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env bash
# Statistics for a code table.
# Usage: stats.sh [code_book]   (code_book defaults to code.txt)
#
# The commands below rely on each line of the code book being tab-separated,
# starting with the code (characters a-z ; , . /) and ending with a number;
# a line ending in the number 1 is treated as a code without duplicates.

code_book=${1:-code.txt}
# Chinese numerals used in the labels, indexed by the number they stand for.
chinese_count=(〇 一 二 三 四 五 六 七 八 九 十)

# Summary
# Longest code = longest first field; stays empty when the file has no lines.
max_len=$(awk 'length($1) > max { max = length($1) }
               END { if (NR) print max + 0 }' "$code_book")
printf '最大码长:%s ' "$max_len"
for ((len = 1; len <= max_len; len++)); do
  # Number of lines whose code is exactly $len characters long.
  count[$len]=$(grep -cE "^[a-z;,./]{$len}[[:space:]]" "$code_book")
  printf '%s码:%s字 ' "${chinese_count[$len]}" "${count[$len]}"
done

# Matches a line whose trailing number is exactly 1.  The leading
# "(^|[^0-9])" keeps 11, 21, ... from being mistaken for 1.
single_re='(^|[^0-9])1$'
# $(( ... )) strips the padding some wc implementations put around the count.
dup_groups=$(( $(grep -vE "$single_re" "$code_book" \
  | cut -f1,3 | uniq | wc -l) ))
dup_chars=$(grep -cvE "$single_re" "$code_book")
printf '重码:%d组%d字 ' "$dup_groups" "$dup_chars"

# Groups per trailing number d; only numbers that occur are printed.
for ((d = 2; d <= 20; d++)); do
  # Match d as the whole trailing number, so that e.g. 2 does not also
  # count lines ending in 12 or 20.
  dup_groups=$(( $(grep -E "(^|[^0-9])${d}\$" "$code_book" \
    | cut -f1,3 | uniq | wc -l) ))
  if ((dup_groups > 0)); then
    printf '%d重%d组 ' "$d" "$dup_groups"
  fi
done
echo
# Per-key breakdown
# Keys reported by the tables below, in column order.  Kept as a single
# space-separated string; the loops rely on word splitting of $keys.
keys="a b c d e f g h i j k l m n o p q r s t u v w x y z ; , . /"
#######################################
# Prints a blank line followed by the table heading: one 3-wide,
# right-aligned column title per key, then the total-column title.
# Globals:   keys (read), key (loop variable, set to the last key)
# Arguments: none
# Outputs:   the heading on stdout
#######################################
show_header() {
  local heading=' ' cell
  for key in $keys; do
    # Build the line in memory and emit it in one write at the end.
    printf -v cell '%3s ' $key
    heading+=$cell
  done
  printf '\n%s%s\n' "$heading" ' 累计'
}
# row: per-key counts of the table row being built; filled by the table
# loops and printed by add_row.
declare -A row
# column_sum: per-key running totals over the rows of the current table;
# updated by add_row, printed and reset by add_column_sum.
declare -A column_sum
#######################################
# Prints one table row from the per-key counts in `row` and adds each count
# to the matching running total in `column_sum`.
# Globals:   keys (read), row (read), column_sum (updated)
# Arguments: none
# Outputs:   every count right-aligned in 4 columns, then the row total in
#            5 columns and a newline, on stdout
#######################################
add_row() {
  # Locals keep the loop state from overwriting same-named globals; the
  # script's `count` array in particular used to lose element 0 here.
  local key cell row_sum=0
  for key in $keys; do
    # A key with no entry counts as zero.
    cell=${row[$key]:-0}
    printf '%4d' "$cell"
    row_sum=$((row_sum + cell))
    # Expand the stored total explicitly instead of using an arithmetic
    # subscript, so keys such as ';' are never parsed as an expression.
    column_sum[$key]=$((${column_sum[$key]:-0} + cell))
  done
  printf '%5d\n' "$row_sum"
}
#######################################
# Resets the running total of every key in `keys` to zero.  Entries of
# column_sum under other subscripts are left alone.
# Globals:   keys (read), column_sum (written), key (loop variable)
# Arguments: none (any arguments are ignored)
#######################################
clear_column_sum() {
  # Load the word-split key list into the function's own positional
  # parameters and walk those.
  set -- $keys
  for key; do
    column_sum[$key]=0
  done
}
#######################################
# Prints the totals row that closes a table, then resets the running totals
# so the next table starts from zero.
# Globals:   keys (read), column_sum (read, then reset via clear_column_sum)
# Arguments: none (any arguments are ignored)
# Outputs:   the label, every per-key total right-aligned in 4 columns, the
#            grand total in 5 columns and a newline, on stdout
#######################################
add_column_sum() {
  local key cell grand_total=0
  printf '%s' ' 累计: '
  for key in $keys; do
    # A total that was never set counts as zero.  Without the default, a
    # table with no rows made the sum below an arithmetic syntax error.
    cell=${column_sum[$key]:-0}
    printf '%4d' "$cell"
    grand_total=$((grand_total + cell))
  done
  printf '%5d\n' "$grand_total"
  clear_column_sum
}
# Per-key tables.  Each table has one row per code length or position from 1
# to max_len, one column per key, and ends with a totals row.

# Regular expression matching each key literally; '.' is the only key that
# is special in a regular expression and needs a backslash.
declare -A key_pattern
for key in $keys; do
  case $key in
    .) key_pattern[$key]='\.' ;;
    *) key_pattern[$key]=$key ;;
  esac
done

# Start every running total at zero so that a table without rows (max_len
# empty or 0) still gets a well-formed totals row.
clear_column_sum

# Table 1: lines whose code is exactly $len characters long and starts with
# the key.
show_header
for ((len = 1; len <= max_len; len++)); do
  printf '%s码起始: ' "${chinese_count[$len]}"
  for key in $keys; do
    row[$key]=$(grep -cE \
      "^${key_pattern[$key]}[a-z;,./]{$((len - 1))}[[:space:]]" "$code_book")
  done
  add_row
done
add_column_sum

# Table 2: lines whose $len-th character is the key, whatever the length of
# the code.
show_header
for ((len = 1; len <= max_len; len++)); do
  printf '编码%s位: ' "${chinese_count[$len]}"
  for key in $keys; do
    row[$key]=$(grep -cE \
      "^[a-z;,./]{$((len - 1))}${key_pattern[$key]}" "$code_book")
  done
  add_row
done
add_column_sum

# Table 3: same as table 2, but only over lines whose trailing number is not
# exactly 1.  The filter is the same for every cell, so it runs once here;
# "(^|[^0-9])" keeps 11, 21, ... from being mistaken for 1.
dup_lines=$(grep -vE '(^|[^0-9])1$' "$code_book")
show_header
for ((pos = 1; pos <= max_len; pos++)); do
  printf '重码%s位: ' "${chinese_count[$pos]}"
  for key in $keys; do
    row[$key]=$(grep -cE "^.{$((pos - 1))}${key_pattern[$key]}" \
      <<<"$dup_lines")
  done
  add_row
done
add_column_sum