-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot-data.sh
executable file
·183 lines (166 loc) · 7.27 KB
/
plot-data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/bin/bash
#
# Step 1: run the raw RKI CSV exports through their sed scripts so that the
# results in ./my-data can be plotted.
#
# cases/deaths table:
sed -f ./sed-cases \
  ./rki-data/RKI-Fallzahlen_Kum_Tab-csv/Fälle-Todesfälle-gesamt.csv \
  > ./my-data/data-cases.csv
# nowcasting table:
sed -f ./sed-nowcasting \
  ./rki-data/RKI-Nowcasting_Zahlen-csv/Nowcast_R.csv \
  > ./my-data/data-nowcasting.csv
# We would like to simply do this:
# sed -f ./sed-tests ./rki-data/RKI-Testzahlen-gesamt-csv/1_Testzahlerfassung.csv > ./my-data/data-tests.csv
# but we have to convert the week number/year to proper date ranges for plotting (e.g. "50 2020" to "2020-12-07;2020-12-13").
# So we read the file ./rki-data/RKI-Testzahlen-gesamt-csv/1_Testzahlerfassung.csv line by line,
# process each line, and apply the function iso_week_num_to_date() to the first column.
# The result is the file ./my-data/data-tests.csv.tmp, which is then processed with sed.
# The data about the PCR tests is only given per week. I assume, RKI uses ISO week numbers for this information:
# * year 2020 ends with week number 53
# * year 2021 starts with week 01
# ISO week 53/2020 starts with Monday, 2020-12-28, and ends with Sunday, 2021-01-03.
# ISO week 01/2021 starts with Monday, 2021-01-04, and ends with Sunday, 2021-01-10.
# Convert an ISO 8601 week number into the date range (Monday..Sunday) of
# that week.
#
# Arguments: $1 - ISO week number, 1..53 (a leading zero such as "08" is fine)
#            $2 - the year the week number belongs to, e.g. 2021
# Outputs:   "<monday>;<sunday>" on stdout, both as YYYY-MM-DD,
#            e.g. "43 2020" gives "2020-10-19;2020-10-25".
#            Error messages go to stderr so that callers using $(...) never
#            capture them as data.
# Returns:   0 on success
#            1 if the week number is greater than 53
#            2 if the week number is less than 1 or not a number
#            3 if the year is not a number or date(1) rejects it
# Notes:     Requires GNU date (relative items in "-d").
#            Whether the given year really has a week 53 is not checked; for
#            a 52-week year, week 53 yields the first week of the next year.
function iso_week_num_to_date() {
  local week=${1} year=${2}
  local dow_jan_4
  local offset rel
  local date_fmt="+%Y-%m-%d" # something like "2021-08-29"
  local mon sun

  if [[ ! ${week} =~ ^[0-9]+$ ]]; then
    echo "ISO week number must be a positive integer" >&2
    return 2
  fi
  if [[ ! ${year} =~ ^[0-9]+$ ]]; then
    echo "year must be a number" >&2
    return 3
  fi
  # force base 10: without this "08" and "09" are rejected as invalid octal
  week=$((10#${week}))
  if ((week > 53)); then
    echo "maximal ISO week number is 53" >&2
    return 1
  fi
  if ((week < 1)); then
    echo "minimal ISO week number is 1" >&2
    return 2
  fi

  # By definition, the 4th of January is always in ISO week number 1.
  # %u is the ISO day of week (1..7); 1 is Monday.
  # TZ=UTC0 keeps the day arithmetic independent of local DST rules.
  dow_jan_4=$(TZ=UTC0 date -d "${year}-01-04" +%u) || return 3

  # The Monday of week 1 is (dow_jan_4 - 1) days before January 4th (it may
  # lie in the previous year); the requested week starts (week - 1) weeks
  # after that Monday.
  offset=$(((week - 1) * 7 - (dow_jan_4 - 1)))

  # Always pass an explicit sign ("+12" / "-3") to date.
  printf -v rel '%+d' "${offset}"
  mon=$(TZ=UTC0 date -d "${year}-01-04 ${rel} day" "${date_fmt}") || return 3
  printf -v rel '%+d' "$((offset + 6))"
  sun=$(TZ=UTC0 date -d "${year}-01-04 ${rel} day" "${date_fmt}") || return 3

  echo "${mon};${sun}"
} # function iso_week_num_to_date() {
# Build ./my-data/data-tests.csv from the weekly test figures in
# ./rki-data/RKI-Testzahlen-gesamt-csv/1_Testzahlerfassung.csv.
#
# Every input line is copied to a temporary file with two extra leading
# columns: the Monday and the Sunday of the week named in the first column
# ("43/2020" or "3/2021", optionally followed by '*'). Lines whose first
# column is not a week (e.g. the header) get two empty columns instead.
# The temporary file is then run through ./sed-tests.
#
# Arguments: none
# Outputs:   one progress line per input line on stdout
# Uses:      iso_week_num_to_date
function format_test_data() {
  local input=./rki-data/RKI-Testzahlen-gesamt-csv/1_Testzahlerfassung.csv
  local tmp=./my-data/data-tests.csv.tmp
  local output=./my-data/data-tests.csv
  # one or two digits, a slash, a four digit year
  local week_re='^([0-9]{1,2})/([0-9]{4})$'
  local n=1
  local line first weeknr year range

  # start from a clean temporary file, the loop below only appends to it
  /bin/rm -f "${tmp}"

  # "|| [[ -n ... ]]" keeps a last line that has no trailing newline
  while read -r line || [[ -n ${line} ]]; do
    echo -n "processing line ${n}:"
    n=$((n + 1))
    # replace '*' e.g. "11/2021*;" by "11/2021;"
    line=${line/\*;/;}
    # the first column is everything up to the first delimiter
    first=${line%%;*}

    weeknr=""
    year=""
    range=";" # two empty columns unless a valid week is found
    if [[ ${first} =~ ${week_re} ]]; then
      weeknr=${BASH_REMATCH[1]}
      year=${BASH_REMATCH[2]}
      # 10# forces base 10 in case the week is written as "08" or "09";
      # if the conversion fails, fall back to the empty columns
      range=$(iso_week_num_to_date "$((10#${weeknr}))" "${year}") || range=";"
    fi
    echo " weeknr = ${weeknr} ; year = ${year}"

    # new columns first, then the original line
    printf '%s;%s\n' "${range}" "${line}" >> "${tmp}"
  done < "${input}"

  sed -f ./sed-tests "${tmp}" > "${output}"
  /bin/rm -f "${tmp}"
} # function format_test_data() {
# Build ./my-data/data-clinical.csv from
# ./rki-data/RKI-Klinische-Aspekte-csv/Klinische_Aspekte.csv.
#
# The first column of the input is read as the year and the second as the
# week number. Every input line is copied to a temporary file with two extra
# leading columns: the Monday and the Sunday of that week. Lines whose week
# column is not a number (e.g. the header) get the column titles "Mon;Sun".
# The temporary file is then run through ./sed-clinical.
#
# Arguments: none
# Outputs:   one progress line per input line on stdout
# Uses:      iso_week_num_to_date
function format_clinical_data() {
  local input=./rki-data/RKI-Klinische-Aspekte-csv/Klinische_Aspekte.csv
  local tmp=./my-data/data-clinical.csv.tmp
  local output=./my-data/data-clinical.csv
  local regexp_number='^[0-9]+$'
  local n=1
  local line rest year weeknr firstnew

  # start from a clean temporary file, the loop below only appends to it
  /bin/rm -f "${tmp}"

  # "|| [[ -n ... ]]" keeps a last line that has no trailing newline
  while read -r line || [[ -n ${line} ]]; do
    echo -n "processing line ${n}:"
    n=$((n + 1))
    # replace '*' e.g. "11/2021*;" by "11/2021;"
    line=${line/\*;/;}

    # first column: year; second column: week number (empty if missing)
    year=${line%%;*}
    weeknr=""
    if [[ ${line} == *";"* ]]; then
      rest=${line#*;}
      weeknr=${rest%%;*}
    fi

    firstnew="Mon;Sun"
    echo " weeknr = ${weeknr} ; year = ${year}"
    if [[ ${weeknr} =~ ${regexp_number} ]]; then
      # 10# forces base 10 in case the week is written as "08" or "09";
      # if the conversion fails, leave the two date columns empty
      firstnew=$(iso_week_num_to_date "$((10#${weeknr}))" "${year}") || firstnew=";"
    fi

    # new columns first, then the original line
    printf '%s;%s\n' "${firstnew}" "${line}" >> "${tmp}"
  done < "${input}"

  sed -f ./sed-clinical "${tmp}" > "${output}"
  /bin/rm -f "${tmp}"
} # function format_clinical_data() {
# Convert the weekly test and clinical tables (week number -> date range).
format_test_data
format_clinical_data
# The DIVI data comes in one file for all German states. So, we split it up:
for f in DEUTSCHLAND HAMBURG THUERINGEN SCHLESWIG_HOLSTEIN SACHSEN BADEN_WUERTTEMBERG SACHSEN_ANHALT BAYERN BERLIN MECKLENBURG_VORPOMMERN BREMEN NIEDERSACHSEN RHEINLAND_PFALZ SAARLAND NORDRHEIN_WESTFALEN HESSEN BRANDENBURG; do
  echo "creating DIVI data for ${f} ..."
  /bin/rm -f "./my-data/data-divi-${f}.csv.tmp"
  # keep the header line of the combined file
  /usr/bin/head -n 1 ./rki-data/bundesland-zeitreihe.csv > "./my-data/data-divi-${f}.csv.tmp"
  # -w matches the name only as a whole word ('_' counts as a word character),
  # so SACHSEN no longer also selects NIEDERSACHSEN and SACHSEN_ANHALT rows.
  # NOTE(review): assumes the state name is a separate field in the file - confirm.
  /usr/bin/grep -w -- "${f}" ./rki-data/bundesland-zeitreihe.csv >> "./my-data/data-divi-${f}.csv.tmp"
  /usr/bin/sed -f ./sed-divi "./my-data/data-divi-${f}.csv.tmp" > "./my-data/data-divi-${f}.csv"
  /bin/rm -f "./my-data/data-divi-${f}.csv.tmp"
done
#
# use gnuplot to plot some graphs
#
gnuplot plot-data.plt