<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE workflow [
  <!-- Installation root. The value is read from an external file that sits
       next to this workflow document. -->
  <!ENTITY TOPDIR SYSTEM "./topdir.ent">

  <!-- Locations derived from TOPDIR: the output tree, the scanner
       executable and the two reporting scripts. -->
  <!ENTITY OUTDIR "&TOPDIR;/out">
  <!ENTITY DISK_USAGE_PROGRAM "&TOPDIR;/lustre-disk-usage/lustre-disk-usage.exe">
  <!ENTITY REPORT_SCRIPT "&TOPDIR;/scripts/final_report.sh">
  <!ENTITY XML_SCRIPT "&TOPDIR;/scripts/report_to_xml.pl">

  <!-- NOTE(review): REPORT, DISK_AREAS and BIG_DISK_AREAS are not referenced
       by any task in this document. They may be used from the external
       entity files; confirm before removing them. -->
  <!ENTITY REPORT "&OUTDIR;/report.txt">
  <!ENTITY DISK_AREAS "&TOPDIR;/disk-areas.lst">
  <!ENTITY BIG_DISK_AREAS "&TOPDIR;/big-disk-areas.lst">

  <!-- Offsets applied to the cycle time in the timedep of each task:
       DEADLINE gates report_at_time, SCRUB_OFFSET gates scrub and
       SCAN_OFFSET gates the du and big_du scans.
       Presumably in Rocoto dd:hh:mm:ss notation; confirm. -->
  <!ENTITY DEADLINE "00:23:42:00">
  <!ENTITY SCRUB_OFFSET "00:00:30:00">
  <!ENTITY SCAN_OFFSET "00:00:30:00">

  <!-- Externally maintained lists: DIRS and BIG_DIRS feed the metatask
       variables, ALL_DIRS is appended to the report command line, and
       CYCLEDEF is the content of the cycledef element. -->
  <!ENTITY DIRS SYSTEM "./dir-entity-list.ent">
  <!ENTITY BIG_DIRS SYSTEM "./big-dir-entity-list.ent">
  <!ENTITY ALL_DIRS SYSTEM "./all-dir-entity-list.ent">
  <!ENTITY CYCLEDEF SYSTEM "./cycledef.ent">

  <!-- Passed as a single quoted argument to the report script. -->
  <!ENTITY SEARCH_DAYS "0 1 2 3">

  <!-- Batch account used by every task. -->
  <!ENTITY ACCOUNT "zrtrr">

  <!-- RESTART_HANG_TIME is the age used in the hang dependency on the
       restart file; RESTART_INTERVAL is passed to the scanner with its
       t option. Presumably both are in seconds; confirm. -->
  <!ENTITY RESTART_HANG_TIME "1500">
  <!ENTITY RESTART_INTERVAL "120">

  <!-- Environment variable block included in every task so that jobs see
       USAGE_MONITOR set to TOPDIR. -->
  <!ENTITY TOPDIR_ENVAR "<envar><name>USAGE_MONITOR</name><value>&TOPDIR;</value></envar>">
]>
<workflow realtime="T" scheduler="slurm" taskthrottle="8" cyclethrottle="2" cyclelifespan="01:06:00:00">
<!-- Workflow log, written under the dated (year, month, day) output
     directory of each cycle. -->
<log>
  <cyclestr>&OUTDIR;/@Y@m@d/log/workflow.log</cyclestr>
</log>
<!-- The cycle definition text comes from the external CYCLEDEF entity. -->
<cycledef>&CYCLEDEF;</cycledef>
<task name="scrub" maxtries="3">
  <!-- Deletes the dated output directory selected by the cyclestr offset
       -2:00:00:00 (two days before the current cycle, assuming Rocoto
       dd:hh:mm:ss offset notation).
       Only one path is passed to rm: listing the same dated directory more
       than once has no additional effect.
       NOTE(review): if several older days are meant to be cleaned up (for
       example 2, 3 and 4 days back), add cyclestr elements with distinct
       offsets. Also confirm that removing day 2 is compatible with the days
       the report script is asked to search via SEARCH_DAYS. -->
  <queue>batch</queue>
  <walltime>00:15:00</walltime>
  <account>&ACCOUNT;</account>
  <cores>1</cores>
  <command>set -x ; rm -rf <cyclestr offset="-2:00:00:00">&OUTDIR;/@Y@m@d</cyclestr></command>
  <join><cyclestr>&OUTDIR;/@Y@m@d/log/scrub.log</cyclestr></join>
  &TOPDIR_ENVAR;
  <dependency>
    <!-- Time gate: cycle time shifted by SCRUB_OFFSET. -->
    <timedep><cyclestr offset="&SCRUB_OFFSET;">@Y@m@d@H@M@S</cyclestr></timedep>
  </dependency>
</task>
<metatask name="du">
  <!-- One single-core scan task per entry of the external DIRS list. -->
  <var name="dir">&DIRS;</var>
  <task name="#dir#" maxtries="15">
    <queue>batch</queue>
    <walltime>03:00:00</walltime>
    <account>&ACCOUNT;</account>
    <cores>1</cores>
    <!-- Runs the scanner inside the dated output directory.
         * The cd is guarded: if the directory cannot be entered the job
           fails at once instead of scanning and writing its restart, done
           and report files relative to an unintended directory.
         * mkdir uses the p option because the task is retried (maxtries)
           and the per-entry directory already exists on every retry.
         * The path given with the r option is built by sed from the entry
           name: a leading slash is added and each double hyphen becomes a
           slash. -->
    <command><cyclestr>set -x ; ulimit -c 0 ; cd "&OUTDIR;/@Y@m@d" || exit 1 ; mkdir -p "#dir#" ; &DISK_USAGE_PROGRAM; -i -t "&RESTART_INTERVAL;" -d "#dir#.done" -r `echo /"#dir#" | sed s,--,/,g` "#dir#.rst.gz" "#dir#/" -s "#dir#-short.txt" -o "#dir#-full.txt"</cyclestr></command>
    <join><cyclestr>&OUTDIR;/@Y@m@d/log/#dir#.log</cyclestr></join>
    &TOPDIR_ENVAR;
    <dependency>
      <!-- Time gate: cycle time shifted by SCAN_OFFSET. -->
      <timedep><cyclestr offset="&SCAN_OFFSET;">@Y@m@d@H@M@S</cyclestr></timedep>
    </dependency>
    <!-- Hang test on the restart file: satisfied when the file is absent,
         or when the datadep carrying age RESTART_HANG_TIME is satisfied.
         NOTE(review): presumably the age is in seconds and means the file
         has not been modified for that long; confirm against the Rocoto
         documentation, and confirm that an absent restart file early in a
         first try is really meant to count as a hang. -->
    <hangdependency>
      <or>
        <not>
          <datadep><cyclestr>&OUTDIR;/@Y@m@d/#dir#.rst.gz</cyclestr></datadep>
        </not>
        <datadep age="&RESTART_HANG_TIME;"><cyclestr>&OUTDIR;/@Y@m@d/#dir#.rst.gz</cyclestr></datadep>
      </or>
    </hangdependency>
  </task>
</metatask>
<metatask name="big_du">
  <!-- One scan task per entry of the external BIG_DIRS list. Unlike the du
       metatask, each task asks for a whole node (1 node, ppn=40) and passes
       the exclusive flag to the scheduler through the native element. -->
  <var name="dir">&BIG_DIRS;</var>
  <task name="big_du_#dir#" maxtries="15">
    <queue>batch</queue>
    <walltime>03:00:00</walltime>
    <account>&ACCOUNT;</account>
    <nodes>1:ppn=40</nodes>
    <native>--exclusive</native>
    <!-- Runs the scanner inside the dated output directory.
         * The cd is guarded: if the directory cannot be entered the job
           fails at once instead of scanning and writing its restart, done
           and report files relative to an unintended directory.
         * mkdir uses the p option because the task is retried (maxtries)
           and the per-entry directory already exists on every retry.
         * The path given with the r option is built by sed from the entry
           name: a leading slash is added and each double hyphen becomes a
           slash.
         NOTE(review): output and log file names use the bare entry name,
         the same pattern as the du metatask; this assumes DIRS and BIG_DIRS
         share no entries. Confirm. -->
    <command><cyclestr>set -x ; ulimit -c 0 ; cd "&OUTDIR;/@Y@m@d" || exit 1 ; mkdir -p "#dir#" ; &DISK_USAGE_PROGRAM; -i -t "&RESTART_INTERVAL;" -d "#dir#.done" -r `echo /"#dir#" | sed s,--,/,g` "#dir#.rst.gz" "#dir#/" -s "#dir#-short.txt" -o "#dir#-full.txt"</cyclestr></command>
    <join><cyclestr>&OUTDIR;/@Y@m@d/log/#dir#.log</cyclestr></join>
    &TOPDIR_ENVAR;
    <dependency>
      <!-- Time gate: cycle time shifted by SCAN_OFFSET. -->
      <timedep><cyclestr offset="&SCAN_OFFSET;">@Y@m@d@H@M@S</cyclestr></timedep>
    </dependency>
    <!-- Hang test on the restart file: satisfied when the file is absent,
         or when the datadep carrying age RESTART_HANG_TIME is satisfied.
         NOTE(review): presumably the age is in seconds and means the file
         has not been modified for that long; confirm against the Rocoto
         documentation. -->
    <hangdependency>
      <or>
        <not>
          <datadep><cyclestr>&OUTDIR;/@Y@m@d/#dir#.rst.gz</cyclestr></datadep>
        </not>
        <datadep age="&RESTART_HANG_TIME;"><cyclestr>&OUTDIR;/@Y@m@d/#dir#.rst.gz</cyclestr></datadep>
      </or>
    </hangdependency>
  </task>
</metatask>
<task name="report_at_completion" maxtries="3" final="T">
  <!-- Builds the report once both scan metatasks (du and big_du) satisfy
       their metataskdep, provided report_at_time is not running at that
       moment. Marked as the final task of the cycle.
       NOTE(review): this task and report_at_time write to the same
       report.log; a later run may replace the earlier log. Confirm this is
       intended. -->
  <partition>service</partition>
  <walltime>00:05:00</walltime>
  <account>&ACCOUNT;</account>
  <cores>1</cores>
  <!-- The cd is guarded so the report script never runs from an unintended
       working directory; a failed cd fails the job instead. -->
  <command><cyclestr>set -x ; cd "&OUTDIR;" || exit 1 ; "&REPORT_SCRIPT;" @Y@m@d "&XML_SCRIPT;" "&SEARCH_DAYS;" &ALL_DIRS;</cyclestr></command>
  <join><cyclestr>&OUTDIR;/@Y@m@d/log/report.log</cyclestr></join>
  &TOPDIR_ENVAR;
  <dependency>
    <and>
      <metataskdep metatask="du"/>
      <metataskdep metatask="big_du"/>
      <not><taskdep state="running" task="report_at_time"/></not>
    </and>
  </dependency>
</task>
<task name="report_at_time" maxtries="3">
  <!-- Builds the report when the DEADLINE time gate is reached, whether or
       not the scans have finished, provided report_at_completion is not
       running at that moment. Runs the same command as
       report_at_completion.
       NOTE(review): both report tasks write to the same report.log; a later
       run may replace the earlier log. Confirm this is intended. -->
  <partition>service</partition>
  <walltime>00:05:00</walltime>
  <account>&ACCOUNT;</account>
  <cores>1</cores>
  <!-- The cd is guarded so the report script never runs from an unintended
       working directory; a failed cd fails the job instead. -->
  <command><cyclestr>set -x ; cd "&OUTDIR;" || exit 1 ; "&REPORT_SCRIPT;" @Y@m@d "&XML_SCRIPT;" "&SEARCH_DAYS;" &ALL_DIRS;</cyclestr></command>
  <join><cyclestr>&OUTDIR;/@Y@m@d/log/report.log</cyclestr></join>
  &TOPDIR_ENVAR;
  <dependency>
    <and>
      <!-- Time gate: cycle time shifted by DEADLINE. -->
      <timedep><cyclestr offset="&DEADLINE;">@Y@m@d@H@M@S</cyclestr></timedep>
      <not><taskdep state="running" task="report_at_completion"/></not>
    </and>
  </dependency>
</task>
</workflow>