-
Notifications
You must be signed in to change notification settings - Fork 2
/
run.sh
executable file
·246 lines (233 loc) · 8.3 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#!/bin/bash
#!/usr/bin/env sh
#
# \description Parallel execution of the benchmarking
#
# \author Artem V L <artem@exascale.info> https://exascale.info
#
# Note: The benchmarking is based on GNU Parallel, see the paper:
# O. Tange (2011): GNU Parallel - The Command-Line Power Tool, USENIX Magazine, February 2011:42-47.
# Default settings; most of them can be overridden by the command line options
FREEMEM="8G"  # Minimal available RAM to start the subsequent job: >= 8G for youtube; 5%
OUTP="res/algs.res"  # File for the aggregated evaluation results
GRAMDIR="res/gram"  # Base name of the directory for the gram matrices
##METRIC=cosine  # cosine, jaccard, hamming
METRICS="cosine jaccard"  # "cosine jaccard hamming"  # jacnor/nopjac/norjac
# One-letter marks of the metrics.
# Note: the explicit "declare -A" is required, otherwise bash creates an indexed
# array, evaluates each key arithmetically (to 0) and retains only the last value
declare -A METRMARK=([cosine]=c [jaccard]=j [hamming]=h [jacnop]=p)  # Note: spaces are significant; jacnor/nopjac/norjac
#METRICS=cosine
EMBDIMS=128
CLSDIMS=''  # Disable
#ROOTDIMS=''
BINARIZE=''
# Algorithms based on the cosine similarity metric-based optimization dimensions building
ALGORITHMS="Deepwalk GraRep harp-deepwalk harp-line HOPE LINE12 netmf Node2vec Verse"  # NetHash, nodesketch SK_ANH sketch_o1 daoc-gr:=1
# nodesketch, SK_ANH, sketch_o1: uint16 (0 .. 2^16) !!
# Algorithms based on the hamming distance metric-based optimization dimensions building
#ALGORITHMS_HAMMING="LTH_INHMF LTH_ITQ LTH_SGH LTH_SH NetHash, nodesketch, SK_ANH, sketch_o1"
# Types> LTH_INHMF: int16 (-1, 1); LTH_ITQ/SGH/SH: uint8(0, 1);
#ALGORITHMS="GraRep"
GRAPHS="blogcatalog dblp homo wiki"  # youtube
# blogcatalog=blog, wiki=pos, homo=ppi
#GRAPHS=blogcatalog
GRAM=0  # The number of instances for the GRAM matrices evaluation
# Max swappiness, should be 1..10 (low swappiness to hold most of data in RAM)
MAX_SWAP=5
# Usage description; the space-separated default lists are shown comma-separated
USAGE="$0 -h | [-o <output>=res/algs.res] [-m \"{${METRICS// /,}} \"+] [-a \"{${ALGORITHMS// /,}} \"+] [-g \"{${GRAPHS// /,}} \"+] [--gram <number>] [-e <embdims>=${EMBDIMS}] [--force-dims] [-f <min_available_RAM>]
-o,--output - output file for the aggregated results on evaluation, execution logs are stored in the same dir. Default: $OUTP
-m,--metrics - metrics used for the gram matrix construction. Default: \"$METRICS\"
-b,--binarize - binarize embedding by the mean square error
-a,--algorithms - evaluationg algorithms. Default: \"$ALGORITHMS\"
-g,--graphs - input graphs (networks) specified by the adjacency matrix in the .mat format. Default: \"$GRAPHS\"
--gram - evaluate gram matrices for the specified number of embeddings instead of the embeddings accuracy, 0 disables the gram mode
-e,--emb-dims - the number of dimensions in the input embeddings or any identifier to select the embeddings filename
and load from the dir embs<dims>. Default: $EMBDIMS
--force-dims - force the number of cluster-based dimensions to the specified --emb-dims bounding with [root-cls, total-cls].
The out of range value is adjusted to the actual bound, so <=1 means output only the root clusters as dimensions. Actual only for the NVC format
-f,--free-mem - limit the minimal amount of the available RAM to start subsequent job. Default: $FREEMEM
-h,--help - help, show this usage description
Examples:
\$ $0 -a \"daoc-g=1 daoc-gr:=1\" --force-dims --gram 5
\$ $0 -f 8.5G -o res/algs.res -m cosine -a Deepwalk -g 'dblp wiki'
\$ $0 -m jaccard -a 'daoc-gr:=1' --force-dims -e 128 -g 'blogcatalog dblp homo wiki' --gram
"
# -d,--default - execute everything with default arguments
# At least one argument is required to start anything:
# otherwise show the usage description and fail
[ $# -ge 1 ] || {
  echo -e "Usage: $USAGE"  # -e to interpret correctly '\n'
  exit 1
}
# Abort with an error unless the option is followed by an acceptable value.
# Arguments:
#   $1 - option name
#   $2 - number of the remaining command line arguments, the option included
#   $3 - candidate value (the argument following the option)
# Note: the arguments are counted explicitly because "shift 2" fails without
# shifting anything when the value is omitted, which would loop forever
require_value() {
  if [ "$2" -lt 2 ] || [ "${3::1}" = "-" ]; then
    echo "ERROR, invalid argument value of $1: $3"
    exit 1
  fi
}

# Parse the command line options overriding the default settings.
# Arguments: the command line arguments of the script
# Globals written: OUTP, METRICS, BINARIZE, ALGORITHMS, GRAPHS, GRAM, EMBDIMS,
#   CLSDIMS, FREEMEM
# Globals read: USAGE
# Exits with 0 after showing the usage on -h|--help and with 1 on an unknown
# option or an invalid option value
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h|--help)
        # Quoted to retain the line breaks of the usage description
        echo -e "$USAGE"  # -e to interpret '\n'
        exit 0
        ;;
      # -d|--default)
      #   # Use defaults for the remained parameters
      #   break
      #   ;;
      -o|--output)
        require_value "$1" $# "$2"
        OUTP=$2
        echo "Set $1: $2"
        shift 2
        ;;
      -m|--metrics)
        require_value "$1" $# "$2"
        METRICS=$2
        echo "Set $1: $2"
        shift 2
        ;;
      -b|--binarize)
        # The flag itself is forwarded to the evaluation script
        BINARIZE=$1
        echo "Set BINARIZE: $BINARIZE"
        shift
        ;;
      -a|--algorithms)
        require_value "$1" $# "$2"
        ALGORITHMS=$2
        echo "Set $1: $2"
        shift 2
        ;;
      -g|--graphs)
        require_value "$1" $# "$2"
        GRAPHS=$2
        echo "Set $1: $2"
        shift 2
        ;;
      --gram)
        require_value "$1" $# "$2"
        # Only non-negative integers are valid, 0 disables the gram mode
        if ! [[ "$2" =~ ^[0-9]+$ ]]; then
          echo "ERROR, invalid argument value of $1: $2 (non-negative integer is expected)"
          exit 1
        fi
        GRAM=$2
        echo "Set $1: $2"  # GRAPHS are used only to identify embedding file names
        shift 2
        ;;
      -e|--emb-dims)
        require_value "$1" $# "$2"
        EMBDIMS=$2
        echo "Set $1: $2"
        shift 2
        ;;
      --force-dims)
        CLSDIMS=1
        echo "Set $1"
        shift
        ;;
      -f|--free-mem)
        require_value "$1" $# "$2"
        FREEMEM=$2
        echo "Set $1: $2"
        shift 2
        ;;
      *)
        # The arguments are passed as data, never as the printf format
        printf 'Error: Invalid option specified: %s %s ...\n\n%s' "$1" "$2" "$USAGE"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# The results directory is the parent directory of the aggregated output file,
# it also holds the job log of the execution
OUTDIR=$(dirname "$OUTP")
EXECLOG="$OUTDIR/algs.log"
mkdir -p "$OUTDIR"
# Report the effective settings, one "NAME: value" pair per line
printf '%s: %s\n' \
  ALGORITHMS "$ALGORITHMS" \
  GRAPHS "$GRAPHS" \
  EMBDIMS "$EMBDIMS" \
  EXECLOG "$EXECLOG"
# Lower vm.swappiness to the specified limit if the current value exceeds it.
# Arguments:
#   $1 - maximal allowed swappiness
#   $2 - file exposing the current swappiness, /proc/sys/vm/swappiness by default
# Note: sysctl is executed via sudo, so the password might be requested
limit_swappiness() {
  local max_swap=$1
  local swap_file=${2:-/proc/sys/vm/swappiness}
  local cur_swap
  # Nothing to adjust when the current value can not be read, which otherwise
  # results in a malformed test expression
  if [ ! -r "$swap_file" ]; then
    return 0
  fi
  cur_swap=$(< "$swap_file")
  if [ "$cur_swap" -gt "$max_swap" ]; then
    echo "Setting vm.swappiness to $max_swap (Ctrl+C to omit)..."
    sudo sysctl -w vm.swappiness="$max_swap"
  fi
}

limit_swappiness "$MAX_SWAP"
# Fall back to the English UTF-8 locale unless LC_ALL is already defined.
# Note: both the unset and the empty LC_ALL are treated as undefined
if [ -z "$LC_ALL" ]; then
  export LC_ALL="en_US.UTF-8" LC_CTYPE="en_US.UTF-8" LANGUAGE="en_US.UTF-8"
fi
# Check existence of the requirements
EXECUTOR=python3  # pypy3
EXECUTORX="$EXECUTOR -O"  # Executor with options
UTILS="free sed bc parallel ${EXECUTOR}"  # awk
# Each utility is probed by requesting its version; the first failed probe
# terminates the script with the exit code of that probe
for UT in $UTILS; do
  if $UT --version; then
    ERR=0
  else
    ERR=$?
    echo "ERROR, $UT utility is required to be installed, errcode: $ERR"
    exit $ERR
  fi
done
# Evaluate the specified percentage of the total RAM in bytes.
# Arguments:
#   $1 - percentage of the total RAM, a non-negative decimal number
# Outputs: the integer number of bytes to stdout
# Note: "free -b" is used because "free" reports KiB by default, whereas GNU
# parallel treats a --memfree value without any unit suffix as bytes
ram_percent_to_bytes() {
  # Take the first number of the "Mem:" line and let bc evaluate <total>*<percent>/100
  free -b | sed -rn "s;^Mem:\s+([0-9]+).*;\1*${1}/100;p" | bc
}

# Convert the relative FREEMEM (having the trailing percent sign) to the absolute value
if [ "${FREEMEM:(-1)}" = "%" ]; then
  MEMPERCENT=${FREEMEM::-1}  # The value without the percent sign
  # Reject malformed values early instead of passing them to sed and bc
  if ! [[ "$MEMPERCENT" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "ERROR, invalid percentage of the RAM: $FREEMEM"
    exit 1
  fi
  FREEMEM=$(ram_percent_to_bytes "$MEMPERCENT")
fi
echo "FREEMEM: $FREEMEM"
## Ensure that the specified number is lower than the total RAM
#case ${FREEMEM:-1} in
#G|g)
## Use defaults for the remained parameters
#FREEMEM=`echo ${FREEMEM::-1}*1024*1024*1024 | bc`
#;;
#M|m)
#FREEMEM=`echo ${FREEMEM::-1}*1024*1024 | bc`
#;;
#K|k)
#FREEMEM=`echo ${FREEMEM::-1}*1024 | bc`
#;;
#esac
#if [ $FREEMEM -lt $TOTMEM ]; then
#echo 'The specified FREEMEM ($FREEMEM) is larger that the total available RAM ($TOTMEM)'
#exit 1
#fi
# A non-empty CLSDIMS requests forcing of the dimensions: it is replaced with
# the "-d <EMBDIMS>" option and SUFDIMS gets the respective "-d<EMBDIMS>"
# suffix; both values are empty otherwise
SUFDIMS=${CLSDIMS:+-d${EMBDIMS}}
CLSDIMS=${CLSDIMS:+-d ${EMBDIMS}}
#echo "> ALGORITHMS: ${ALGORITHMS}, FREEMEM: $FREEMEM"
# embs_{2}_{1}.* # *: .mat | .nvc
echo -e "\n\nStarting the evaluations giving the FREEMEM=${FREEMEM}...\n"

# Fragments of the GNU parallel command line shared by both modes.
# Note: the unquoted expansions are split into separate words intentionally.
# With the named input sources {1} is a graph, {2} is an algorithm, {3} is
# a metric and {4} (gram mode only) is the index of the embedding instance
JOBCTL=(--header : --results "$OUTDIR" --joblog "$EXECLOG" --bar)
LIMITS=(--verbose --noswap --memfree ${FREEMEM} --load 96%)
SCORER=(${EXECUTORX} scoring_classif.py -m {3} $BINARIZE $CLSDIMS)
INPUTS=(::: Graphs ${GRAPHS} ::: algs ${ALGORITHMS} ::: metrics ${METRICS})

if [ "$GRAM" -ge "1" ]; then
  # Gram mode: build the gram matrices for GRAM instances of each embedding
  GRAMDIR=${GRAMDIR}$EMBDIMS
  echo "GRAMDIR: $GRAMDIR"
  mkdir -p $GRAMDIR
  # Note: parallel '--plus' can be used for the advanced parameter substitution
  parallel "${JOBCTL[@]}" --plus --tagstring {2}${SUFDIMS}_{1}_{3}_{4} \
    "${LIMITS[@]}" "${SCORER[@]}" \
    -o "${GRAMDIR}/gram_{2}${SUFDIMS}-{3}_{1}{4}.mat" \
    gram --embedding embeds/embs${EMBDIMS}/embs_{2}_{1}{4}.* \
    "${INPUTS[@]}" ::: gram $(seq $GRAM)  # $({1..$GRAM})
else
  # Evaluation mode: score each embedding against its input network
  parallel "${JOBCTL[@]}" --tagstring {2}${SUFDIMS}_{1}_{3} \
    "${LIMITS[@]}" "${SCORER[@]}" \
    -o "${OUTP}" \
    eval --embedding embeds/embs${EMBDIMS}/embs_{2}_{1}.* --network graphs/{1}.mat \
    "${INPUTS[@]}"
fi
# Note: ${METRMARK[{3}]} yields an error