# MonitorGPUthread.py
# Runs a fixed queue of training commands one after another, waiting for a
# free GPU (as reported by GPUtil) before launching each one.
import GPUtil
import time
import os
import sys
# Earlier experiment runs, disabled but kept for reference:
# command2 = 'python3 train.py --cfg cfg/yolov3-spp-44.cfg --data data/rubbish.data --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120 --save baseline-resample --image-weights'
# command0 = 'python3 train.py --cfg cfg/yolov3-spp-44-db.cfg --data data/rubbish.data --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120 --save baseline-dropblock1'
# command1 = 'python3 train.py --cfg cfg/yolov3-spp-44-pdb.cfg --data data/rubbish.data --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120 --save baseline-paddledb'
# command3 = 'python3 train.py --cfg cfg/yolov3-spp-44.cfg --data data/rubbish.data --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120 --save baseline-lsmooth --smooth-ratio 0.1'
# command4 = 'python3 train.py --cfg cfg/yolov3-spp-44.cfg --data data/rubbish.data --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120 --save baseline-lbox --lbox-weight'

# Active training runs. Each literal is split across lines purely for
# readability; adjacent string literals are joined into one shell command.
command0 = (
    'python3 train.py --cfg cfg/yolov3-spp-44.cfg'
    ' --data data/rubbish_coco.data'
    ' --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120'
    ' --save baseline-coco'
)
command1 = (
    'python3 train.py --cfg cfg/yolov3-spp-44.cfg'
    ' --data data/rubbish_all.data'
    ' --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120'
    ' --save baseline-all --notest'
)
command2 = (
    'python3 train.py --cfg cfg/yolov3-spp-44.cfg'
    ' --data data/rubbish_all_coco.data'
    ' --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120'
    ' --save baseline-all_coco --notest'
)
# NOTE(review): command3 differs from command2 only in its --save name;
# confirm that no extra training flag is missing here.
command3 = (
    'python3 train.py --cfg cfg/yolov3-spp-44.cfg'
    ' --data data/rubbish_all_coco.data'
    ' --weights weights/yolov3-sppu.pt --batch-size 16 --epochs 120'
    ' --save baseline-all_coco-nolbox --notest'
)

# Queue of shell commands, listed in the order they will be executed.
commands = [command0, command1, command2, command3]
# Seconds to wait before probing again when no GPU is free (10 minutes).
GPU_POLL_INTERVAL_SECONDS = 10 * 60

# Work through `commands` in order, launching each one only after GPUtil
# reports an available GPU.
command_idx = 0
# Bounding the loop by the queue length ends the script as soon as the last
# command has finished (and immediately for an empty queue), instead of
# waiting for a free GPU one more time just to discover nothing is left.
while command_idx < len(commands):
    try:
        # Used purely as an availability probe: the except clause below relies
        # on it raising RuntimeError when no GPU qualifies. The returned
        # device ids are not needed.
        GPUtil.getFirstAvailable()
        command = commands[command_idx]
        print(command)
        # os.system returns once the command has finished; any non-zero
        # status is treated as a failure and aborts the whole queue.
        exec_status = os.system(command)
        if exec_status:
            raise OSError("System Invoke Error!")
        command_idx += 1
    except RuntimeError:
        # No GPU free right now: report, sleep, then retry the same command.
        print('=================GPU Information====================')
        print("Prepare to Execute Command", command_idx)
        print("Waiting GPU Free...")
        # One call with portable directives; same output as "%F" + ' ' + "%T".
        print(time.strftime("%Y-%m-%d %H:%M:%S"))
        print('====================================================')
        time.sleep(GPU_POLL_INTERVAL_SECONDS)
    except Exception:
        # Anything else (including the OSError raised above) is fatal: report
        # the exception type and re-raise. KeyboardInterrupt and SystemExit
        # are deliberately not caught so Ctrl-C stops the script cleanly.
        print("========================================here, or not=================")
        print("Unexpected error:", sys.exc_info()[0])
        raise
print('Done!!')