-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdevice.go
87 lines (71 loc) · 1.84 KB
/
device.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
package main
import (
"bytes"
"fmt"
"os/exec"
"strings"
"sync"
)
// GPU describes one device, built from a single data row of the nvidia-smi
// CSV output. Every field is kept as the raw text of its CSV column, exactly
// as ExtractGPUInfo splits it out; no numeric parsing happens here.
type GPU struct {
	// Identification columns ("name" and "index" in the query).
	Name, Index string

	// Memory columns ("memory.free", "memory.used" and "memory.total" in the
	// query), in that order.
	FreeMemory, UsedMemory, TotalMemory string
}
// Building blocks of the nvidia-smi invocation that is run on each host.
const (
	// BASE_COMMAND is the executable to invoke.
	BASE_COMMAND = "nvidia-smi"
	// DEFAULT_QUERY is the property list handed to --query-gpu.
	// ExtractGPUInfo reads the resulting columns by position, so the order
	// of the properties matters.
	DEFAULT_QUERY = "name,index,memory.free,memory.used,memory.total"
	// DEFAULT_FORMAT is the value handed to --format.
	DEFAULT_FORMAT = "csv"
)

// COMMAND is the complete command line, assembled once at package
// initialization from the constants above.
var COMMAND = fmt.Sprintf(
	"%s --query-gpu=%s --format=%s",
	BASE_COMMAND, DEFAULT_QUERY, DEFAULT_FORMAT,
)
// FREE_MEMORY_PERCENTAGE is the largest used/total memory ratio at which a GPU
// is still considered free. Despite the name, the value is a fraction rather
// than a percentage: 0.05 means at most 5% of the memory is in use. The
// comparison in CheckIsGPUFree is inclusive (<=).
const FREE_MEMORY_PERCENTAGE = 0.05
// GetGPUInfoFromHosts queries every host in hosts concurrently and returns the
// collected results keyed by host.
//
// One goroutine running GetGPUInfo with COMMAND is started per host, and the
// function blocks until all of them have finished. The value stored for a host
// is whatever GetGPUInfo recorded for it. The returned map is never nil; a
// host listed more than once ends up as a single key.
func GetGPUInfoFromHosts(hosts []string) map[string]string {
	var (
		results sync.Map
		pending sync.WaitGroup
	)

	for i := range hosts {
		pending.Add(1)
		go GetGPUInfo(hosts[i], COMMAND, &results, &pending)
	}
	pending.Wait()

	// All workers are done, so the concurrent map can be flattened into a
	// plain one for the caller.
	info := map[string]string{}
	results.Range(func(host, output interface{}) bool {
		info[host.(string)] = output.(string)
		return true
	})
	return info
}
// GetGPUInfo runs command on host through ssh and records the outcome in m
// under the key host.
//
// The value stored is the captured standard output of the ssh process, or the
// empty string when running the process reports any error. Nothing is
// returned; wg.Done is called exactly once when the function exits, so the
// caller must have accounted for this call with wg.Add beforehand.
func GetGPUInfo(host string, command string, m *sync.Map, wg *sync.WaitGroup) {
	defer wg.Done()

	var stdout bytes.Buffer
	ssh := exec.Command("ssh", "-o ConnectTimeout=2", host, command)
	ssh.Stdout = &stdout

	// On failure any partial output is deliberately dropped in favour of "".
	result := ""
	if ssh.Run() == nil {
		result = stdout.String()
	}
	m.Store(host, result)
}
// ExtractGPUInfo converts the CSV text produced by the nvidia-smi query into
// one GPU value per data row.
//
// The first line of gpuInfo is treated as the CSV header and skipped. Every
// following line is split on "," and its first five fields are copied
// verbatim (no whitespace trimming) into Name, Index, FreeMemory, UsedMemory
// and TotalMemory, in that order.
//
// Rows with fewer than five fields are skipped. This covers the empty string
// that a trailing newline leaves at the end of the split, which would
// otherwise cause an index-out-of-range panic. Input without any usable data
// row yields a nil slice.
func ExtractGPUInfo(gpuInfo string) []GPU {
	// Number of columns requested by the query and consumed below.
	const fieldCount = 5

	var devices []GPU
	// strings.Split with a non-empty separator always returns at least one
	// element, so slicing off the header with [1:] cannot panic.
	lines := strings.Split(gpuInfo, "\n")
	for _, ln := range lines[1:] {
		fields := strings.Split(ln, ",")
		if len(fields) < fieldCount {
			// Blank or truncated row: nothing reliable to extract.
			continue
		}
		devices = append(devices, GPU{
			Name:        fields[0],
			Index:       fields[1],
			FreeMemory:  fields[2],
			UsedMemory:  fields[3],
			TotalMemory: fields[4],
		})
	}
	return devices
}
// CheckIsGPUFree reports whether gpu counts as free, meaning that the ratio of
// its used memory to its total memory is at most FREE_MEMORY_PERCENTAGE.
//
// UsedMemory and TotalMemory are converted with GetMemoryInMB. If either
// conversion fails, or the total is not positive, the function returns false,
// so a reading that cannot be interpreted is never reported as a free GPU.
func CheckIsGPUFree(gpu GPU) bool {
	usedMemory, err := GetMemoryInMB(gpu.UsedMemory)
	if err != nil {
		return false
	}
	totalMemory, err := GetMemoryInMB(gpu.TotalMemory)
	if err != nil || totalMemory <= 0 {
		// Also guards the division below against a zero denominator.
		return false
	}
	return float64(usedMemory)/float64(totalMemory) <= FREE_MEMORY_PERCENTAGE
}