Skip to content
This repository has been archived by the owner on Nov 2, 2021. It is now read-only.
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: NVIDIA/gpu-monitoring-tools
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: matthewygf/gpu-monitoring-tools
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Can’t automatically merge. Don’t worry, you can still create the pull request.

Commits on May 21, 2019

  1. testing write to csv

    matthewygf committed May 21, 2019
    Copy the full SHA
    586abcc View commit details

Commits on May 22, 2019

  1. Copy the full SHA
    f938380 View commit details

Commits on May 28, 2019

  1. Copy the full SHA
    4293fc0 View commit details

Commits on May 29, 2019

  1. WIP - get AccountingStats

    matthewygf committed May 29, 2019
    Copy the full SHA
    1165190 View commit details
  2. typo

    matthewygf committed May 29, 2019
    Copy the full SHA
    bed4568 View commit details
  3. problems

    matthewygf committed May 29, 2019
    Copy the full SHA
    4eadb7a View commit details
  4. WIP - unused

    matthewygf committed May 29, 2019
    Copy the full SHA
    26380d4 View commit details
  5. Copy the full SHA
    274838c View commit details
  6. Copy the full SHA
    f466d3d View commit details
  7. WIP - test

    matthewygf committed May 29, 2019
    Copy the full SHA
    f0b1344 View commit details
  8. WIP - comment unused

    matthewygf committed May 29, 2019
    Copy the full SHA
    e0ff434 View commit details
  9. WIP - added method to nvml

    matthewygf committed May 29, 2019
    Copy the full SHA
    5f62325 View commit details
  10. WIP - print the pids

    matthewygf committed May 29, 2019
    Copy the full SHA
    939092a View commit details
  11. WIP maybe ?

    matthewygf committed May 29, 2019
    Copy the full SHA
    4a8d3c0 View commit details
  12. WIP - debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    698250d View commit details
  13. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    c6e4f54 View commit details
  14. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    a2668d5 View commit details
  15. debug accounting info

    matthewygf committed May 29, 2019
    Copy the full SHA
    b740d17 View commit details
  16. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    69bfb13 View commit details
  17. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    f7e3fa2 View commit details
  18. test

    matthewygf committed May 29, 2019
    Copy the full SHA
    7bafffe View commit details
  19. test

    matthewygf committed May 29, 2019
    Copy the full SHA
    a8aae45 View commit details
  20. Copy the full SHA
    65a1a46 View commit details
  21. Copy the full SHA
    f52532e View commit details
  22. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    4fe153e View commit details
  23. Copy the full SHA
    e369699 View commit details
  24. convert ctype to go types

    matthewygf committed May 29, 2019
    Copy the full SHA
    6df9c69 View commit details
  25. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    0829cd6 View commit details
  26. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    55fd801 View commit details
  27. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    fc0888b View commit details
  28. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    e85bbfd View commit details
  29. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    96c3582 View commit details
  30. test

    matthewygf committed May 29, 2019
    Copy the full SHA
    c48b431 View commit details
  31. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    eba60a3 View commit details
  32. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    a99e149 View commit details
  33. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    f633c1c View commit details
  34. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    eccb573 View commit details
  35. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    50915e8 View commit details
  36. debug

    matthewygf committed May 29, 2019
    Copy the full SHA
    0dfda02 View commit details
  37. test

    matthewygf committed May 29, 2019
    Copy the full SHA
    dc8f8a4 View commit details
  38. added get process name

    matthewygf committed May 29, 2019
    Copy the full SHA
    0a3f1f3 View commit details
  39. removed unused

    matthewygf committed May 29, 2019
    Copy the full SHA
    c8dd307 View commit details
  40. Copy the full SHA
    e1078e7 View commit details
  41. need to add type

    matthewygf committed May 29, 2019
    Copy the full SHA
    d4f3d2a View commit details
  42. Copy the full SHA
    add4401 View commit details
  43. syntax

    matthewygf committed May 29, 2019
    Copy the full SHA
    3eee527 View commit details
  44. syntax what

    matthewygf committed May 29, 2019
    Copy the full SHA
    61a09b1 View commit details
  45. types conversion

    matthewygf committed May 29, 2019
    Copy the full SHA
    c44f79b View commit details
  46. time and types

    matthewygf committed May 29, 2019
    Copy the full SHA
    2ff654d View commit details
  47. compiler come on

    matthewygf committed May 29, 2019
    Copy the full SHA
    e75250c View commit details
Showing with 406 additions and 1 deletion.
  1. +2 −0 .gitignore
  2. +87 −1 bindings/go/nvml/bindings.go
  3. +37 −0 bindings/go/nvml/nvml.go
  4. +60 −0 bindings/go/samples/csv/main.go
  5. +102 −0 bindings/go/samples/pcie/main.go
  6. +118 −0 bindings/go/samples/pmon/main.go
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.exe
*.csv
88 changes: 87 additions & 1 deletion bindings/go/nvml/bindings.go
Original file line number Diff line number Diff line change
@@ -170,7 +170,7 @@ func systemGetDriverVersion() (string, error) {
return C.GoString(&driver[0]), errorString(r)
}

func systemGetProcessName(pid uint) (string, error) {
func (h handle) systemGetProcessName(pid uint) (string, error) {
var proc [szProcName]C.char

r := C.nvmlSystemGetProcessName(C.uint(pid), &proc[0], szProcName)
@@ -511,6 +511,25 @@ func (h handle) deviceGetComputeRunningProcesses() ([]uint, []uint64, error) {
return pids, mems, errorString(r)
}

// deviceGetAccountingPids returns the process IDs that
// nvmlDeviceGetAccountingPids reports for this device.
//
// It returns (nil, nil) when NVML answers NVML_ERROR_NOT_SUPPORTED. Any other
// non-success status is returned as an error together with a nil slice.
func (h handle) deviceGetAccountingPids() ([]uint, error) {
	var gpuPids [szProcs]C.uint
	count := C.uint(szProcs)

	r := C.nvmlDeviceGetAccountingPids(h.dev, &count, &gpuPids[0])
	if r == C.NVML_ERROR_NOT_SUPPORTED {
		return nil, nil
	}
	if r != C.NVML_SUCCESS {
		// After a failed call count does not describe how many entries of
		// gpuPids were filled in (it can be larger than the buffer), so the
		// buffer must not be read.
		return nil, errorString(r)
	}

	// Never read past the fixed-size buffer, whatever NVML wrote to count.
	n := int(count)
	if n > len(gpuPids) {
		n = len(gpuPids)
	}

	pids := make([]uint, n)
	for i := range pids {
		pids[i] = uint(gpuPids[i])
	}

	return pids, nil
}

func (h handle) deviceGetGraphicsRunningProcesses() ([]uint, []uint64, error) {
var procs [szProcs]C.nvmlProcessInfo_t
var count = C.uint(szProcs)
@@ -529,6 +548,48 @@ func (h handle) deviceGetGraphicsRunningProcesses() ([]uint, []uint64, error) {
return pids, mems, errorString(r)
}

// deviceGetProcessUtilization returns one ProcessUtilization entry per sample
// that nvmlDeviceGetProcessUtilization reports for this device, using a
// lastSeenTimeStamp of 0.
//
// MemUsed of each entry is looked up by PID in the result of
// deviceGetComputeRunningProcesses and is 0 for PIDs that are not listed there.
//
// It returns (nil, nil) when NVML answers NVML_ERROR_NOT_SUPPORTED, when it
// answers NVML_ERROR_NOT_FOUND (documented by NVML as "sample entries are not
// found"), or when the sizing call reports zero samples. Any other failure is
// returned as an error together with a nil slice.
func (h handle) deviceGetProcessUtilization() ([]ProcessUtilization, error) {
	lastSeenTimeStamp := C.ulonglong(0)
	var count C.uint

	// Sizing call: with a nil buffer NVML only writes the number of samples
	// into count, normally together with NVML_ERROR_INSUFFICIENT_SIZE.
	r := C.nvmlDeviceGetProcessUtilization(h.dev, nil, &count, lastSeenTimeStamp)
	if r == C.NVML_ERROR_NOT_SUPPORTED || r == C.NVML_ERROR_NOT_FOUND {
		return nil, nil
	}
	if r != C.NVML_SUCCESS && r != C.NVML_ERROR_INSUFFICIENT_SIZE {
		return nil, errorString(r)
	}
	if count == 0 {
		// Nothing to fetch; taking &samples[0] of an empty slice would panic.
		return nil, nil
	}

	samples := make([]C.nvmlProcessUtilizationSample_t, int(count))
	r = C.nvmlDeviceGetProcessUtilization(h.dev, &samples[0], &count, lastSeenTimeStamp)
	if r == C.NVML_ERROR_NOT_SUPPORTED || r == C.NVML_ERROR_NOT_FOUND {
		return nil, nil
	}
	if r != C.NVML_SUCCESS {
		return nil, errorString(r)
	}

	// The second call rewrites count; use it, but never read past the buffer.
	n := int(count)
	if n > len(samples) {
		n = len(samples)
	}

	cPids, cpMems, err := h.deviceGetComputeRunningProcesses()
	if err != nil {
		return nil, err
	}

	memByPID := make(map[uint]uint64, len(cPids))
	for i, pid := range cPids {
		memByPID[pid] = cpMems[i]
	}

	utilSamples := make([]ProcessUtilization, n)
	for i := range utilSamples {
		s := &samples[i]
		pid := uint(s.pid)
		utilSamples[i] = ProcessUtilization{
			DecUtil:   uint(s.decUtil),
			EncUtil:   uint(s.encUtil),
			SmUtil:    uint(s.smUtil),
			PID:       pid,
			TimeStamp: uint64(s.timeStamp),
			MemUtil:   uint(s.memUtil),
			MemUsed:   memByPID[pid],
		}
	}

	return utilSamples, nil
}

func (h handle) deviceGetAllRunningProcesses() ([]ProcessInfo, error) {
cPids, cpMems, err := h.deviceGetComputeRunningProcesses()
if err != nil {
@@ -586,6 +647,31 @@ func (h handle) deviceGetAllRunningProcesses() ([]ProcessInfo, error) {
return processInfo, nil
}

// deviceGetAccountingStats asks NVML (nvmlDeviceGetAccountingStats) for the
// accounting record of pid on this device and converts it to AccountingStats.
//
// A zero AccountingStats with a nil error is returned when NVML answers
// NVML_ERROR_NOT_SUPPORTED; any other non-success status yields a zero
// AccountingStats and the corresponding error.
func (h handle) deviceGetAccountingStats(pid uint) (AccountingStats, error) {
	var raw C.nvmlAccountingStats_t

	status := C.nvmlDeviceGetAccountingStats(h.dev, C.uint(pid), &raw)

	if status == C.NVML_SUCCESS {
		return AccountingStats{
			GpuUtilization:    uint(raw.gpuUtilization),
			MemoryUtilization: uint(raw.memoryUtilization),
			MaxMemoryUsage:    uint64(raw.maxMemoryUsage),
			Time:              uint64(raw.time),
			StartTime:         uint64(raw.startTime),
			IsRunning:         uint(raw.isRunning),
		}, nil
	}

	if status == C.NVML_ERROR_NOT_SUPPORTED {
		return AccountingStats{}, nil
	}

	return AccountingStats{}, errorString(status)
}

func (h handle) getClocksThrottleReasons() (reason ThrottleReason, err error) {
var clocksThrottleReasons C.ulonglong

37 changes: 37 additions & 0 deletions bindings/go/nvml/nvml.go
Original file line number Diff line number Diff line change
@@ -47,6 +47,15 @@ type Accounting struct {
BufferSize *uint
}

// AccountingStats is the Go representation of a per-process accounting record.
// deviceGetAccountingStats fills it field by field from NVML's
// nvmlAccountingStats_t, converting each C value to the Go integer type below.
// NOTE(review): units and value ranges are whatever NVML reports — confirm
// against the NVML documentation before relying on them.
type AccountingStats struct {
// GpuUtilization is copied from nvmlAccountingStats_t.gpuUtilization.
GpuUtilization uint
// MemoryUtilization is copied from nvmlAccountingStats_t.memoryUtilization.
MemoryUtilization uint
// MaxMemoryUsage is copied from nvmlAccountingStats_t.maxMemoryUsage.
MaxMemoryUsage uint64
// Time is copied from nvmlAccountingStats_t.time.
Time uint64
// StartTime is copied from nvmlAccountingStats_t.startTime.
StartTime uint64
// IsRunning is copied from nvmlAccountingStats_t.isRunning; presumably
// non-zero while the process is still alive — TODO confirm.
IsRunning uint
}

type DeviceMode struct {
DisplayInfo Display
Persistence ModeState
@@ -207,6 +216,7 @@ type Device struct {
Power *uint
Memory *uint64
CPUAffinity *uint
MinorNum *uint
PCI PCIInfo
Clocks ClockInfo
Topology []P2PLink
@@ -265,6 +275,16 @@ type DeviceStatus struct {
Performance PerfState
}

// ProcessUtilization is one per-process utilization sample as returned by
// Device.GetProcessUtilization. All fields except MemUsed are copied from
// NVML's nvmlProcessUtilizationSample_t.
// NOTE(review): units of the *Util fields and of TimeStamp are whatever NVML
// reports — confirm against the NVML documentation.
type ProcessUtilization struct {
// DecUtil is copied from the sample's decUtil field.
DecUtil uint
// EncUtil is copied from the sample's encUtil field.
EncUtil uint
// MemUtil is copied from the sample's memUtil field.
MemUtil uint
// PID is copied from the sample's pid field.
PID uint
// SmUtil is copied from the sample's smUtil field.
SmUtil uint
// TimeStamp is copied from the sample's timeStamp field.
TimeStamp uint64
// MemUsed is the memory value that deviceGetComputeRunningProcesses reports
// for PID; it is 0 when PID is not among the compute processes.
MemUsed uint64
}

func assert(err error) {
if err != nil {
panic(err)
@@ -426,6 +446,7 @@ func NewDeviceLite(idx uint) (device *Device, err error) {
PCI: PCIInfo{
BusID: *busid,
},
MinorNum: minor,
}
return
}
@@ -579,6 +600,22 @@ func (d *Device) GetAllRunningProcesses() ([]ProcessInfo, error) {
return d.handle.deviceGetAllRunningProcesses()
}

// GetAccountingPids returns the process IDs that NVML's accounting query
// (nvmlDeviceGetAccountingPids) reports for this device. It returns (nil, nil)
// when NVML answers that the query is not supported.
func (d *Device) GetAccountingPids() ([]uint, error) {
return d.handle.deviceGetAccountingPids()
}

// GetAccountingStats returns the accounting record NVML holds for pid on this
// device. A zero AccountingStats with a nil error is returned when NVML
// answers that the query is not supported.
func (d *Device) GetAccountingStats(pid uint) (AccountingStats, error) {
return d.handle.deviceGetAccountingStats(pid)
}

// GetProcessUtilization returns the per-process utilization samples NVML
// reports for this device, each annotated with the memory used by that PID
// when it is a compute process. It returns (nil, nil) when NVML answers that
// the query is not supported.
func (d *Device) GetProcessUtilization() ([]ProcessUtilization, error) {
return d.handle.deviceGetProcessUtilization()
}

// SystemGetProcessName returns the name NVML reports for the process pid by
// delegating to the handle's systemGetProcessName.
// NOTE(review): the lookup goes through nvmlSystemGetProcessName, which takes
// only the pid — it does not appear to depend on this particular device.
func (d *Device) SystemGetProcessName(pid uint) (string, error) {
return d.handle.systemGetProcessName(pid)
}

func (d *Device) GetDeviceMode() (mode *DeviceMode, err error) {
defer func() {
if r := recover(); r != nil {
60 changes: 60 additions & 0 deletions bindings/go/samples/csv/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package main

import (
"encoding/csv"
"flag"
"fmt"
"log"
"os"
)

// Command-line flags of the CSV writing sample.
var (
	// usecsv enables writing the sample rows to the file named by path.
	usecsv = flag.Bool("csv", false, "csv format")
	// path is the file that is created (or truncated) when usecsv is set.
	path = flag.String("logpath", "processinfo.csv", "file path for logging")
)

// main echoes the parsed flags and, when -csv is set, writes a header line
// followed by ten generated rows to the -logpath file.
func main() {
	flag.Parse()
	fmt.Printf("%v\n", *path)
	fmt.Printf("%t\n", *usecsv)

	if *usecsv {
		fileHandle, err := os.Create(*path)
		checkError("couldn't create", err)
		defer func() {
			fmt.Printf("gonna close \n")
			// Close can report a delayed write failure; surface it instead
			// of dropping it, but do not abort during shutdown.
			if err := fileHandle.Close(); err != nil {
				log.Printf("couldn't close %s: %v", *path, err)
			}
		}()

		writeSampleRows(csv.NewWriter(fileHandle))
	}

	fmt.Printf("i did some stuff \n")
}

// writeSampleRows writes the header and the ten generated rows to w. Row i
// holds the values v(i*1) through v(i*5).
func writeSampleRows(w *csv.Writer) {
	writeRow(w, []string{"head1", "head2", "head3", "head4", "head5"})

	for i := 1; i <= 10; i++ {
		row := make([]string, 0, 5)
		for k := 1; k <= 5; k++ {
			row = append(row, fmt.Sprintf("v%d", i*k))
		}
		writeRow(w, row)
	}
}

// writeRow writes one record and flushes it immediately. csv.Writer buffers
// its output, so an I/O failure only becomes visible through Error after
// Flush; both results are checked.
func writeRow(w *csv.Writer, row []string) {
	checkError("couldn't write", w.Write(row))
	w.Flush()
	checkError("couldn't write", w.Error())
}

// checkError terminates the program with message and err when err is non-nil
// and does nothing otherwise.
func checkError(message string, err error) {
	if err != nil {
		// log.Fatal would print message and err with nothing between them,
		// because it only inserts spaces between non-string operands.
		log.Fatalf("%s: %v", message, err)
	}
}
102 changes: 102 additions & 0 deletions bindings/go/samples/pcie/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// modified from NVIDIA/gpu-monitoring-tools/bindings/go/samples/dmon

package main

import (
"encoding/csv"
"flag"
"fmt"
"log"
"os"
"os/signal"
"strconv"
"syscall"
"time"

"github.com/matthewygf/gpu-monitoring-tools/bindings/go/nvml"
)

// Command-line flags of the PCIe sample.
var (
	// tocsv switches the output from stdout to the CSV file named by filepath.
	tocsv = flag.Bool("csv", false, "write values to csv instead.")
	// filepath is the CSV file that main creates when tocsv is set.
	filepath = flag.String("logpath", "processinfo.csv", "path to create the csv file.")
	// interval is the sampling period; main multiplies it by time.Millisecond.
	interval = flag.Int("interval", 1, "interval time to run the profiler")
)

// checkAndPrintErrorNoFormat is a no-op for a nil err; otherwise it logs
// message followed by err and terminates the program via log.Fatalln.
func checkAndPrintErrorNoFormat(message string, err error) {
	if err == nil {
		return
	}
	log.Fatalln(message, err)
}

func main() {
nvml.Init()
defer nvml.Shutdown()
flag.Parse()
var fileHandle *os.File
var err error
var writer *csv.Writer
if *tocsv {
fileHandle, err = os.Create(*filepath)
checkAndPrintErrorNoFormat("Could not create file", err)
defer func() {
fileHandle.Close()
}()
}

if fileHandle != nil {
writer = csv.NewWriter(fileHandle)
// bar1 in MiB
// throughput are both in MB
header := []string{"gpu_idx", "bar1_used", "pcie_read", "pcie_write"}
err := writer.Write(header)
checkAndPrintErrorNoFormat("could not write to file:", err)
writer.Flush()
}

count, err := nvml.GetDeviceCount()
if err != nil {
log.Panicln("Error getting device count:", err)
}

var devices []*nvml.Device
for i := uint(0); i < count; i++ {
device, err := nvml.NewDevice(i)
if err != nil {
log.Panicf("Error getting device %d: %v\n", i, err)
}
devices = append(devices, device)
}

sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
intervalTime := time.Duration(*interval)
ticker := time.NewTicker(time.Millisecond * intervalTime)
defer ticker.Stop()
if fileHandle == nil {
fmt.Printf("gpu_idx,bar1_used,pcie_read,pcie_write\n")
}
for {
select {
case <-ticker.C:
for i, device := range devices {
st, err := device.Status()
if err != nil {
log.Panicf("Error getting device %d status: %v\n", i, err)
}
if fileHandle != nil {
row := []string{
strconv.FormatInt(int64(i), 10),
strconv.FormatUint(uint64(*st.PCI.BAR1Used), 10),
strconv.FormatUint(uint64(*st.PCI.Throughput.RX), 10),
strconv.FormatUint(uint64(*st.PCI.Throughput.TX), 10)}
err := writer.Write(row)
checkAndPrintErrorNoFormat("Could not write row", err)
writer.Flush()
} else {
fmt.Printf("%5d,%5d,%5d,%5d\n",
i, *st.PCI.BAR1Used, *st.PCI.Throughput.RX, *st.PCI.Throughput.TX)
}
}
case <-sigs:
return
}
}
}
118 changes: 118 additions & 0 deletions bindings/go/samples/pmon/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// modified from NVIDIA/gpu-monitoring-tools/bindings/go/samples/processInfo
package main

import (
"encoding/csv"
"flag"
"fmt"
"log"
"os"
"os/signal"
"strconv"
"syscall"
"time"

"github.com/matthewygf/gpu-monitoring-tools/bindings/go/nvml"
)

// Command-line flags of the per-process monitoring sample.
var (
	// tocsv switches the output from stdout to the CSV file named by filepath.
	tocsv = flag.Bool("csv", false, "write values to csv instead.")
	// filepath is the CSV file that main creates when tocsv is set.
	filepath = flag.String("logpath", "processinfo.csv", "path to create the csv file.")
	// interval is the sampling period in milliseconds.
	interval = flag.Int("interval", 500, "interval time to run the profiler, in milliseconds")
	// once makes main stop after the first sampling pass.
	once = flag.Bool("once", true, "execute only once and exit with code 0.")
)

// checkAndPrintErrorNoFormat returns immediately when err is nil; for any
// other err it logs message and err and exits through log.Fatalln.
func checkAndPrintErrorNoFormat(message string, err error) {
	if err == nil {
		return
	}
	log.Fatalln(message, err)
}

func main() {
flag.Parse()
nvml.Init()
defer nvml.Shutdown()

var fileHandle *os.File
var err error
var writer *csv.Writer
if *tocsv {
fileHandle, err = os.Create(*filepath)
checkAndPrintErrorNoFormat("Could not create file", err)
defer func() {
fileHandle.Close()
}()
}

if fileHandle != nil {
writer = csv.NewWriter(fileHandle)
header := []string{"gpu_idx", "pid", "sm_util", "mem_util", "mem_used", "command_name"}
err := writer.Write(header)
checkAndPrintErrorNoFormat("could not write to file:", err)
writer.Flush()
}

count, err := nvml.GetDeviceCount()
if err != nil {
log.Panicln("Error getting device count:", err)
}

var devices []*nvml.Device
for i := uint(0); i < count; i++ {
device, err := nvml.NewDevice(i)
if err != nil {
log.Panicf("Error getting device %d: %v\n", i, err)
}
devices = append(devices, device)
}

sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

intervalTime := time.Duration(*interval)
ticker := time.NewTicker(time.Millisecond * intervalTime)
defer ticker.Stop()
if fileHandle == nil {
fmt.Printf("gpu,pid,sm_util,mem_util,mem_used,name\n")
}

for {
select {
case <-ticker.C:
for i, device := range devices {
processUtils, err := device.GetProcessUtilization()
if err != nil {
log.Panicf("Error getting device %d processes utilization %v \n", i, err)
} else {
for j := range processUtils {
if processUtils[j].SmUtil > 0 {
name, err := device.SystemGetProcessName(processUtils[j].PID)
if err != nil {
log.Panicf("Error getting device %d proccess %d name %v \n", i, processUtils[j].PID, err)
}
if fileHandle != nil {
row := []string{
strconv.FormatInt(int64(i), 10),
strconv.FormatUint(uint64(processUtils[j].PID), 10),
strconv.FormatUint(uint64(processUtils[j].SmUtil), 10),
strconv.FormatUint(uint64(processUtils[j].MemUtil), 10),
strconv.FormatUint(uint64(processUtils[j].MemUsed), 10),
name}
err := writer.Write(row)
checkAndPrintErrorNoFormat("Could not write row", err)
writer.Flush()
} else {
fmt.Printf("%5v,%5v,%5v,%5v,%v\n",
i, processUtils[j].PID, processUtils[j].SmUtil, processUtils[j].MemUtil, processUtils[j].MemUsed, name)
}
}
}
}
}

if *once {
os.Exit(0)
}
case <-sigs:
return
}
}
}