Skip to content

Commit 77fd58c

Browse files
authored
Merge pull request #16 from PKUHPC/log_and_getclusternodesinfo
1. 日志支持打印具体文件行数;2. GetClusterNodesInfo当查询所有节点时,可以通过一行命令解析,解析代码属于CPU密集…
2 parents d1f53ad + 9843838 commit 77fd58c

File tree

3 files changed

+55
-24
lines changed

3 files changed

+55
-24
lines changed

caller/caller.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
package caller
22

33
import (
4+
"bytes"
45
"database/sql"
6+
"fmt"
57
"io"
68
"log"
79
"os"
8-
"scow-slurm-adapter/utils"
10+
"path/filepath"
911

1012
_ "github.com/go-sql-driver/mysql"
1113
"github.com/sirupsen/logrus"
1214
"gopkg.in/natefinch/lumberjack.v2"
15+
"scow-slurm-adapter/utils"
1316
)
1417

1518
var (
@@ -18,6 +21,32 @@ var (
1821
Logger *logrus.Logger
1922
)
2023

24+
type LogFormatter struct{}
25+
26+
func (m *LogFormatter) Format(entry *logrus.Entry) ([]byte, error) {
27+
var b *bytes.Buffer
28+
if entry.Buffer != nil {
29+
b = entry.Buffer
30+
} else {
31+
b = &bytes.Buffer{}
32+
}
33+
34+
timestamp := entry.Time.Format("2006-01-02 15:04:05")
35+
var newLog string
36+
37+
// HasCaller()为true才会有调用信息
38+
if entry.HasCaller() {
39+
fName := filepath.Base(entry.Caller.File)
40+
newLog = fmt.Sprintf("[%s] [%s] [%s:%d %s] %s\n",
41+
timestamp, entry.Level, fName, entry.Caller.Line, entry.Caller.Function, entry.Message)
42+
} else {
43+
newLog = fmt.Sprintf("[%s] [%s] %s\n", timestamp, entry.Level, entry.Message)
44+
}
45+
46+
b.WriteString(newLog)
47+
return b.Bytes(), nil
48+
}
49+
2150
func init() {
2251
currentPwd, _ := os.Getwd()
2352
ConfigValue = utils.ParseConfig(currentPwd + "/" + utils.DefaultConfigPath)
@@ -44,8 +73,9 @@ func initDB() {
4473

4574
func initLogger() {
4675
Logger = logrus.New()
76+
Logger.SetReportCaller(true)
4777
// Set the log output format to the custom plain-text LogFormatter
48-
Logger.SetFormatter(&logrus.JSONFormatter{})
78+
Logger.SetFormatter(&LogFormatter{})
4979
// 设置日志级别为Info
5080
Logger.SetLevel(logrus.InfoLevel)
5181

main.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99

1010
"scow-slurm-adapter/caller"
1111
pb "scow-slurm-adapter/gen/go"
12-
1312
"scow-slurm-adapter/services/account"
1413
"scow-slurm-adapter/services/app"
1514
"scow-slurm-adapter/services/config"

services/config/config.go

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
package config
22

33
import (
4+
"bufio"
45
"context"
56
"fmt"
6-
"scow-slurm-adapter/caller"
7-
pb "scow-slurm-adapter/gen/go"
8-
"scow-slurm-adapter/utils"
97
"strconv"
108
"strings"
119
"sync"
@@ -14,6 +12,9 @@ import (
1412
"google.golang.org/genproto/googleapis/rpc/errdetails"
1513
"google.golang.org/grpc/codes"
1614
"google.golang.org/grpc/status"
15+
"scow-slurm-adapter/caller"
16+
pb "scow-slurm-adapter/gen/go"
17+
"scow-slurm-adapter/utils"
1718
)
1819

1920
type ServerConfig struct {
@@ -731,7 +732,7 @@ func (s *ServerConfig) GetAvailablePartitions(ctx context.Context, in *pb.GetAva
731732
return &pb.GetAvailablePartitionsResponse{Partitions: parts}, nil
732733
}
733734

734-
func extractNodeInfo(info string) (*pb.NodeInfo, error) {
735+
func extractNodeInfo(info string) *pb.NodeInfo {
735736
var (
736737
partitionList []string
737738
totalGpusInt int
@@ -799,7 +800,7 @@ func extractNodeInfo(info string) (*pb.NodeInfo, error) {
799800
GpuCount: uint32(totalGpusInt),
800801
AllocGpuCount: uint32(allocGpusInt),
801802
IdleGpuCount: uint32(totalGpusInt) - uint32(allocGpusInt),
802-
}, nil
803+
}
803804
}
804805

805806
func getNodeInfo(node string, wg *sync.WaitGroup, nodeChan chan<- *pb.NodeInfo, errChan chan<- error) {
@@ -817,28 +818,23 @@ func getNodeInfo(node string, wg *sync.WaitGroup, nodeChan chan<- *pb.NodeInfo,
817818
return
818819
}
819820

820-
nodeInfo, err := extractNodeInfo(info)
821-
if err != nil {
822-
errChan <- err
823-
return
824-
}
821+
nodeInfo := extractNodeInfo(info)
825822

826823
nodeChan <- nodeInfo
827824
}
828825

829826
func (s *ServerConfig) GetClusterNodesInfo(ctx context.Context, in *pb.GetClusterNodesInfoRequest) (*pb.GetClusterNodesInfoResponse, error) {
830827
var (
831-
wg sync.WaitGroup
832-
nodesInfo []*pb.NodeInfo
833-
nodesInfoList []string
828+
wg sync.WaitGroup
829+
nodesInfo []*pb.NodeInfo
834830
)
835831
caller.Logger.Infof("Received request GetClusterNodesInfo: %v", in)
836832
nodeChan := make(chan *pb.NodeInfo, len(in.NodeNames))
837833
errChan := make(chan error, len(in.NodeNames))
838834

839835
if len(in.NodeNames) == 0 {
840836
// 获取集群中全部节点的信息
841-
getNodesInfoCmd := "scontrol show nodes --oneliner | grep Partitions | awk '{print $1}' | awk -F= '{print $2}' | tr '\n' ';'" // 获取全部计算节点主机名
837+
getNodesInfoCmd := "scontrol show nodes --oneliner | grep Partitions" // Fetch one full info line per node (only lines containing "Partitions")
842838
output, err := utils.RunCommand(getNodesInfoCmd)
843839
if err != nil {
844840
errInfo := &errdetails.ErrorInfo{
@@ -848,17 +844,22 @@ func (s *ServerConfig) GetClusterNodesInfo(ctx context.Context, in *pb.GetCluste
848844
st, _ = st.WithDetails(errInfo)
849845
return nil, st.Err()
850846
}
851-
nodesInfoList = strings.Split(output, ";")
852-
nodesInfoList = nodesInfoList[:len(nodesInfoList)-1]
853-
} else {
854-
nodesInfoList = in.NodeNames
847+
// 按行分割输出
848+
scanner := bufio.NewScanner(strings.NewReader(output))
849+
for scanner.Scan() {
850+
line := scanner.Text()
851+
nodeInfo := extractNodeInfo(line)
852+
nodesInfo = append(nodesInfo, nodeInfo)
853+
}
854+
caller.Logger.Infof("GetClusterNodesInfoResponse: %v", nodesInfo)
855+
return &pb.GetClusterNodesInfoResponse{Nodes: nodesInfo}, nil
855856
}
856857

857-
for _, node := range nodesInfoList {
858-
node1 := node
858+
for _, node := range in.NodeNames {
859+
nodeName := node
859860
wg.Add(1)
860861
go func() {
861-
getNodeInfo(node1, &wg, chan<- *pb.NodeInfo(nodeChan), chan<- error(errChan))
862+
getNodeInfo(nodeName, &wg, chan<- *pb.NodeInfo(nodeChan), chan<- error(errChan))
862863
}()
863864
}
864865

@@ -879,6 +880,7 @@ func (s *ServerConfig) GetClusterNodesInfo(ctx context.Context, in *pb.GetCluste
879880
}
880881
default:
881882
}
883+
caller.Logger.Infof("GetClusterNodesInfoResponse: %v", nodesInfo)
882884
return &pb.GetClusterNodesInfoResponse{Nodes: nodesInfo}, nil
883885
}
884886

0 commit comments

Comments
 (0)