Skip to content

Commit

Permalink
Merge pull request #278 from kthcloud/dev
Browse files Browse the repository at this point in the history
fix gpu not attaching in cs, fix jobs not using entry and exit func (some activities stuck)
  • Loading branch information
saffronjam authored Nov 18, 2023
2 parents 201e56b + cbc9e4c commit eca9b06
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 15 deletions.
50 changes: 40 additions & 10 deletions pkg/jobs/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func (runner *Runner) Run() {
}
}

go wrapper(jobDef.JobFunc, runner.Job)
go wrapper(jobDef)
} else {
utils.PrettyPrintError(fmt.Errorf("unknown job type: %s", runner.Job.Type))

Expand All @@ -54,39 +54,69 @@ func (runner *Runner) Run() {
}
}

func wrapper(fn func(job *jobModel.Job) error, job *jobModel.Job) {
err := fn(job)
func wrapper(def *JobDefinition) {
if def.EntryFunc != nil {
err := def.EntryFunc(def.Job)
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error executing job (%s) entry function. details: %w", def.Job.Type, err))

err = jobModel.New().MarkFailed(def.Job.ID, err.Error())
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as failed. details: %w", err))
return
}
return
}
}

defer func() {
if def.ExitFunc != nil {
err := def.ExitFunc(def.Job)
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error executing job (%s) exit function. details: %w", def.Job.Type, err))

err = jobModel.New().MarkFailed(def.Job.ID, err.Error())
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as failed. details: %w", err))
return
}
return
}
}
}()

err := def.JobFunc(def.Job)

if err != nil {
if strings.HasPrefix(err.Error(), "failed") {
err = errors.Unwrap(err)
utils.PrettyPrintError(fmt.Errorf("failed job (%s). details: %w", job.Type, err))
utils.PrettyPrintError(fmt.Errorf("failed job (%s). details: %w", def.Job.Type, err))

err = jobModel.New().MarkFailed(job.ID, err.Error())
err = jobModel.New().MarkFailed(def.Job.ID, err.Error())
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as failed. details: %w", err))
return
}
} else if strings.HasPrefix(err.Error(), "terminated") {
err = errors.Unwrap(err)
utils.PrettyPrintError(fmt.Errorf("terminated job (%s). details: %w", job.Type, err))
utils.PrettyPrintError(fmt.Errorf("terminated job (%s). details: %w", def.Job.Type, err))

err = jobModel.New().MarkTerminated(job.ID, err.Error())
err = jobModel.New().MarkTerminated(def.Job.ID, err.Error())
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as terminated. details: %w", err))
return
}
} else {
utils.PrettyPrintError(fmt.Errorf("error executing job (%s). details: %w", job.Type, err))
utils.PrettyPrintError(fmt.Errorf("error executing job (%s). details: %w", def.Job.Type, err))

err = jobModel.New().MarkFailed(job.ID, err.Error())
err = jobModel.New().MarkFailed(def.Job.ID, err.Error())
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as failed. details: %w", err))
return
}
}
} else {
err = jobModel.New().MarkCompleted(job.ID)
err = jobModel.New().MarkCompleted(def.Job.ID)
if err != nil {
utils.PrettyPrintError(fmt.Errorf("error marking job as completed. details: %w", err))
return
Expand Down
11 changes: 6 additions & 5 deletions pkg/subsystems/cs/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"gopkg.in/yaml.v3"
"log"
"math/rand"
"net/url"
"strings"
"time"
)
Expand Down Expand Up @@ -120,11 +121,11 @@ func (client *Client) UpdateVM(public *models.VmPublic) (*models.VmPublic, error
params.SetName(public.Name)
params.SetDisplayname(public.Name)

//if public.ExtraConfig == "" {
// params.SetExtraconfig(url.QueryEscape("none"))
//} else {
// params.SetExtraconfig(url.QueryEscape(public.ExtraConfig))
//}
if public.ExtraConfig == "" {
params.SetExtraconfig(url.QueryEscape("none"))
} else {
params.SetExtraconfig(url.QueryEscape(public.ExtraConfig))
}

_, err = client.CsClient.VirtualMachine.UpdateVirtualMachine(params)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions service/resources/cs_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ func (cr *CsGenerator) VMs() []models.VmPublic {
if csVM := &cr.v.vm.Subsystems.CS.VM; service.Created(csVM) {
csVM.Name = cr.v.vm.Name
csVM.ServiceOfferingID = cr.v.vm.Subsystems.CS.ServiceOffering.ID
csVM.ExtraConfig = cr.v.vm.Subsystems.CS.VM.ExtraConfig

res = append(res, *csVM)
return res
} else {
Expand All @@ -59,6 +61,7 @@ func (cr *CsGenerator) VMs() []models.VmPublic {
ServiceOfferingID: cr.v.vm.Subsystems.CS.ServiceOffering.ID,
TemplateID: constants.TemplateID,
Tags: createTags(cr.v.vm.Name, cr.v.vm.Name),
ExtraConfig: cr.v.vm.Subsystems.CS.VM.ExtraConfig,
})
return res
}
Expand Down
13 changes: 13 additions & 0 deletions service/vm_service/cs_service/cs_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
configModels "go-deploy/models/config"
vmModel "go-deploy/models/sys/vm"
gpuModel "go-deploy/models/sys/vm/gpu"
"go-deploy/pkg/config"
"go-deploy/pkg/subsystems/cs/commands"
csModels "go-deploy/pkg/subsystems/cs/models"
Expand Down Expand Up @@ -341,6 +342,18 @@ func Repair(id string) error {

// only repair if the vm is stopped to prevent downtime for the user
if status == "Stopped" {
var gpu *gpuModel.GPU
if context.VM.GpuID != "" {
gpu, err = gpuModel.New().GetByID(context.VM.GpuID)
if err != nil {
return makeError(err)
}
}

if gpu != nil {
vm.ExtraConfig = CreateExtraConfig(gpu)
}

err = resources.SsRepairer(
context.Client.ReadVM,
context.Client.CreateVM,
Expand Down

0 comments on commit eca9b06

Please sign in to comment.