From 6e8c46484f1de54e2bef66c2eeb4b572e73fea81 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Fri, 25 Oct 2024 12:29:04 +0900
Subject: [PATCH 1/8] pool-agent apply image selectable

---
 pool-agent/cmd/agent.go  | 267 ++++++++++++++++++++-------------------
 pool-agent/cmd/config.go |  23 ++--
 pool-agent/cmd/create.go |  20 +--
 pool-agent/go.mod        |   1 +
 pool-agent/go.sum        |   2 +
 5 files changed, 164 insertions(+), 149 deletions(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index db82a5c..bee58f0 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -15,18 +15,11 @@ import (
 	slm "github.com/whywaita/shoes-lxd-multi/server/pkg/api"
 )
 
-// Agent is an agent for pool mode.
-type Agent struct {
+type AgentConfig struct {
 	ImageAlias     string
 	InstanceSource api.InstanceSource
 
-	ResourceTypesMap    []ResourceTypesMap
 	ResourceTypesCounts ResourceTypesCounts
-	Client              lxd.InstanceServer
-
-	CheckInterval   time.Duration
-	WaitIdleTime    time.Duration
-	ZombieAllowTime time.Duration
 
 	creatingInstances map[string]instances
 	deletingInstances instances
@@ -34,7 +27,17 @@ type Agent struct {
 		Hash      string
 		CreatedAt time.Time
 	}
-	registry *prometheus.Registry
+}
+
+// Agent is an agent for pool mode.
+type Agent struct {
+	Config           map[string]*AgentConfig
+	CheckInterval    time.Duration
+	WaitIdleTime     time.Duration
+	ZombieAllowTime  time.Duration
+	registry         *prometheus.Registry
+	ResourceTypesMap ResourceTypesMap
+	Client           lxd.InstanceServer
 }
 
 var (
@@ -44,79 +47,80 @@ var (
 type instances map[string]struct{}
 
 func newAgent(ctx context.Context) (*Agent, error) {
-	conf, err := LoadConfig()
+	confmap, err := LoadConfig()
 	if err != nil {
 		return nil, fmt.Errorf("load config: %w", err)
 	}
-	source, err := slm.ParseAlias(conf.ImageAlias)
-	if err != nil {
-		return nil, err
-	}
-	source.Server = ""
+	fmt.Printf("config: %+v", confmap)
+	ac := make(map[string]*AgentConfig, len(confmap.Config))
+	for version, conf := range confmap.Config {
+		s, err := slm.ParseAlias(conf.ImageAlias)
+		if err != nil {
+			return nil, err
+		}
+		s.Server = ""
+		creatingInstances := make(map[string]instances)
+		for k, v := range conf.ResourceTypesCounts {
+			configuredInstancesCount.WithLabelValues(k).Set(float64(v))
+		}
+		for name, _ := range confmap.ResourceTypesMap {
+			creatingInstances[name] = make(instances)
+		}
+		ac[version] = &AgentConfig{
+			ImageAlias:     conf.ImageAlias,
+			InstanceSource: *s,
 
-	c, err := lxd.ConnectLXDUnixWithContext(ctx, "", &lxd.ConnectionArgs{})
-	if err != nil {
-		return nil, fmt.Errorf("connect lxd: %w", err)
+			ResourceTypesCounts: conf.ResourceTypesCounts,
+
+			currentImage: struct {
+				Hash      string
+				CreatedAt time.Time
+			}{Hash: "", CreatedAt: time.Time{}},
+
+			creatingInstances: creatingInstances,
+			deletingInstances: make(instances),
+		}
 	}
 	checkInterval, waitIdleTime, zombieAllowTime, err := LoadParams()
 	if err != nil {
 		return nil, fmt.Errorf("load params: %w", err)
 	}
-	creatingInstances := make(map[string]instances)
-	for _, rt := range conf.ResourceTypesMap {
-		creatingInstances[rt.Name] = make(instances)
-	}
-
 	registry := prometheus.NewRegistry()
 	registry.Register(configuredInstancesCount)
 	registry.Register(lxdInstances)
-	for k, v := range conf.ResourceTypesCounts {
-		configuredInstancesCount.WithLabelValues(k).Set(float64(v))
+	c, err := lxd.ConnectLXDUnixWithContext(ctx, "", &lxd.ConnectionArgs{})
+	if err != nil {
+		return nil, fmt.Errorf("connect lxd: %w", err)
 	}
-	agent := &Agent{
-		ImageAlias:     conf.ImageAlias,
-		InstanceSource: *source,
-
-		ResourceTypesMap:    conf.ResourceTypesMap,
-		ResourceTypesCounts: conf.ResourceTypesCounts,
-		Client:              c,
 
-		CheckInterval:   checkInterval,
-		WaitIdleTime:    waitIdleTime,
-		ZombieAllowTime: zombieAllowTime,
-		currentImage: struct {
-			Hash      string
-			CreatedAt time.Time
-		}{Hash: "", CreatedAt: time.Time{}},
-
-		creatingInstances: creatingInstances,
-		deletingInstances: make(instances),
-		registry:          registry,
+	agent := &Agent{
+		Config:           ac,
+		Client:           c,
+		CheckInterval:    checkInterval,
+		WaitIdleTime:     waitIdleTime,
+		ZombieAllowTime:  zombieAllowTime,
+		registry:         registry,
+		ResourceTypesMap: confmap.ResourceTypesMap,
 	}
+
 	return agent, nil
 }
 
 func (a *Agent) reloadConfig() error {
-	conf, err := LoadConfig()
+	confmap, err := LoadConfig()
 	if err != nil {
 		return fmt.Errorf("reload config: %w", err)
 	}
 
-	for k, v := range conf.ResourceTypesCounts {
-		configuredInstancesCount.WithLabelValues(k).Set(float64(v))
-	}
-
-	if conf.ImageAlias != a.ImageAlias {
-		source, err := slm.ParseAlias(conf.ImageAlias)
-		if err != nil {
-			return fmt.Errorf("parse image alias: %w", err)
+	for version, conf := range confmap.Config {
+		for k, v := range conf.ResourceTypesCounts {
+			configuredInstancesCount.WithLabelValues(k).Set(float64(v))
 		}
-		a.InstanceSource = *source
-		a.InstanceSource.Server = ""
-		a.ImageAlias = conf.ImageAlias
+		a.Config[version].ImageAlias = conf.ImageAlias
+		a.Config[version].ImageAlias = conf.ImageAlias
+		a.Config[version].ResourceTypesCounts = conf.ResourceTypesCounts
 	}
-	a.ResourceTypesMap = conf.ResourceTypesMap
-	a.ResourceTypesCounts = conf.ResourceTypesCounts
+	a.ResourceTypesMap = confmap.ResourceTypesMap
 	return nil
 }
 
@@ -144,16 +148,16 @@ func (a *Agent) Run(ctx context.Context, sigHupCh chan os.Signal) error {
 	}
 }
 
-func (a *Agent) countPooledInstances(instances []api.Instance, resourceTypeName string) int {
+func (a *Agent) countPooledInstances(instances []api.Instance, resourceTypeName, version string) int {
 	count := 0
 	for _, i := range instances {
 		if i.StatusCode != api.Frozen {
 			continue
 		}
-		if i.Config[configKeyImageAlias] != a.ImageAlias {
+		if i.Config[configKeyResourceType] != resourceTypeName {
 			continue
 		}
-		if i.Config[configKeyResourceType] != resourceTypeName {
+		if i.Config[configKeyImageAlias] != a.Config[version].ImageAlias {
 			continue
 		}
 		if _, ok := i.Config[configKeyRunnerName]; ok {
@@ -182,73 +186,73 @@ func (a *Agent) adjustInstancePool() error {
 	}
 
 	toDelete := []string{}
-
-	for _, rt := range a.ResourceTypesMap {
-		current := a.countPooledInstances(s, rt.Name)
-		creating := len(a.creatingInstances[rt.Name])
-		rtCount, ok := a.ResourceTypesCounts[rt.Name]
-		if !ok {
-			toDelete = append(toDelete, rt.Name)
-			continue
-		} else if rtCount == 0 {
-			toDelete = append(toDelete, rt.Name)
-			continue
-		}
-		createCount := rtCount - current - creating
-		if createCount < 1 {
-			continue
-		}
-		slog.Info("Create instances", "count", createCount, "flavor", rt.Name)
-		for i := 0; i < createCount; i++ {
-			name, err := generateInstanceName()
-			if err != nil {
-				return fmt.Errorf("generate instance name: %w", err)
+	for version, _ := range a.Config {
+		for rtName, rt := range a.ResourceTypesMap {
+			current := a.countPooledInstances(s, rtName, version)
+			creating := len(a.Config[version].creatingInstances[rtName])
+			rtCount, ok := a.Config[version].ResourceTypesCounts[rtName]
+			if !ok {
+				toDelete = append(toDelete, rtName)
+				continue
+			} else if rtCount == 0 {
+				toDelete = append(toDelete, rtName)
+				continue
 			}
-			l := slog.With("instance", name, "flavor", rt.Name)
-			a.creatingInstances[rt.Name][name] = struct{}{}
+			createCount := rtCount - current - creating
+			if createCount < 1 {
+				continue
+			}
+			slog.Info("Create instances", "count", createCount, "flavor", rtName)
+			for i := 0; i < createCount; i++ {
+				iname, err := generateInstanceName()
+				if err != nil {
+					return fmt.Errorf("generate instance name: %w", err)
+				}
+				l := slog.With("instance", iname, "flavor", rtName, "version", version)
+				a.Config[version].creatingInstances[rtName][iname] = struct{}{}
 
-			defer delete(a.creatingInstances[rt.Name], name)
+				defer delete(a.Config[version].creatingInstances[rtName], iname)
 
-			if err := a.createInstance(name, rt, l); err != nil {
-				l.Error("failed to create instance", "err", err.Error())
+				if err := a.createInstance(iname, rtName, rt, version, l); err != nil {
+					l.Error("failed to create instance", "err", err.Error())
+				}
 			}
 		}
-	}
-
-	for _, i := range s {
-		if i.Config[configKeyResourceType] == "" || i.Config[configKeyImageAlias] == "" {
-			continue
-		}
-		l := slog.With("instance", i.Name)
-		if _, ok := a.ResourceTypesCounts[i.Config[configKeyResourceType]]; !ok {
-			toDelete = append(toDelete, i.Config[configKeyResourceType])
-		}
-		for _, rt := range toDelete {
-			if i.Config[configKeyResourceType] == rt {
-				l := l.With("flavor", rt)
-				l.Info("Deleting disabled flavor instance")
-				if err := a.deleteInstance(i); err != nil {
-					l.Error("failed to delete instance", "err", err.Error())
-					continue
+		for _, i := range s {
+			if i.Config[configKeyResourceType] == "" || i.Config[configKeyImageAlias] == "" {
+				continue
+			}
+			l := slog.With("instance", i.Name, "version", version)
+			if _, ok := a.Config[version].ResourceTypesCounts[i.Config[configKeyResourceType]]; !ok {
+				toDelete = append(toDelete, i.Config[configKeyResourceType])
+			}
+			for _, rt := range toDelete {
+				if i.Config[configKeyResourceType] == rt {
+					l := l.With("flavor", rt)
+					l.Info("Deleting disabled flavor instance")
+					if err := a.deleteInstance(i, version); err != nil {
+						l.Error("failed to delete instance", "err", err.Error())
+						continue
+					}
+					l.Info("Deleted disabled flavor instance")
 				}
-				l.Info("Deleted disabled flavor instance")
 			}
-		}
-		if a.isZombieInstance(i) {
-			l.Info("Deleting zombie instance")
-			if err := a.deleteInstance(i); err != nil {
-				l.Error("failed to delete zombie instance", "err", err.Error())
+			if a.isZombieInstance(i, version) {
+				l.Info("Deleting zombie instance")
+				if err := a.deleteInstance(i, version); err != nil {
+					l.Error("failed to delete zombie instance", "err", err.Error())
+				}
+				l.Info("Deleted zombie instance")
 			}
-			l.Info("Deleted zombie instance")
-		}
-		if isOld, err := a.isOldImageInstance(i); err != nil {
-			l.Error("failed to check old image instance", "err", err.Error())
-		} else if isOld {
-			l.Info("Deleting old image instance")
-			if err := a.deleteInstance(i); err != nil {
-				l.Error("failed to delete old image instance", "err", err.Error())
+			if isOld, err := a.isOldImageInstance(i, version); err != nil {
+				l.Error("failed to check old image instance", "err", err.Error())
+			} else if isOld {
+				l.Info("Deleting old image instance")
+				if err := a.deleteInstance(i, version); err != nil {
+					l.Error("failed to delete old image instance", "err", err.Error())
+				}
+				l.Info("Deleted old image instance")
 			}
-			l.Info("Deleted old image instance")
 		}
 	}
 
@@ -288,14 +292,14 @@ func (a *Agent) collectMetrics() error {
 	return nil
 }
 
-func (a *Agent) isZombieInstance(i api.Instance) bool {
+func (a *Agent) isZombieInstance(i api.Instance, version string) bool {
 	if i.StatusCode == api.Frozen {
 		return false
 	}
 	if _, ok := i.Config[configKeyRunnerName]; ok {
 		return false
 	}
-	if i.Config[configKeyImageAlias] != a.ImageAlias {
+	if i.Config[configKeyImageAlias] != a.Config[version].ImageAlias {
 		return false
 	}
 	if i.CreatedAt.Add(a.ZombieAllowTime).After(time.Now()) {
@@ -303,37 +307,40 @@ func (a *Agent) isZombieInstance(i api.Instance) bool {
 	}
 	if rt, ok := i.Config[configKeyResourceType]; !ok {
 		return false
-	} else if _, ok := a.creatingInstances[rt][i.Name]; ok {
+	} else if _, ok := a.Config[version].creatingInstances[rt][i.Name]; ok {
 		return false
 	}
 	return true
 }
 
-func (a *Agent) isOldImageInstance(i api.Instance) (bool, error) {
+func (a *Agent) isOldImageInstance(i api.Instance, version string) (bool, error) {
 	baseImage, ok := i.Config["volatile.base_image"]
 	if !ok {
 		return false, errors.New("Failed to get volatile.base_image")
 	}
-	if baseImage != a.currentImage.Hash {
-		if i.CreatedAt.Before(a.currentImage.CreatedAt) {
+	if i.Config[configKeyImageAlias] != a.Config[version].ImageAlias {
+		return false, nil
+	}
+	if baseImage != a.Config[version].currentImage.Hash {
+		if i.CreatedAt.Before(a.Config[version].currentImage.CreatedAt) {
 			if i.StatusCode == api.Frozen {
 				return true, nil
 			}
 			return false, nil
 		}
-		a.currentImage.Hash = baseImage
-		a.currentImage.CreatedAt = i.CreatedAt
+		a.Config[version].currentImage.Hash = baseImage
+		a.Config[version].currentImage.CreatedAt = i.CreatedAt
 		return false, nil
 	}
 	return false, nil
 }
 
-func (a *Agent) deleteInstance(i api.Instance) error {
-	if _, ok := a.deletingInstances[i.Name]; ok {
+func (a *Agent) deleteInstance(i api.Instance, version string) error {
+	if _, ok := a.Config[version].deletingInstances[i.Name]; ok {
 		return nil
 	}
-	a.deletingInstances[i.Name] = struct{}{}
-	defer delete(a.deletingInstances, i.Name)
+	a.Config[version].deletingInstances[i.Name] = struct{}{}
+	defer delete(a.Config[version].deletingInstances, i.Name)
 	_, etag, err := a.Client.GetInstance(i.Name)
 	if err != nil {
 		return fmt.Errorf("get instance: %w", err)
diff --git a/pool-agent/cmd/config.go b/pool-agent/cmd/config.go
index 9f3654e..1047134 100644
--- a/pool-agent/cmd/config.go
+++ b/pool-agent/cmd/config.go
@@ -6,37 +6,42 @@ import (
 	"time"
 
 	"github.com/lxc/lxd/shared/api"
-	slm "github.com/whywaita/shoes-lxd-multi/server/pkg/api"
 	"github.com/pelletier/go-toml/v2"
+	slm "github.com/whywaita/shoes-lxd-multi/server/pkg/api"
 )
 
 // Config is config map for pool agent.
 type Config struct {
 	ImageAlias          string              `toml:"image_alias"`
-	ResourceTypesMap    []ResourceTypesMap  `toml:"resource_types_map"`
 	ResourceTypesCounts ResourceTypesCounts `toml:"resource_types_counts"`
 }
 
-// ResourceTypesMap is resource configuration for pool mode.
-type ResourceTypesMap struct {
-	Name string `toml:"name"`
+// ConfigMap is the top-level config for pool agent: shared resource types plus per-version configs.
+type ConfigMap struct {
+	ResourceTypesMap ResourceTypesMap  `toml:"resource_types_map"`
+	Config           map[string]Config `toml:"config"`
+}
 
+type resourceType struct {
 	CPUCore int    `toml:"cpu"`
 	Memory  string `toml:"memory"`
 }
 
+// ResourceTypesMap is resource configuration for pool mode.
+type ResourceTypesMap map[string]resourceType
+
 // ResourceTypesCounts is counts for resouce types.
 type ResourceTypesCounts map[string]int
 
 // LoadConfig LoadConfig loads config from configPath
-func LoadConfig() (Config, error) {
+func LoadConfig() (ConfigMap, error) {
 	f, err := os.ReadFile(configPath)
 	if err != nil {
-		return Config{}, fmt.Errorf("failed read config file: %w", err)
+		return ConfigMap{}, fmt.Errorf("failed read config file: %w", err)
 	}
-	var s Config
+	var s ConfigMap
 	if err := toml.Unmarshal(f, &s); err != nil {
-		return Config{}, fmt.Errorf("parse config file: %w", err)
+		return ConfigMap{}, fmt.Errorf("parse config file: %w", err)
 	}
 	return s, nil
 }
diff --git a/pool-agent/cmd/create.go b/pool-agent/cmd/create.go
index f1c6c96..83b1f96 100644
--- a/pool-agent/cmd/create.go
+++ b/pool-agent/cmd/create.go
@@ -10,10 +10,10 @@ import (
 	"github.com/lxc/lxd/shared/api"
 )
 
-func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger) error {
+func (a *Agent) createInstance(iname, rtName string, rt resourceType, version string, l *slog.Logger) error {
 	l.Info("Creating instance")
 	op, err := a.Client.CreateInstance(api.InstancesPost{
-		Name: name,
+		Name: iname,
 		InstancePut: api.InstancePut{
 			Config: map[string]string{
 				"limits.cpu":          strconv.Itoa(rt.CPUCore),
@@ -25,8 +25,8 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 					"lxc.cgroup.devices.allow = a",
 					"lxc.cap.drop=",
 				}, "\n"),
-				configKeyImageAlias:   a.ImageAlias,
-				configKeyResourceType: rt.Name,
+				configKeyImageAlias:   a.Config[version].ImageAlias,
+				configKeyResourceType: rtName,
 			},
 			Devices: map[string]map[string]string{
 				"kmsg": {
@@ -36,7 +36,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 				},
 			},
 		},
-		Source: a.InstanceSource,
+		Source: a.Config[version].InstanceSource,
 	})
 	if err != nil {
 		return fmt.Errorf("create: %w", err)
@@ -46,7 +46,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 	}
 
 	l.Info("Starting instance")
-	op, err = a.Client.UpdateInstanceState(name, api.InstanceStatePut{
+	op, err = a.Client.UpdateInstanceState(iname, api.InstanceStatePut{
 		Action:  "start",
 		Timeout: -1,
 	}, "")
@@ -58,7 +58,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 	}
 
 	l.Info("Waiting system bus in instance")
-	op, err = a.Client.ExecInstance(name, api.InstanceExecPost{
+	op, err = a.Client.ExecInstance(iname, api.InstanceExecPost{
 		Command: []string{"bash", "-c", "until test -e /var/run/dbus/system_bus_socket; do sleep 0.5; done"},
 	}, nil)
 	if err != nil {
@@ -69,7 +69,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 	}
 
 	l.Info("Waiting system running for instance")
-	op, err = a.Client.ExecInstance(name, api.InstanceExecPost{
+	op, err = a.Client.ExecInstance(iname, api.InstanceExecPost{
 		Command: []string{"systemctl", "is-system-running", "--wait"},
 	}, nil)
 	if err != nil {
@@ -80,7 +80,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 	}
 
 	l.Info("Disabling systemd service watchdogs in instance")
-	op, err = a.Client.ExecInstance(name, api.InstanceExecPost{
+	op, err = a.Client.ExecInstance(iname, api.InstanceExecPost{
 		Command: []string{"systemctl", "service-watchdogs", "no"},
 	}, nil)
 	if err != nil {
@@ -94,7 +94,7 @@ func (a *Agent) createInstance(name string, rt ResourceTypesMap, l *slog.Logger)
 	time.Sleep(a.WaitIdleTime)
 
 	l.Info("Freezing instance")
-	op, err = a.Client.UpdateInstanceState(name, api.InstanceStatePut{
+	op, err = a.Client.UpdateInstanceState(iname, api.InstanceStatePut{
 		Action:  "freeze",
 		Timeout: -1,
 	}, "")
diff --git a/pool-agent/go.mod b/pool-agent/go.mod
index d5b30e3..6a5acb6 100644
--- a/pool-agent/go.mod
+++ b/pool-agent/go.mod
@@ -5,6 +5,7 @@ go 1.22.0
 toolchain go1.22.2
 
 require (
+	dario.cat/mergo v1.0.1
 	github.com/lxc/lxd v0.0.0-20220308034307-91f3610e71c1
 	github.com/pelletier/go-toml/v2 v2.2.1
 	github.com/pkg/errors v0.9.1
diff --git a/pool-agent/go.sum b/pool-agent/go.sum
index 6e28afa..4ab3010 100644
--- a/pool-agent/go.sum
+++ b/pool-agent/go.sum
@@ -45,6 +45,8 @@ cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0Zeo
 cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
 cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
 cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
+dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
+dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=

From 4171a5633f51fd745a2d4a2fa5c186d06929aaa5 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Fri, 15 Nov 2024 19:04:26 +0900
Subject: [PATCH 2/8] update metrics

---
 pool-agent/cmd/agent.go   | 8 ++++----
 pool-agent/cmd/metrics.go | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index bee58f0..95f75ab 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -61,9 +61,9 @@ func newAgent(ctx context.Context) (*Agent, error) {
 		s.Server = ""
 		creatingInstances := make(map[string]instances)
 		for k, v := range conf.ResourceTypesCounts {
-			configuredInstancesCount.WithLabelValues(k).Set(float64(v))
+			configuredInstancesCount.WithLabelValues(k, conf.ImageAlias).Set(float64(v))
 		}
-		for name, _ := range confmap.ResourceTypesMap {
+		for name := range confmap.ResourceTypesMap {
 			creatingInstances[name] = make(instances)
 		}
 		ac[version] = &AgentConfig{
@@ -186,7 +186,7 @@ func (a *Agent) adjustInstancePool() error {
 	}
 
 	toDelete := []string{}
-	for version, _ := range a.Config {
+	for version := range a.Config {
 		for rtName, rt := range a.ResourceTypesMap {
 			current := a.countPooledInstances(s, rtName, version)
 			creating := len(a.Config[version].creatingInstances[rtName])
@@ -287,7 +287,7 @@ func (a *Agent) collectMetrics() error {
 	}
 	lxdInstances.Reset()
 	for _, i := range s {
-		lxdInstances.WithLabelValues(i.Status, i.Config[configKeyResourceType]).Inc()
+		lxdInstances.WithLabelValues(i.Status, i.Config[configKeyResourceType], i.Config[configKeyImageAlias]).Inc()
 	}
 	return nil
 }
diff --git a/pool-agent/cmd/metrics.go b/pool-agent/cmd/metrics.go
index 40984cf..4585df3 100644
--- a/pool-agent/cmd/metrics.go
+++ b/pool-agent/cmd/metrics.go
@@ -12,7 +12,7 @@ var (
 			Subsystem: "configured",
 			Namespace: "pool_agent",
 		},
-		[]string{"flavor"},
+		[]string{"flavor", "image_alias"},
 	)
 	lxdInstances = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
@@ -21,6 +21,6 @@ var (
 			Subsystem: "lxd",
 			Namespace: "pool_agent",
 		},
-		[]string{"status", "flavor"},
+		[]string{"status", "flavor", "image_alias"},
 	)
 )

From 06aad759fe38183cd1d84ea476815913917c6a21 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Tue, 19 Nov 2024 14:01:18 +0900
Subject: [PATCH 3/8] fix disable resource type handling

---
 pool-agent/cmd/agent.go | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index 95f75ab..442f3b7 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -185,12 +185,12 @@ func (a *Agent) adjustInstancePool() error {
 		return fmt.Errorf("get instances: %w", err)
 	}
 
-	toDelete := []string{}
-	for version := range a.Config {
+	for version, config := range a.Config {
+		toDelete := []string{}
 		for rtName, rt := range a.ResourceTypesMap {
 			current := a.countPooledInstances(s, rtName, version)
-			creating := len(a.Config[version].creatingInstances[rtName])
-			rtCount, ok := a.Config[version].ResourceTypesCounts[rtName]
+			creating := len(config.creatingInstances[rtName])
+			rtCount, ok := config.ResourceTypesCounts[rtName]
 			if !ok {
 				toDelete = append(toDelete, rtName)
 				continue
@@ -209,9 +209,9 @@ func (a *Agent) adjustInstancePool() error {
 					return fmt.Errorf("generate instance name: %w", err)
 				}
 				l := slog.With("instance", iname, "flavor", rtName, "version", version)
-				a.Config[version].creatingInstances[rtName][iname] = struct{}{}
+				config.creatingInstances[rtName][iname] = struct{}{}
 
-				defer delete(a.Config[version].creatingInstances[rtName], iname)
+				defer delete(config.creatingInstances[rtName], iname)
 
 				if err := a.createInstance(iname, rtName, rt, version, l); err != nil {
 					l.Error("failed to create instance", "err", err.Error())
@@ -219,12 +219,14 @@ func (a *Agent) adjustInstancePool() error {
 			}
 		}
 		for _, i := range s {
-			if i.Config[configKeyResourceType] == "" || i.Config[configKeyImageAlias] == "" {
+			if i.Config[configKeyResourceType] == "" || i.Config[configKeyImageAlias] != config.ImageAlias {
 				continue
 			}
 			l := slog.With("instance", i.Name, "version", version)
-			if _, ok := a.Config[version].ResourceTypesCounts[i.Config[configKeyResourceType]]; !ok {
-				toDelete = append(toDelete, i.Config[configKeyResourceType])
+			if _, ok := config.ResourceTypesCounts[i.Config[configKeyResourceType]]; !ok {
+				if i.Config[configKeyImageAlias] == config.ImageAlias {
+					toDelete = append(toDelete, i.Config[configKeyResourceType])
+				}
 			}
 			for _, rt := range toDelete {
 				if i.Config[configKeyResourceType] == rt {

From 4ccf1f066ca0f5640b71905be515e4d2659e1bb2 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Tue, 19 Nov 2024 14:43:15 +0900
Subject: [PATCH 4/8] update README.md

---
 pool-agent/README.md | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/pool-agent/README.md b/pool-agent/README.md
index 4fa6e8c..e2573f8 100644
--- a/pool-agent/README.md
+++ b/pool-agent/README.md
@@ -10,7 +10,6 @@ configuration file format is toml
 
 ```toml
 # sample configuration
-image_alias = "https://example.com/container-image"
 [[resource_types_map]]
 name = "large"
 cpu = 2
@@ -31,12 +30,22 @@ memory = "120GB"
 name = "4xlarge"
 cpu = 16
 memory = "160GB"
-[resource_types_counts]
-large = 10
-xlarge = 10
-2xlarge = 10
-3xlarge = 10
-4xlarge = 10
+[config.ubuntu2404]
+image_alias = "https://example.com/container-image-noble"
+[config.noble.resource_types_counts]
+large = 1
+xlarge = 1
+2xlarge = 1
+3xlarge = 1
+4xlarge = 1
+[config.ubuntu2204]
+image_alias = "https://example.com/container-image-jammy"
+[config.focal.resource_types_counts]
+large = 1
+xlarge = 1
+2xlarge = 1
+3xlarge = 1
+4xlarge = 1
 ```
 
 ### command line options

From ce6046fc1f33872b14f6c47a9380eca2d53b6264 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Tue, 19 Nov 2024 16:56:23 +0900
Subject: [PATCH 5/8] fix README.md

---
 pool-agent/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pool-agent/README.md b/pool-agent/README.md
index e2573f8..98dcc16 100644
--- a/pool-agent/README.md
+++ b/pool-agent/README.md
@@ -32,7 +32,7 @@ cpu = 16
 memory = "160GB"
 [config.ubuntu2404]
 image_alias = "https://example.com/container-image-noble"
-[config.noble.resource_types_counts]
+[config.ubuntu2404.resource_types_counts]
 large = 1
 xlarge = 1
 2xlarge = 1
@@ -40,7 +40,7 @@ xlarge = 1
 4xlarge = 1
 [config.ubuntu2204]
 image_alias = "https://example.com/container-image-jammy"
-[config.focal.resource_types_counts]
+[config.ubuntu2204.resource_types_counts]
 large = 1
 xlarge = 1
 2xlarge = 1

From fa8e2e31495a69c424ed69f7d84930ff1da02c4f Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Tue, 19 Nov 2024 17:28:16 +0900
Subject: [PATCH 6/8] remove debug log

---
 pool-agent/cmd/agent.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index 442f3b7..dff7d8a 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -51,7 +51,6 @@ func newAgent(ctx context.Context) (*Agent, error) {
 	if err != nil {
 		return nil, fmt.Errorf("load config: %w", err)
 	}
-	fmt.Printf("config: %+v", confmap)
 	ac := make(map[string]*AgentConfig, len(confmap.Config))
 	for version, conf := range confmap.Config {
 		s, err := slm.ParseAlias(conf.ImageAlias)

From a38a1eb37a73bfdaec1a7c47c3f24844c2f619ff Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Fri, 6 Dec 2024 16:02:40 +0900
Subject: [PATCH 7/8] fix panic in reloadConfig

---
 pool-agent/cmd/agent.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index dff7d8a..949752a 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -113,7 +113,7 @@ func (a *Agent) reloadConfig() error {
 
 	for version, conf := range confmap.Config {
 		for k, v := range conf.ResourceTypesCounts {
-			configuredInstancesCount.WithLabelValues(k).Set(float64(v))
+			configuredInstancesCount.WithLabelValues(k, conf.ImageAlias).Set(float64(v))
 		}
 		a.Config[version].ImageAlias = conf.ImageAlias
 		a.Config[version].ImageAlias = conf.ImageAlias

From b592f269c15677c1e5a44effae0d5aeac3812397 Mon Sep 17 00:00:00 2001
From: gamoutatsumi <tatsumi@gamou.dev>
Date: Thu, 12 Dec 2024 10:58:15 +0900
Subject: [PATCH 8/8] fix panic on reload config

---
 pool-agent/cmd/agent.go | 75 ++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/pool-agent/cmd/agent.go b/pool-agent/cmd/agent.go
index 949752a..69d3504 100644
--- a/pool-agent/cmd/agent.go
+++ b/pool-agent/cmd/agent.go
@@ -46,6 +46,30 @@ var (
 
 type instances map[string]struct{}
 
+func genAgentConfig(config Config) *AgentConfig {
+	s, err := slm.ParseAlias(config.ImageAlias)
+	if err != nil {
+		return nil
+	}
+	s.Server = ""
+	creatingInstances := make(map[string]instances)
+	for k, v := range config.ResourceTypesCounts {
+		configuredInstancesCount.WithLabelValues(k, config.ImageAlias).Set(float64(v))
+		creatingInstances[k] = make(instances)
+	}
+	return &AgentConfig{
+		ImageAlias:          config.ImageAlias,
+		InstanceSource:      *s,
+		ResourceTypesCounts: config.ResourceTypesCounts,
+		currentImage: struct {
+			Hash      string
+			CreatedAt time.Time
+		}{Hash: "", CreatedAt: time.Time{}},
+		deletingInstances: make(instances),
+		creatingInstances: creatingInstances,
+	}
+}
+
 func newAgent(ctx context.Context) (*Agent, error) {
 	confmap, err := LoadConfig()
 	if err != nil {
@@ -53,32 +77,11 @@ func newAgent(ctx context.Context) (*Agent, error) {
 	}
 	ac := make(map[string]*AgentConfig, len(confmap.Config))
 	for version, conf := range confmap.Config {
-		s, err := slm.ParseAlias(conf.ImageAlias)
-		if err != nil {
-			return nil, err
-		}
-		s.Server = ""
-		creatingInstances := make(map[string]instances)
-		for k, v := range conf.ResourceTypesCounts {
-			configuredInstancesCount.WithLabelValues(k, conf.ImageAlias).Set(float64(v))
-		}
-		for name := range confmap.ResourceTypesMap {
-			creatingInstances[name] = make(instances)
-		}
-		ac[version] = &AgentConfig{
-			ImageAlias:     conf.ImageAlias,
-			InstanceSource: *s,
-
-			ResourceTypesCounts: conf.ResourceTypesCounts,
-
-			currentImage: struct {
-				Hash      string
-				CreatedAt time.Time
-			}{Hash: "", CreatedAt: time.Time{}},
-
-			creatingInstances: creatingInstances,
-			deletingInstances: make(instances),
+		agentConfig := genAgentConfig(conf)
+		if agentConfig == nil {
+			return nil, fmt.Errorf("failed to generate agent config")
 		}
+		ac[version] = agentConfig
 	}
 	checkInterval, waitIdleTime, zombieAllowTime, err := LoadParams()
 	if err != nil {
@@ -115,9 +118,25 @@ func (a *Agent) reloadConfig() error {
 		for k, v := range conf.ResourceTypesCounts {
 			configuredInstancesCount.WithLabelValues(k, conf.ImageAlias).Set(float64(v))
 		}
-		a.Config[version].ImageAlias = conf.ImageAlias
-		a.Config[version].ImageAlias = conf.ImageAlias
-		a.Config[version].ResourceTypesCounts = conf.ResourceTypesCounts
+		if _, ok := a.Config[version]; !ok {
+			agentConfig := genAgentConfig(conf)
+			if agentConfig == nil {
+				return fmt.Errorf("failed to generate agent config")
+			}
+			a.Config[version] = agentConfig
+			continue
+		} else {
+			s, err := slm.ParseAlias(conf.ImageAlias)
+			if err != nil {
+				return err
+			}
+			s.Server = ""
+			a.Config[version] = &AgentConfig{
+				ImageAlias:          conf.ImageAlias,
+				InstanceSource:      *s,
+				ResourceTypesCounts: conf.ResourceTypesCounts,
+			}
+		}
 	}
 	a.ResourceTypesMap = confmap.ResourceTypesMap
 	return nil